1 //
2 //  utils.cpp
3 //  Mothur
4 //
5 //  Created by Sarah Westcott on 11/13/17.
6 //  Copyright © 2017 Schloss Lab. All rights reserved.
7 //
8 
#include "utils.hpp"

#include <cstdlib>

#include "ordervector.hpp"
#include "sharedordervector.h"
#include "phylotree.h"
#include "taxonomy.hpp"
#include "inputdata.h"
#include "sharedclrvectors.hpp"
#include "sharedrabundfloatvectors.hpp"
17 
18 /***********************************************************************/
// Returns the non-numeric part of a label (e.g. an OTU or phylotype label).
// Every ASCII digit ('0'-'9') is dropped; all other characters are kept in
// their original order, so "Otu001" becomes "Otu".
//
// label   - the label to strip
// returns - label without its digits ("" when label is empty or all digits)
std::string getLabelTag(std::string label){

    std::string tag;

    //keep everything that is not a digit
    for (const char c : label) {
        const bool isDigit = (c >= '0' && c <= '9');
        if (!isDigit) {  tag += c;  }
    }

    return tag;
}
32 /***********************************************************************/
Utils()33 Utils::Utils(){
34     try {
35 
36         m = MothurOut::getInstance();  modifyNames = m->getChangedSeqNames();
37         long long s = m->getRandomSeed();
38         mersenne_twister_engine.seed(s); srand(s);
39         homePath = m->getHomePath(); currentWorkingDirectory = "";
40         paths = m->getPaths();
41     }
42     catch(exception& e) {
43         m->errorOut(e, "Utils", "mothurRandomShuffle");
44         exit(1);
45     }
46 }
47 /***********************************************************************/
randomUniform()48 float Utils::randomUniform() {
49     try {
50         uniform_real_distribution<float> unif;
51         return (unif(mersenne_twister_engine));
52     }
53     catch(exception& e) {
54         m->errorOut(e, "Utils", "randomUniform");
55         exit(1);
56     }
57 }
58 /***********************************************************************/
randomExp()59 float Utils::randomExp() {
60     try {
61         exponential_distribution<float> unif;
62         return (unif(mersenne_twister_engine));
63     }
64     catch(exception& e) {
65         m->errorOut(e, "Utils", "randomExp");
66         exit(1);
67     }
68 }
69 /***********************************************************************/
randomNorm()70 float Utils::randomNorm() {
71     try {
72         normal_distribution<float> unif;
73         return (unif(mersenne_twister_engine));
74     }
75     catch(exception& e) {
76         m->errorOut(e, "Utils", "randomNorm");
77         exit(1);
78     }
79 }
80 /***********************************************************************/
randomGamma(float range)81 float Utils::randomGamma(float range) {
82     try {
83         gamma_distribution<float> unif(range, range);
84         return (unif(mersenne_twister_engine));
85     }
86     catch(exception& e) {
87         m->errorOut(e, "Utils", "randomGamma");
88         exit(1);
89     }
90 }
91 /***********************************************************************/
randomDirichlet(vector<float> alphas)92 vector<float> Utils::randomDirichlet(vector<float> alphas) {
93     try {
94         int nAlphas = (int)alphas.size();
95         vector<float> dirs(nAlphas, 0);
96 
97         float sum = 0.0000;
98         for(int i=0;i<nAlphas;i++){
99             dirs[i] = randomGamma(alphas[i]);
100 						while(isinf(dirs[i])) { dirs[i] = randomGamma(alphas[i]);	}
101             sum += dirs[i];
102         }
103 
104         for(int i=0;i<nAlphas;i++){ dirs[i] /= sum; }
105 
106         return dirs;
107     }
108     catch(exception& e) {
109         m->errorOut(e, "Utils", "randomDirichlet");
110         exit(1);
111     }
112 }
113 /***********************************************************************/
mothurRandomShuffle(vector<int> & randomize)114 void Utils::mothurRandomShuffle(vector<int>& randomize){
115     try {
116         shuffle (randomize.begin(), randomize.end(), mersenne_twister_engine);
117     }
118     catch(exception& e) {
119         m->errorOut(e, "Utils", "mothurRandomShuffle");
120         exit(1);
121     }
122 
123 }
124 /***********************************************************************/
mothurRandomShuffle(vector<weightedSeq> & randomize)125 void Utils::mothurRandomShuffle(vector<weightedSeq>& randomize){
126     try {
127         shuffle (randomize.begin(), randomize.end(), mersenne_twister_engine);
128     }
129     catch(exception& e) {
130         m->errorOut(e, "Utils", "mothurRandomShuffle");
131         exit(1);
132     }
133 
134 }
135 /***********************************************************************/
mothurRandomShuffle(vector<long long> & randomize)136 void Utils::mothurRandomShuffle(vector<long long>& randomize){
137     try {
138         shuffle (randomize.begin(), randomize.end(), mersenne_twister_engine);
139     }
140     catch(exception& e) {
141         m->errorOut(e, "Utils", "mothurRandomShuffle");
142         exit(1);
143     }
144 
145 }
146 /***********************************************************************/
mothurRandomShuffle(OrderVector & randomize)147 void Utils::mothurRandomShuffle(OrderVector& randomize){
148     try {
149         shuffle (randomize.begin(), randomize.end(), mersenne_twister_engine);
150     }
151     catch(exception& e) {
152         m->errorOut(e, "Utils", "mothurRandomShuffle");
153         exit(1);
154     }
155 
156 }
157 /***********************************************************************/
mothurRandomShuffle(vector<SharedRAbundVector * > & randomize)158 void Utils::mothurRandomShuffle(vector<SharedRAbundVector*>& randomize){
159     try {
160         shuffle (randomize.begin(), randomize.end(), mersenne_twister_engine);
161     }
162     catch(exception& e) {
163         m->errorOut(e, "Utils", "mothurRandomShuffle");
164         exit(1);
165     }
166 
167 }
168 /***********************************************************************/
mothurRandomShuffle(SharedOrderVector & randomize)169 void Utils::mothurRandomShuffle(SharedOrderVector& randomize){
170     try {
171         shuffle (randomize.begin(), randomize.end(), mersenne_twister_engine);
172     }
173     catch(exception& e) {
174         m->errorOut(e, "Utils", "mothurRandomShuffle");
175         exit(1);
176     }
177 
178 }
179 /***********************************************************************/
mothurRandomShuffle(vector<string> & randomize)180 void Utils::mothurRandomShuffle(vector<string>& randomize){
181     try {
182         shuffle (randomize.begin(), randomize.end(), mersenne_twister_engine);
183     }
184     catch(exception& e) {
185         m->errorOut(e, "Utils", "mothurRandomShuffle");
186         exit(1);
187     }
188 
189 }
190 /***********************************************************************/
mothurRandomShuffle(vector<item> & randomize)191 void Utils::mothurRandomShuffle(vector<item>& randomize){
192     try {
193         shuffle (randomize.begin(), randomize.end(), mersenne_twister_engine);
194     }
195     catch(exception& e) {
196         m->errorOut(e, "Utils", "mothurRandomShuffle");
197         exit(1);
198     }
199 
200 }
201 /***********************************************************************/
mothurRandomShuffle(vector<PCell * > & randomize)202 void Utils::mothurRandomShuffle(vector<PCell*>& randomize){
203     try {
204         shuffle (randomize.begin(), randomize.end(), mersenne_twister_engine);
205     }
206     catch(exception& e) {
207         m->errorOut(e, "Utils", "mothurRandomShuffle");
208         exit(1);
209     }
210 
211 }
212 /***********************************************************************/
mothurRandomShuffle(vector<colDist> & randomize)213 void Utils::mothurRandomShuffle(vector<colDist>& randomize){
214     try {
215         shuffle (randomize.begin(), randomize.end(), mersenne_twister_engine);
216     }
217     catch(exception& e) {
218         m->errorOut(e, "Utils", "mothurRandomShuffle");
219         exit(1);
220     }
221 
222 }
223 /***********************************************************************/
mothurRandomShuffle(vector<PDistCellMin> & randomize)224 void Utils::mothurRandomShuffle(vector<PDistCellMin>& randomize){
225     try {
226         shuffle (randomize.begin(), randomize.end(), mersenne_twister_engine);
227     }
228     catch(exception& e) {
229         m->errorOut(e, "Utils", "mothurRandomShuffle");
230         exit(1);
231     }
232 
233 }
234 /***********************************************************************/
mothurRandomShuffle(vector<vector<double>> & randomize)235 void Utils::mothurRandomShuffle(vector< vector<double> >& randomize){
236     try {
237         shuffle (randomize.begin(), randomize.end(), mersenne_twister_engine);
238     }
239     catch(exception& e) {
240         m->errorOut(e, "Utils", "mothurRandomShuffle");
241         exit(1);
242     }
243 
244 }
245 /***********************************************************************/
getRandomIndex(long long highest)246 long long Utils::getRandomIndex(long long highest){
247     try {
248 
249         if (highest == 0) {  return 0; }
250 
251         uniform_int_distribution<long long> dis(0, highest);
252 
253         long long random = dis(mersenne_twister_engine);
254 
255         return random;
256     }
257     catch(exception& e) {
258         m->errorOut(e, "Utils", "getRandomIndex");
259         exit(1);
260     }
261 }
262 /***********************************************************************/
263 
getRandomIndex(int highest)264 int Utils::getRandomIndex(int highest){
265     try {
266         if (highest == 0) { return 0; }
267 
268         uniform_int_distribution<int> dis(0, highest);
269         int random = dis(mersenne_twister_engine);
270         return random;
271     }
272     catch(exception& e) {
273         m->errorOut(e, "Utils", "getRandomIndex");
274         exit(1);
275     }
276 
277 }
278 /***********************************************************************/
getRandomNumber()279 int Utils::getRandomNumber(){
280     try {
281         uniform_int_distribution<int> dis;
282 
283         int random = dis(mersenne_twister_engine);
284 
285         return random;
286     }
287     catch(exception& e) {
288         m->errorOut(e, "Utils", "getRandomNumber");
289         exit(1);
290     }
291 
292 }
293 /***********************************************************************/
getRandomDouble0to1()294 double Utils::getRandomDouble0to1(){
295     try {
296         uniform_real_distribution<double> dis(0, 1);
297 
298         double random = dis(mersenne_twister_engine);
299 
300         return random;
301     }
302     catch(exception& e) {
303         m->errorOut(e, "Utils", "getRandomNumber");
304         exit(1);
305     }
306 
307 }
308 
309 /*********************************************************************************************/
getRAMUsed()310 double Utils::getRAMUsed() {
311     try {
312 
313 #if defined (__APPLE__) || (__MACH__)
314         /* Mac: ru_maxrss gives the size in bytes */
315         struct rusage r_usage;
316         getrusage(RUSAGE_SELF, & r_usage);
317 
318         return r_usage.ru_maxrss;
319 #elif (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
320         /* Linux: ru_maxrss gives the size in kilobytes  */
321         struct rusage r_usage;
322         getrusage(RUSAGE_SELF, & r_usage);
323         return r_usage.ru_maxrss * 1024;
324 #else
325         MEMORYSTATUSEX status;
326         status.dwLength = sizeof(status);
327         GlobalMemoryStatusEx(&status);
328         return (size_t)(status.ullTotalPhys - status.ullAvailPhys);
329 #endif
330     }
331     catch(exception& e) {
332         m->errorOut(e, "Utils", "getMemoryUsed");
333         exit(1);
334     }
335 }
336 /*********************************************************************************************/
getTotalRAM()337 double Utils::getTotalRAM() {
338     try {
339 
340 #if defined NON_WINDOWS
341 #if defined _SC_PHYS_PAGES && defined _SC_PAGESIZE
342         /* This works on linux-gnu, solaris2 and cygwin.  */
343         double pages = sysconf (_SC_PHYS_PAGES);
344         double pagesize = sysconf (_SC_PAGESIZE);
345         if (0 <= pages && 0 <= pagesize)
346             return pages * pagesize;
347 #else
348         m->mothurOut("[WARNING]: Cannot determine amount of RAM");
349 #endif
350 
351 #elif defined (_WIN32)
352         MEMORYSTATUSEX status;
353         status.dwLength = sizeof(status);
354         GlobalMemoryStatusEx(&status);
355         return (size_t)status.ullTotalPhys;
356 #else
357         struct sysinfo si;
358         if (sysinfo(&si))
359             mothurOut("[WARNING]: Cannot determine amount of RAM");
360         return si.totalram * si.mem_unit;
361 
362 #endif
363         return 0;
364     }
365     catch(exception& e) {
366         m->errorOut(e, "Utils", "getTotalRAM");
367         exit(1);
368     }
369 }
370 //********************************************************************/
reverseOligo(string oligo)371 string Utils::reverseOligo(string oligo){
372     try {
373         string reverse = "";
374 
375         for(int i=oligo.length()-1;i>=0;i--){
376 
377             if(oligo[i] == 'A')		{	reverse += 'T';	}
378             else if(oligo[i] == 'T'){	reverse += 'A';	}
379             else if(oligo[i] == 'U'){	reverse += 'A';	}
380 
381             else if(oligo[i] == 'G'){	reverse += 'C';	}
382             else if(oligo[i] == 'C'){	reverse += 'G';	}
383 
384             else if(oligo[i] == 'R'){	reverse += 'Y';	}
385             else if(oligo[i] == 'Y'){	reverse += 'R';	}
386 
387             else if(oligo[i] == 'M'){	reverse += 'K';	}
388             else if(oligo[i] == 'K'){	reverse += 'M';	}
389 
390             else if(oligo[i] == 'W'){	reverse += 'W';	}
391             else if(oligo[i] == 'S'){	reverse += 'S';	}
392 
393             else if(oligo[i] == 'B'){	reverse += 'V';	}
394             else if(oligo[i] == 'V'){	reverse += 'B';	}
395 
396             else if(oligo[i] == 'D'){	reverse += 'H';	}
397             else if(oligo[i] == 'H'){	reverse += 'D';	}
398 
399             else						{	reverse += 'N';	}
400         }
401 
402 
403         return reverse;
404     }
405     catch(exception& e) {
406         m->errorOut(e, "Utils", "reverseOligo");
407         exit(1);
408     }
409 }
410 
411 /*********************************************************************************************/
fileExists(string name)412 bool Utils::fileExists(string name)  {
413     try {
414         bool fExists = false;
415         name = getFullPathName(name);
416 
417 #if defined USE_BOOST
418 
419            boost::filesystem::path p(name.c_str());
420 
421            if (exists(p)) {
422               if (is_regular_file(p)) { fExists = true; } // is path p a regular file?
423            }
424 #else
425 
426     #if defined NON_WINDOWS
427         ifstream in; openInputFile(name, in, "");
428 
429         //if this file exists
430         if (in) { in.close(); fExists = true;  }
431     #else
432 
433         DWORD attributes = GetFileAttributes(name.c_str());
434         fExists = (attributes != INVALID_FILE_ATTRIBUTES && !(attributes & FILE_ATTRIBUTE_DIRECTORY));
435     #endif
436 
437 #endif
438 
439         return fExists;
440     }
441     catch(exception& e) {
442         m->errorOut(e, "Utils", "fileExists");
443         exit(1);
444     }
445 }
446 /***********************************************************************/
getFullPathName(string fileName)447 string Utils::getFullPathName(string fileName){
448     try{
449         string path = hasPath(fileName);
450         string newFileName;
451         int pos;
452         vector<string> dirs;
453         int index = 0;
454 
455         if (path == "") { return fileName; } //its a simple name
456         else { //we need to complete the pathname
457             // ex. ../../../filename
458             // cwd = /user/work/desktop
459             //get current working directory
460             string cwd = currentWorkingDirectory;
461 
462             if (path.find("~") != string::npos) { //go to home directory
463                 newFileName = homePath + fileName.substr(fileName.find("~")+1);
464                 return newFileName;
465             }else { //find path
466                 string pattern = ".";  pattern += PATH_SEPARATOR;
467                 if (path.rfind(pattern) == string::npos) { return fileName; } //already complete name
468                 else { newFileName = fileName.substr(fileName.rfind(pattern)+2); } //save the complete part of the name
469 
470                 if (cwd == "") {
471                     char *cwdpath = NULL; cwdpath = getcwd(NULL, 0); // or _getcwd
472                     if (cwdpath != NULL)    { cwd = cwdpath;    }
473                     else                    { cwd = "";         }
474                     currentWorkingDirectory = cwd;
475                 }
476                 //rip off first '/'
477                 string simpleCWD = cwd;
478 #if defined NON_WINDOWS
479                 if (cwd.length() > 0) { simpleCWD = cwd.substr(1); }
480 #endif
481                 //break apart the current working directory
482                 while (simpleCWD.find_first_of(PATH_SEPARATOR) != string::npos) {
483                     string dir = simpleCWD.substr(0,simpleCWD.find_first_of(PATH_SEPARATOR));
484                     simpleCWD = simpleCWD.substr(simpleCWD.find_first_of(PATH_SEPARATOR)+1, simpleCWD.length());
485                     dirs.push_back(dir);
486                 }
487                 //get last one              // ex. ../../../filename = /user/work/desktop/filename
488                 dirs.push_back(simpleCWD);  //ex. dirs[0] = user, dirs[1] = work, dirs[2] = desktop
489 
490                 index = dirs.size()-1;
491                 string searchString = "."; searchString += PATH_SEPARATOR;
492                 while((pos = path.rfind(searchString)) != string::npos) { //while you don't have a complete path
493                     if (pos == 0) { break;  //you are at the end
494                     }else if (path[(pos-1)] == '.') { //you want your parent directory ../
495                         path = path.substr(0, pos-1);
496                         index--;
497                         if (index == 0) {  break; }
498                     }else if (path[(pos-1)] == '/') { //you want the current working dir ./
499                         path = path.substr(0, pos);
500                     }else if (pos == 1) { break;  //you are at the end
501                     }else { m->mothurOut("[ERROR}: Can not resolve path for " +  fileName + "\n"); m->setControl_pressed(true); return fileName;  }
502                 }
503             }
504 
505             for (int i = index; i >= 0; i--) { newFileName = dirs[i] +  PATH_SEPARATOR + newFileName; }
506 
507 #if defined NON_WINDOWS
508             newFileName =  PATH_SEPARATOR +  newFileName;
509 #endif
510             return newFileName;
511         }
512     }
513     catch(exception& e) {
514         m->errorOut(e, "Utils", "getFullPathName");
515         exit(1);
516     }
517 }
518 /********************************************************************/
findProgramPath(string programName)519 string Utils::findProgramPath(string programName){
520     try {
521         //look in ./
522         //is this the programs path?
523         string tempIn = ".";
524         tempIn += PATH_SEPARATOR;
525 
526         //if this file exists
527         string pPath = "";
528         if (fileExists(tempIn+programName)) { pPath = getFullPathName(tempIn); if (m->getDebug()) { m->mothurOut("[DEBUG]: found it, programPath = " + pPath + "\n"); } return pPath;   }
529 
530         if (m->getDebug()) { m->mothurOut("[DEBUG]: dir's in path: \n"); }
531 
532         //get path related to mothur
533         for (int i = 0; i < paths.size(); i++) {
534 
535             if (m->getDebug()) { m->mothurOut("[DEBUG]: " + paths[i] + "\n"); }
536 
537             //to lower so we can find it
538             string tempLower = "";
539             for (int j = 0; j < paths[i].length(); j++) {  tempLower += tolower(paths[i][j]);  }
540 
541             //is this mothurs path?
542             if (tempLower.find(programName) != -1) {  pPath = paths[i]; break;  }
543         }
544 
545         if (m->getDebug()) { m->mothurOut("[DEBUG]: programPath = " + pPath + "\n"); }
546 
547         //add programName so it looks like what argv would look like
548         if (pPath != "") { pPath += PATH_SEPARATOR;  }
549         else {
550             //okay programName is not in the path, so the folder programName is in must be in the path
551             //lets find out which one
552 
553             //get path related to the program
554             for (int i = 0; i < paths.size(); i++) {
555 
556                 if (m->getDebug()) { m->mothurOut("[DEBUG]: looking in " + paths[i] + " for " + programName + " \n"); }
557 
558                 //is this the programs path?
559                 string tempIn = paths[i] + PATH_SEPARATOR;
560 
561                 //if this file exists
562                 if (fileExists(tempIn + programName)) { pPath = getFullPathName(tempIn); if (m->getDebug()) { m->mothurOut("[DEBUG]: found it, programPath = " + pPath + "\n"); } break;   }
563             }
564         }
565 
566 #if defined NON_WINDOWS
567 #else
568         if (pPath == "") {
569             char buffer[MAX_PATH];
570             GetModuleFileName(NULL, buffer, MAX_PATH) ;
571 
572             pPath = buffer;
573             pPath = getPathName(pPath);
574 
575             //if this file exists
576             if (fileExists(pPath + programName)) { pPath = getFullPathName(pPath); if (m->getDebug()) { m->mothurOut("[DEBUG]: found it, programPath = " + pPath + "\n"); } }
577         }
578 #endif
579         return pPath;
580     }
581     catch(exception& e) {
582         m->errorOut(e, "Utils", "findProgramPath");
583         exit(1);
584     }
585 }
586 /***********************************************************************/
checkLocations(string & filename,vector<string> locations)587 bool Utils::checkLocations(string& filename, vector<string> locations){
588     try {
589         filename = getFullPathName(filename);
590         string inputDir = locations[0];
591         string outputDir = locations[1];
592         string defaultPath = locations[2];
593         string mothurPath = locations[3];
594         string mothurToolsPath = locations[4];
595 
596         bool ableToOpen;
597         ifstream in;
598         ableToOpen = openInputFile(filename, in, "noerror");
599         in.close();
600 
601         //if you can't open it, try input location
602         if (!ableToOpen) {
603             if (inputDir != "") { //default path is set
604                 string tryPath = inputDir + getSimpleName(filename);
605                 m->mothurOut("Unable to open " + filename + ". Trying input directory " + tryPath + ".\n");
606                 ifstream in2; ableToOpen = openInputFile(tryPath, in2, "noerror"); in2.close();
607                 filename = tryPath;
608             }
609         }
610 
611         //if you can't open it, try output location
612         if (!ableToOpen) {
613             if (outputDir != "") { //default path is set
614                 string tryPath = outputDir + getSimpleName(filename);
615                 m->mothurOut("Unable to open " + filename + ". Trying output directory " + tryPath+ ".\n");
616                 ifstream in2; ableToOpen = openInputFile(tryPath, in2, "noerror"); in2.close();
617                 filename = tryPath;
618             }
619         }
620 
621 
622         //if you can't open it, try default location
623         if (!ableToOpen) {
624             if (defaultPath != "") { //default path is set
625                 string tryPath = defaultPath + getSimpleName(filename);
626                 m->mothurOut("Unable to open " + filename + ". Trying default " + tryPath+ ".\n");
627                 ifstream in2; ableToOpen = openInputFile(tryPath, in2, "noerror"); in2.close();
628                 filename = tryPath;
629             }
630         }
631 
632         //if you can't open it its not in current working directory or inputDir, try mothur excutable location
633         if (!ableToOpen) {
634             string tryPath = mothurPath + getSimpleName(filename);
635             m->mothurOut("Unable to open " + filename + ". Trying mothur's executable location " + tryPath+ ".\n");
636             ifstream in2; ableToOpen = openInputFile(tryPath, in2, "noerror"); in2.close();
637             filename = tryPath;
638         }
639 
640         //if you can't open it its not in current working directory or inputDir, try mothur excutable location
641         if (!ableToOpen) {
642             string tryPath = mothurToolsPath + getSimpleName(filename);
643             m->mothurOut("Unable to open " + filename + ". Trying mothur's tools location " + tryPath+ ".\n");
644             ifstream in2; ableToOpen = openInputFile(tryPath, in2, "noerror"); in2.close();
645             filename = tryPath;
646         }
647 
648         if (!ableToOpen) { m->mothurOut("Unable to open " + filename + ".\n");  return false;  }
649 
650         return true;
651     }
652     catch(exception& e) {
653         m->errorOut(e, "Utils", "checkLocations");
654         exit(1);
655     }
656 }
657 /***********************************************************************/
checkLocations(string & filename,vector<string> locations,string silent)658 bool Utils::checkLocations(string& filename, vector<string> locations, string silent){
659     try {
660         filename = getFullPathName(filename);
661         string inputDir = locations[0];
662         string outputDir = locations[1];
663         string defaultPath = locations[2];
664         string mothurPath = locations[3];
665         string mothurToolsPath = locations[4];
666 
667         bool ableToOpen;
668         ifstream in;
669         ableToOpen = openInputFile(filename, in, "noerror");
670         in.close();
671 
672         //if you can't open it, try input location
673         if (!ableToOpen) {
674             if (inputDir != "") { //default path is set
675                 string tryPath = inputDir + getSimpleName(filename);
676                 ifstream in2; ableToOpen = openInputFile(tryPath, in2, "noerror"); in2.close();
677                 filename = tryPath;
678             }
679         }
680 
681         //if you can't open it, try output location
682         if (!ableToOpen) {
683             if (outputDir != "") { //default path is set
684                 string tryPath = outputDir + getSimpleName(filename);
685                 ifstream in2; ableToOpen = openInputFile(tryPath, in2, "noerror"); in2.close();
686                 filename = tryPath;
687             }
688         }
689 
690 
691         //if you can't open it, try default location
692         if (!ableToOpen) {
693             if (defaultPath != "") { //default path is set
694                 string tryPath = defaultPath + getSimpleName(filename);
695                 ifstream in2; ableToOpen = openInputFile(tryPath, in2, "noerror"); in2.close();
696                 filename = tryPath;
697             }
698         }
699 
700         //if you can't open it its not in current working directory or inputDir, try mothur excutable location
701         if (!ableToOpen) {
702             string tryPath = mothurPath + getSimpleName(filename);
703             ifstream in2; ableToOpen = openInputFile(tryPath, in2, "noerror"); in2.close();
704             filename = tryPath;
705         }
706 
707         //if you can't open it its not in current working directory or inputDir, try mothur excutable location
708         if (!ableToOpen) {
709             if (mothurToolsPath != "") { //default path is set
710                 string tryPath = mothurToolsPath + getSimpleName(filename);
711                 ifstream in2; ableToOpen = openInputFile(tryPath, in2, "noerror"); in2.close();
712                 filename = tryPath;
713             }
714         }
715 
716         if (!ableToOpen) { return false;  }
717 
718         return true;
719     }
720     catch(exception& e) {
721         m->errorOut(e, "Utils", "checkLocations");
722         exit(1);
723     }
724 }
725 /***********************************************************************/
findBlastLocation(string & toolLocation,string mothurProgramPath,vector<string> locations)726 bool Utils::findBlastLocation(string& toolLocation, string mothurProgramPath, vector<string> locations){
727     try {
728         bool foundTool = false;
729         string programName = "formatdb"; programName += EXECUTABLE_EXT;
730 
731         toolLocation = "";
732         string blastBin = "blast"; blastBin += PATH_SEPARATOR; blastBin += "bin"; blastBin += PATH_SEPARATOR;
733 
734         for (int i = 0; i < locations.size(); i++) { locations[i] += blastBin; }
735 
736         vector<string> versionOutputs;
737         foundTool = findTool(programName, toolLocation, mothurProgramPath, versionOutputs, locations);
738 
739         if (foundTool) { toolLocation = hasPath(toolLocation); }
740         else { toolLocation = ""; }
741 
742         return foundTool;
743     }
744     catch(exception& e) {
745         m->errorOut(e, "Utils", "findBlastLocation");
746         exit(1);
747     }
748 }
749 /***********************************************************************/
findTool(string & toolName,string & toolLocation,string mothurProgramPath,vector<string> & versionOutputs,vector<string> locations)750 bool Utils::findTool(string& toolName, string& toolLocation, string mothurProgramPath, vector<string>& versionOutputs, vector<string> locations){
751     try {
752         bool foundTool = false;
753         string toolCommand = mothurProgramPath + toolName; //windows def
754 
755         //test to make sure tool exists
756         ifstream in;
757         toolCommand = getFullPathName(toolCommand);
758         bool ableToOpen = openInputFile(toolCommand, in, "no error"); in.close();
759         if(!ableToOpen) {
760 
761             if (checkLocations(toolCommand, locations)) { toolLocation = toolCommand; foundTool = true; }
762             else {
763 
764             m->mothurOut(toolCommand + " file does not exist. Checking path... \n");
765             //check to see if tool is in the path??
766 
767             ifstream in2;
768             string uLocation = findProgramPath(toolName);
769             uLocation += toolName;
770             ableToOpen = openInputFile(uLocation, in2, "no error"); in2.close();
771 
772             if(!ableToOpen) { m->mothurOut("[ERROR]: " + uLocation + " file does not exist. mothur requires the " + toolName + " executable.\n");  foundTool = false; }
773             else {  m->mothurOut("Found " + toolName + " in your path, using " + uLocation + "\n"); toolLocation = uLocation; foundTool = true; }
774             }
775         }else {  toolLocation = toolCommand; foundTool = true;  }
776 
777         toolLocation = getFullPathName(toolLocation);
778 
779         if (foundTool) { //check fasterq_dump version
780             string versionTestCommand = toolLocation + " --version > ./commandScreen.output 2>&1";
781             system(versionTestCommand.c_str());
782 
783             ifstream in;
784             string versionOutput = "./commandScreen.output";
785             openInputFile(versionOutput, in, "no error");
786 
787             string output = getline(in); gobble(in);
788             versionOutputs = splitWhiteSpace(output);
789             in.close();
790 
791             mothurRemove(versionOutput);
792         }
793 
794         return foundTool;
795     }
796     catch(exception& e) {
797         m->errorOut(e, "Utils", "findTool");
798         exit(1);
799     }
800 }
801 /***********************************************************************/
trimStringEnd(string name,int numToRemove)802 string Utils::trimStringEnd(string name, int numToRemove){
803     try {
804         int length = name.length();
805         string trimmedName = "";
806 
807         if (length > numToRemove) { trimmedName = name.substr(0, (length-numToRemove)); }
808 
809         return trimmedName;
810     }
811     catch(exception& e) {
812         m->errorOut(e, "Utils", "trimString");
813         exit(1);
814     }
815 }
816 /***********************************************************************/
817 
openInputFile(string fileName,ifstream & fileHandle,string mode)818 bool Utils::openInputFile(string fileName, ifstream& fileHandle, string mode){
819     try {
820         //get full path name
821         string completeFileName = getFullPathName(fileName);
822 
823         fileHandle.open(completeFileName.c_str());
824         if(!fileHandle) { return false;  }
825         else {
826             //check for blank file
827             zapGremlins(fileHandle);
828             gobble(fileHandle);
829             return true;
830         }
831     }
832     catch(exception& e) {
833         m->errorOut(e, "Utils", "openInputFile - no Error");
834         exit(1);
835     }
836 }
837 /***********************************************************************/
838 
openInputFile(string fileName,ifstream & fileHandle)839 bool Utils::openInputFile(string fileName, ifstream& fileHandle){
840     try {
841 
842         //get full path name
843         string completeFileName = getFullPathName(fileName);
844 
845         fileHandle.open(completeFileName.c_str());
846         if(!fileHandle) { m->mothurOut("[ERROR]: Could not open " + completeFileName + "\n"); return false; }
847         else {
848             //check for blank file
849             zapGremlins(fileHandle);
850             gobble(fileHandle);
851             if (fileHandle.eof()) { m->mothurOut("[ERROR]: " + completeFileName + " is blank. Please correct.\n");   }
852             return true;
853         }
854     }
855     catch(exception& e) {
856         m->errorOut(e, "Utils", "openInputFile");
857         exit(1);
858     }
859 }
860 /***********************************************************************/
openInputFileBinary(string fileName,ifstream & fileHandle)861 bool Utils::openInputFileBinary(string fileName, ifstream& fileHandle){
862     try {
863 
864         //get full path name
865         string completeFileName = getFullPathName(fileName);
866 
867         fileHandle.open(completeFileName.c_str(), ios::binary);
868         if(!fileHandle) {
869             m->mothurOut("[ERROR]: Could not open " + completeFileName+ "\n");  return false; }
870         else {
871             if (fileHandle.eof()) { m->mothurOut("[ERROR]: " + completeFileName + " is blank. Please correct.\n");  }
872             return true;
873         }
874     }
875     catch(exception& e) {
876         m->errorOut(e, "Utils", "openInputFileBinary");
877         exit(1);
878     }
879 }
880 /***********************************************************************/
openInputFileBinary(string fileName,ifstream & fileHandle,string noerror)881 bool Utils::openInputFileBinary(string fileName, ifstream& fileHandle, string noerror){
882     try {
883 
884         //get full path name
885         string completeFileName = getFullPathName(fileName);
886 
887         fileHandle.open(completeFileName.c_str(), ios::binary);
888         if(!fileHandle) { return false; }
889         else { return true;  }
890     }
891     catch(exception& e) {
892         m->errorOut(e, "Utils", "openInputFileBinary - no error");
893         exit(1);
894     }
895 }
896 /***********************************************************************/
897 #ifdef USE_BOOST
openInputFileBinary(string fileName,ifstream & file,boost::iostreams::filtering_istream & in)898 bool Utils::openInputFileBinary(string fileName, ifstream& file, boost::iostreams::filtering_istream& in){
899     try {
900 
901         //get full path name
902         string completeFileName = getFullPathName(fileName);
903 
904         file.open(completeFileName.c_str(), ios_base::in | ios_base::binary);
905 
906         if(!file) { m->mothurOut("[ERROR]: Could not open " + completeFileName + "\n"); return false; }
907         else {
908             //check for blank file
909             in.push(boost::iostreams::gzip_decompressor());
910             in.push(file);
911             if (file.eof()) { m->mothurOut("[ERROR]: " + completeFileName + " is blank. Please correct.\n");  }
912             return true;
913         }
914     }
915     catch(exception& e) {
916         m->errorOut(e, "Utils", "openInputFileGZBinary");
917         exit(1);
918     }
919 }
920 /***********************************************************************/
openInputFileBinary(string fileName,ifstream & file,boost::iostreams::filtering_istream & in,string noerror)921 bool Utils::openInputFileBinary(string fileName, ifstream& file, boost::iostreams::filtering_istream& in, string noerror){
922     try {
923 
924         //get full path name
925         string completeFileName = getFullPathName(fileName);
926 
927         file.open(completeFileName.c_str(), ios_base::in | ios_base::binary);
928 
929         if(!file) { return false; }
930         else { //check for blank file
931             in.push(boost::iostreams::gzip_decompressor());
932             in.push(file);
933             return true;
934         }
935     }
936     catch(exception& e) {
937         m->errorOut(e, "Utils", "openInputFileGZBinary - no error");
938         exit(1);
939     }
940 }
941 #endif
942 /***********************************************************************/
943 //results[0] = allGZ, results[1] = allNotGZ
allGZFiles(vector<string> & files)944 vector<bool> Utils::allGZFiles(vector<string> & files){
945     try {
946         vector<bool> results;
947         bool allGZ = true;
948         bool allNOTGZ = true;
949 
950         for (int i = 0; i < files.size(); i++) {
951             if (m->getControl_pressed()) { break; }
952 
953             //ignore none and blank filenames
954             if ((files[i] != "") || (files[i] != "NONE")) {
955                 if (isGZ(files[i])[1]) { allNOTGZ = false;  }
956                 else {  allGZ = false;  }
957             }
958         }
959 
960         if (!allGZ && !allNOTGZ) { //mixed bag
961             m->mothurOut("[ERROR]: Cannot mix .gz and non compressed files. Please decompress your files and rerun.\n"); m->setControl_pressed(true);
962         }
963 
964         results.push_back(allGZ);
965         results.push_back(allNOTGZ);
966 
967         return results;
968     }
969     catch(exception& e) {
970         m->errorOut(e, "Utils", "areGZFiles");
971         exit(1);
972     }
973 }
974 /***********************************************************************/
975 //returns false if no api installed
isHDF5(string filename)976 bool Utils::isHDF5(string filename){
977     try {
978         bool result = false;
979 
980         #ifdef USE_HDF5
981             if(!H5::H5File::isHdf5(filename.c_str())){
982                 //m->mothurOut("[WARNING]: " + filename + " is not an HDF5 file.\n");
983                 return false;
984             }else { return true; }
985         #else
986             return false;
987         #endif
988 
989         return result;
990     }
991     catch(exception& e) {
992         m->errorOut(e, "Utils", "isHDF5");
993         exit(1);
994     }
995 }
996 /***********************************************************************/
// Tests whether filename looks like a readable gz file.
// Returns a two element vector:
//   results[0] - the file has a .gz/.GZ extension and could be opened
//   results[1] - a first character could be pulled through the gzip decompressor
//                without a boost gzip_error being thrown
// Without USE_BOOST an [ERROR] is printed, control_pressed is set and both
// entries are returned as false.
vector<bool> Utils::isGZ(string filename){
    try {
        vector<bool> results; results.resize(2, false);
#ifdef USE_BOOST
        ifstream fileHandle;
        boost::iostreams::filtering_istream gzin;

        // only files carrying a gz extension are examined any further
        if ((getExtension(filename) != ".gz") && (getExtension(filename) != ".GZ")) { return results; } // results[0] = false; results[1] = false;

        bool ableToOpen = openInputFileBinary(filename, fileHandle, gzin, ""); //quiet overload, prints no error
        if (!ableToOpen) { return results; } // results[0] = false; results[1] = false;
        else {  results[0] = true;  }

        // try to decompress a single character; a gzip_error means the content is not valid gz
        char c;
        try
        {
            gzin >> c;
            results[1] = true;
        }
        catch ( boost::iostreams::gzip_error & e )
        {
            // detach the file from the filter chain before closing it
            gzin.pop();
            fileHandle.close();
            return results;  // results[0] = true; results[1] = false;
        }
        fileHandle.close();
#else
        m->mothurOut("[ERROR]: cannot test for gz format without enabling boost libraries.\n"); m->setControl_pressed(true);
#endif
        return results; //with boost: results[0] = true; results[1] = true. without boost: both false
    }
    catch(exception& e) {
        m->errorOut(e, "Utils", "isGZ");
        exit(1);
    }
}
1033 
1034 /***********************************************************************/
1035 
renameFile(string oldName,string newName)1036 int Utils::renameFile(string oldName, string newName){
1037     try {
1038         if(m->getDebug()) { m->mothurOut("[DEBUG]: renaming " + oldName + " to " + newName + "\n"); }
1039 
1040         if (oldName == newName) { return 0; }
1041 
1042         ifstream inTest;
1043         bool exist = openInputFile(newName, inTest, "");
1044         inTest.close();
1045 
1046 #if defined NON_WINDOWS
1047         if (exist) { //you could open it so you want to delete it
1048             if(m->getDebug()) { m->mothurOut("[DEBUG]: removing old copy of " + newName + "\n"); }
1049             mothurRemove(newName);
1050         }
1051 
1052         int renameOk = rename(oldName.c_str(), newName.c_str());
1053 
1054         if(m->getDebug()) { m->mothurOut("[DEBUG]: rename " + oldName + " " + newName + " returned " + toString(renameOk) + "\n"); }
1055         /*
1056         if(m->getDebug()) { m->mothurOut("[DEBUG]: mv " + oldName + " to " + newName + "\n"); }
1057 
1058         string command = "mv " + oldName + " " + newName;
1059 
1060         if(m->getDebug()) { m->mothurOut("[DEBUG]: running system command mv " + oldName + " " + newName + "\n"); }
1061 
1062         int returnCode = system(command.c_str());
1063 
1064         if(m->getDebug()) { m->mothurOut("[DEBUG]: system command mv " + oldName + " " + newName + " returned " + toString(returnCode) + "\n"); }
1065 
1066         if (returnCode != 0) {
1067             int renameOk = rename(oldName.c_str(), newName.c_str());
1068 
1069             if(m->getDebug()) { m->mothurOut("[DEBUG]: rename " + oldName + " " + newName + " returned " + toString(renameOk) + "\n"); }
1070         }
1071          */
1072 #else
1073         mothurRemove(newName);
1074         int renameOk = rename(oldName.c_str(), newName.c_str());
1075 
1076         if(m->getDebug()) { m->mothurOut("[DEBUG]: rename " + oldName + " " + newName + " returned " + toString(renameOk) + "\n"); }
1077 #endif
1078         return 0;
1079 
1080     }
1081     catch(exception& e) {
1082         m->errorOut(e, "Utils", "renameFile");
1083         exit(1);
1084     }
1085 }
1086 /***********************************************************************/
1087 
copyFile(string oldName,string newName)1088 int Utils::copyFile(string oldName, string newName){
1089     try {
1090         if(m->getDebug()) { m->mothurOut("[DEBUG]: renaming " + oldName + " to " + newName + "\n"); }
1091 
1092         if (oldName == newName) { return 0; }
1093 
1094         ifstream inTest;
1095         bool exist = openInputFile(newName, inTest, "");
1096         inTest.close();
1097 
1098 #if defined NON_WINDOWS
1099         if (exist) { //you could open it so you want to delete it
1100             if(m->getDebug()) { m->mothurOut("[DEBUG]: removing old copy of " + newName + "\n"); }
1101             mothurRemove(newName);
1102         }
1103         appendFiles(oldName, newName);
1104         //if(m->getDebug()) { m->mothurOut("[DEBUG]: cp " + oldName + " to " + newName + "\n"); }
1105 
1106         //string command = "cp " + oldName + " " + newName;
1107 
1108         //if(m->getDebug()) { m->mothurOut("[DEBUG]: running system command cp " + oldName + " " + newName + "\n"); }
1109 
1110         //int returnCode = system(command.c_str());
1111 
1112        // if(m->getDebug()) { m->mothurOut("[DEBUG]: system command cp " + oldName + " " + newName + " returned " + toString(returnCode) + "\n"); }
1113 #else
1114         mothurRemove(newName);
1115         appendFiles(oldName, newName);
1116 #endif
1117         return 0;
1118 
1119     }
1120     catch(exception& e) {
1121         m->errorOut(e, "Utils", "copyFile");
1122         exit(1);
1123     }
1124 }
1125 
1126 /***********************************************************************/
1127 
openOutputFile(string fileName,ofstream & fileHandle)1128 bool Utils::openOutputFile(string fileName, ofstream& fileHandle){
1129     try {
1130         string completeFileName = getFullPathName(fileName);
1131         fileHandle.open(completeFileName.c_str(), ios::trunc);
1132 
1133         if(!fileHandle) { m->mothurOut("[ERROR]: Could not open " + completeFileName + "\n"); return false; }
1134         else { return true; }
1135     }
1136     catch(exception& e) {
1137         m->errorOut(e, "Utils", "openOutputFile");
1138         exit(1);
1139     }
1140 
1141 }
1142 /***********************************************************************/
1143 
openOutputFileBinary(string fileName,ofstream & fileHandle)1144 bool Utils::openOutputFileBinary(string fileName, ofstream& fileHandle){
1145     try {
1146         string completeFileName = getFullPathName(fileName);
1147         fileHandle.open(completeFileName.c_str(), ios::trunc | ios::binary);
1148 
1149         if(!fileHandle) { m->mothurOut("[ERROR]: Could not open " + completeFileName + "\n");  return false; }
1150         else { return true; }
1151     }
1152     catch(exception& e) {
1153         m->errorOut(e, "Utils", "openOutputFileBinary");
1154         exit(1);
1155     }
1156 }
1157 /**************************************************************************************************/
appendFiles(string temp,string filename)1158 int Utils::appendFiles(string temp, string filename) {
1159     try{
1160         ofstream output;
1161         ifstream input;
1162 
1163         //open output file in append mode
1164         openOutputFileBinaryAppend(filename, output);
1165         bool ableToOpen = openInputFileBinary(temp, input, "no error");
1166 
1167         int numLines = 0;
1168         if (ableToOpen) { //you opened it
1169             char buffer[4096];
1170             while (!input.eof()) {
1171                 input.read(buffer, 4096);
1172                 output.write(buffer, input.gcount());
1173                 //count number of lines
1174                 for (int i = 0; i < input.gcount(); i++) {  if (buffer[i] == '\n') {numLines++;} }
1175             }
1176             input.close();
1177         }
1178         output.close();
1179 
1180         return numLines;
1181     }
1182     catch(exception& e) {
1183         m->errorOut(e, "Utils", "appendFiles");
1184         exit(1);
1185     }
1186 }
1187 /**************************************************************************************************/
appendFiles(string filename,ofstream & out)1188 void Utils::appendFiles(string filename, ofstream& out) {
1189     try{
1190         ifstream input;
1191         bool ableToOpen = openInputFileBinary(filename, input, "no error");
1192 
1193         if (ableToOpen) { //you opened it
1194             char buffer[4096];
1195             while (!input.eof()) {
1196                 if (m->getControl_pressed()) { break; }
1197                 input.read(buffer, 4096);
1198                 out.write(buffer, input.gcount());
1199             }
1200             input.close();
1201         }
1202     }
1203     catch(exception& e) {
1204         m->errorOut(e, "Utils", "appendFiles");
1205         exit(1);
1206     }
1207 }
1208 
1209 /**************************************************************************************************/
appendFilesFront(string temp,string filename)1210 int Utils::appendFilesFront(string temp, string filename) {
1211     try{
1212         ofstream output;
1213         ifstream input;
1214 
1215         //open output file in append mode
1216         openOutputFileBinaryAppend(temp, output);
1217         bool ableToOpen = openInputFileBinary(filename, input, "no error");
1218 
1219         if (ableToOpen) { //you opened it
1220             char buffer[4096];
1221             while (!input.eof()) {
1222                 input.read(buffer, 4096);
1223                 output.write(buffer, input.gcount());
1224             }
1225             input.close();
1226         }
1227         output.close();
1228 
1229         mothurRemove(filename);
1230         renameFile(temp, filename);
1231         mothurRemove(temp);
1232 
1233         return 0;
1234     }
1235     catch(exception& e) {
1236         m->errorOut(e, "Utils", "appendFiles");
1237         exit(1);
1238     }
1239 }
1240 
1241 /**************************************************************************************************/
appendBinaryFiles(string temp,string filename)1242 bool Utils::appendBinaryFiles(string temp, string filename) {
1243     try{
1244         ofstream output;
1245         ifstream input;
1246 
1247         //open output file in append mode
1248         openOutputFileBinaryAppend(filename, output);
1249         bool ableToOpen = openInputFileBinary(temp, input, "no error");
1250 
1251         if (ableToOpen) { //you opened it
1252 
1253             char buffer[4096];
1254             while (!input.eof()) {
1255                 input.read(buffer, 4096);
1256                 output.write(buffer, input.gcount());
1257             }
1258             input.close();
1259         }
1260         output.close();
1261 
1262         return ableToOpen;
1263     }
1264     catch(exception& e) {
1265         m->errorOut(e, "Utils", "appendBinaryFiles");
1266         exit(1);
1267     }
1268 }
1269 /**************************************************************************************************/
appendSFFFiles(string temp,string filename)1270 bool Utils::appendSFFFiles(string temp, string filename) {
1271     try{
1272         ofstream output;
1273         bool ableToOpen = true;
1274 
1275         //open output file in append mode
1276         string fullFileName = getFullPathName(filename);
1277 
1278         output.open(fullFileName.c_str(), ios::app | ios::binary);
1279         if(!output) { m->mothurOut("[ERROR]: Could not open " + fullFileName + "\n"); return false;  }
1280         else {
1281             //get full path name
1282             string completeFileName = getFullPathName(temp);
1283             ifstream input;
1284             openInputFileBinary(completeFileName, input);
1285 
1286             if(!input) { return false; }
1287             else {
1288                 char buffer[4096];
1289                 while (!input.eof()) {
1290                     input.read(buffer, 4096);
1291                     output.write(buffer, input.gcount());
1292                 }
1293                 input.close();
1294             }
1295             output.close();
1296         }
1297 
1298         return ableToOpen;
1299     }
1300     catch(exception& e) {
1301         m->errorOut(e, "Utils", "appendSFFFiles");
1302         exit(1);
1303     }
1304 }
1305 /**************************************************************************************************/
appendFilesWithoutHeaders(string temp,string filename)1306 int Utils::appendFilesWithoutHeaders(string temp, string filename) {
1307     try{
1308         ofstream output;
1309         ifstream input;
1310 
1311         //open output file in append mode
1312         openOutputFileAppend(filename, output);
1313         bool ableToOpen = openInputFile(temp, input, "no error");
1314 
1315         int numLines = 0;
1316         if (ableToOpen) { //you opened it
1317 
1318             string headers = getline(input); gobble(input);
1319             char buffer[4096];
1320             while (!input.eof()) {
1321                 input.read(buffer, 4096);
1322                 output.write(buffer, input.gcount());
1323                 //count number of lines
1324                 for (int i = 0; i < input.gcount(); i++) {  if (buffer[i] == '\n') {numLines++;} }
1325             }
1326             input.close();
1327         }
1328 
1329         output.close();
1330 
1331         return numLines;
1332     }
1333     catch(exception& e) {
1334         m->errorOut(e, "Utils", "appendFiles");
1335         exit(1);
1336     }
1337 }
1338 /**************************************************************************************************/
sortFile(string distFile,string outputDir)1339 string Utils::sortFile(string distFile, string outputDir){
1340     try {
1341 
1342         //if (outputDir == "") {  outputDir += hasPath(distFile);  }
1343         string outfile = getRootName(distFile) + "sorted.dist";
1344 
1345 
1346         //if you can, use the unix sort since its been optimized for years
1347 #if defined NON_WINDOWS
1348         string command = "sort -n -k +3 " + distFile + " -o " + outfile;
1349         system(command.c_str());
1350 #else //you are stuck with my best attempt...
1351         //windows sort does not have a way to specify a column, only a character in the line
1352         //since we cannot assume that the distance will always be at the the same character location on each line
1353         //due to variable sequence name lengths, I chose to force the distance into first position, then sort and then put it back.
1354 
1355         //read in file line by file and put distance first
1356         string tempDistFile = distFile + ".temp";
1357         ifstream input;
1358         ofstream output;
1359         openInputFile(distFile, input);
1360         openOutputFile(tempDistFile, output);
1361 
1362         string firstName, secondName;
1363         float dist;
1364         while (!input.eof()) {
1365             input >> firstName >> secondName >> dist;
1366             output << dist << '\t' << firstName << '\t' << secondName << endl;
1367             gobble(input);
1368         }
1369         input.close();
1370         output.close();
1371 
1372 
1373         //sort using windows sort
1374         string tempOutfile = outfile + ".temp";
1375         string command = "sort " + tempDistFile + " /O " + tempOutfile;
1376         system(command.c_str());
1377 
1378         //read in sorted file and put distance at end again
1379         ifstream input2;
1380         ofstream output2;
1381         openInputFile(tempOutfile, input2);
1382         openOutputFile(outfile, output2);
1383 
1384         while (!input2.eof()) {
1385             input2 >> dist >> firstName >> secondName;
1386             output2 << firstName << '\t' << secondName << '\t' << dist << endl;
1387             gobble(input2);
1388         }
1389         input2.close();
1390         output2.close();
1391 
1392         //remove temp files
1393         mothurRemove(tempDistFile);
1394         mothurRemove(tempOutfile);
1395 #endif
1396 
1397         return outfile;
1398     }
1399     catch(exception& e) {
1400         m->errorOut(e, "Utils", "sortFile");
1401         exit(1);
1402     }
1403 }
1404 /***********************************************************************/
openOutputFileAppend(string fileName,ofstream & fileHandle)1405 bool Utils::openOutputFileAppend(string fileName, ofstream& fileHandle){
1406     try {
1407         fileName = getFullPathName(fileName);
1408 
1409         fileHandle.open(fileName.c_str(), ios::app);
1410         if(!fileHandle) { m->mothurOut("[ERROR]: Could not open " + fileName + "\n");  return false; }
1411         return true;
1412     }
1413     catch(exception& e) {
1414         m->errorOut(e, "Utils", "openOutputFileAppend");
1415         exit(1);
1416     }
1417 }
1418 /***********************************************************************/
openOutputFileBinaryAppend(string fileName,ofstream & fileHandle)1419 bool Utils::openOutputFileBinaryAppend(string fileName, ofstream& fileHandle){
1420     try {
1421         fileName = getFullPathName(fileName);
1422 
1423         fileHandle.open(fileName.c_str(), ios::app | ios::binary);
1424         if(!fileHandle) { m->mothurOut("[ERROR]: Could not open " + fileName + "\n"); return false; }
1425 
1426         return true;
1427     }
1428     catch(exception& e) {
1429         m->errorOut(e, "Utils", "openOutputFileAppend");
1430         exit(1);
1431     }
1432 }
1433 
1434 /***********************************************************************/
gobble(istream & f)1435 void Utils::gobble(istream& f){
1436     try {
1437 
1438         char d;
1439         while(isspace(d=f.get()))		{ ;}
1440         if(!f.eof()) { f.putback(d); }
1441     }
1442     catch(exception& e) {
1443         m->errorOut(e, "Utils", "gobble");
1444         exit(1);
1445     }
1446 }
1447 /***********************************************************************/
gobble(istringstream & f)1448 void Utils::gobble(istringstream& f){
1449     try {
1450         char d;
1451         while(isspace(d=f.get()))		{;}
1452         if(!f.eof()) { f.putback(d); }
1453     }
1454     catch(exception& e) {
1455         m->errorOut(e, "Utils", "gobble");
1456         exit(1);
1457     }
1458 }
1459 /***********************************************************************/
zapGremlins(istream & f)1460 void Utils::zapGremlins(istream& f){
1461     try {
1462 
1463         char d;
1464         while('\0'==(d=f.get()))		{ ;}
1465         if(!f.eof()) { f.putback(d); }
1466     }
1467     catch(exception& e) {
1468         m->errorOut(e, "Utils", "zapGremlins");
1469         exit(1);
1470     }
1471 }
1472 /***********************************************************************/
zapGremlins(istringstream & f)1473 void Utils::zapGremlins(istringstream& f){
1474     try {
1475         char d;
1476         while('\0'==(d=f.get()))		{ ;}
1477         if(!f.eof()) { f.putback(d); }
1478     }
1479     catch(exception& e) {
1480         m->errorOut(e, "Utils", "zapGremlins");
1481         exit(1);
1482     }
1483 }
1484 
1485 /***********************************************************************/
getline(istringstream & fileHandle)1486 string Utils::getline(istringstream& fileHandle) {
1487     try {
1488         string line = "";
1489         while (!fileHandle.eof())	{
1490             //get next character
1491             char c = fileHandle.get();
1492 
1493             //are you at the end of the line
1494             if ((c == '\n') || (c == '\r') || (c == '\f')){  break;	}
1495             else {		line += c;		}
1496         }
1497 
1498         return line;
1499     }
1500     catch(exception& e) {
1501         m->errorOut(e, "Utils", "getline");
1502         exit(1);
1503     }
1504 }
1505 /***********************************************************************/
getline(ifstream & fileHandle,vector<string> & headers)1506 void Utils::getline(ifstream& fileHandle, vector<string>& headers) {
1507     try {
1508         string line = getline(fileHandle);
1509         headers = splitWhiteSpace(line);
1510     }
1511     catch(exception& e) {
1512         m->errorOut(e, "Utils", "getline");
1513         exit(1);
1514     }
1515 }
1516 /***********************************************************************/
getline(ifstream & fileHandle)1517 string Utils::getline(ifstream& fileHandle) {
1518     try {
1519         string line = "";
1520         while (fileHandle)	{
1521             //get next character
1522             char c = fileHandle.get();
1523 
1524             //are you at the end of the line
1525             if ((c == '\n') || (c == '\r') || (c == '\f') || (c == EOF)){  break;	}
1526             else {		line += c;		}
1527         }
1528 
1529         return line;
1530     }
1531     catch(exception& e) {
1532         m->errorOut(e, "Utils", "getline");
1533         exit(1);
1534     }
1535 }
1536 #ifdef USE_BOOST
1537 /***********************************************************************/
getline(boost::iostreams::filtering_istream & fileHandle)1538 string Utils::getline(boost::iostreams::filtering_istream& fileHandle) {
1539     try {
1540         string line = "";
1541         while (fileHandle)	{
1542             //get next character
1543             char c = fileHandle.get();
1544 
1545             //are you at the end of the line
1546             if ((c == '\n') || (c == '\r') || (c == '\f') || (c == EOF)){ break; }
1547             else {		line += c;		}
1548         }
1549 
1550         return line;
1551     }
1552     catch(exception& e) {
1553         m->errorOut(e, "Utils", "getline");
1554         exit(1);
1555     }
1556 }
1557 #endif
1558 /**********************************************************************/
getPathName(string longName)1559 string Utils::getPathName(string longName){
1560     try {
1561         string rootPathName = longName;
1562 
1563         if(longName.find_last_of("/\\") != longName.npos){
1564             int pos = longName.find_last_of("/\\")+1;
1565             rootPathName = longName.substr(0, pos);
1566         }
1567 
1568         return rootPathName;
1569     }
1570     catch(exception& e) {
1571         m->errorOut(e, "Utils", "getPathName");
1572         exit(1);
1573     }
1574 }
1575 /***********************************************************************/
getRootName(string longName)1576 string Utils::getRootName(string longName){
1577     try {
1578 
1579         string rootName = longName;
1580 
1581         if(rootName.find_last_of(".") != rootName.npos){
1582             int pos = rootName.find_last_of('.')+1;
1583             rootName = rootName.substr(0, pos);
1584         }
1585 
1586         return rootName;
1587     }
1588     catch(exception& e) {
1589         m->errorOut(e, "Utils", "getRootName");
1590         exit(1);
1591     }
1592 }
1593 /***********************************************************************/
1594 
getSimpleName(string longName)1595 string Utils::getSimpleName(string longName){
1596     try {
1597         string simpleName = longName;
1598 
1599         size_t found; found=longName.find_last_of("/\\");
1600 
1601         if(found != longName.npos){ simpleName = longName.substr(found+1); }
1602 
1603         return simpleName;
1604     }
1605     catch(exception& e) {
1606         m->errorOut(e, "Utils", "getSimpleName");
1607         exit(1);
1608     }
1609 }
1610 //**********************************************************************************************************************
getStringFromVector(vector<string> & list,string delim)1611 string Utils::getStringFromVector(vector<string>& list, string delim){
1612     try {
1613         string result = "";
1614 
1615         if (list.size() == 0) { return result; }
1616 
1617         result = list[0];
1618 
1619         for (int i = 1; i < list.size(); i++) {
1620             if (m->getControl_pressed()) { break;  }
1621             result += delim + list[i];
1622         }
1623 
1624         return result;
1625     }
1626     catch(exception& e) {
1627         m->errorOut(e, "Utils", "getStringFromVector");
1628         exit(1);
1629     }
1630 }
1631 //**********************************************************************************************************************
getStringFromVector(vector<int> & list,string delim)1632 string Utils::getStringFromVector(vector<int>& list, string delim){
1633     try {
1634         string result = "";
1635 
1636         if (list.size() == 0) { return result; }
1637 
1638         result = toString(list[0]);
1639 
1640         for (int i = 1; i < list.size(); i++) {
1641             if (m->getControl_pressed()) { break;  }
1642             string temp = toString(list[i]);
1643             result += delim + temp;
1644         }
1645 
1646         return result;
1647     }
1648     catch(exception& e) {
1649         m->errorOut(e, "Utils", "getStringFromVector");
1650         exit(1);
1651     }
1652 }
1653 //**********************************************************************************************************************
getSetFromList(ListVector * & list,vector<vector<string>> & otus)1654 set<string> Utils::getSetFromList(ListVector*& list, vector< vector<string> >& otus){
1655     try {
1656         set<string> results; otus.clear();
1657 
1658         if (list->getNumSeqs() == 0) { return results; }
1659 
1660         for (int i = 0; i < list->getNumBins(); i++) {
1661             if (m->getControl_pressed()) { break;  }
1662 
1663             string thisBin = list->get(i);
1664             vector<string> binNames; splitAtComma(thisBin, binNames);
1665 
1666             otus.push_back(binNames);
1667 
1668             for (int j = 0; j < binNames.size(); j++) { results.insert(binNames[j]); }
1669         }
1670 
1671         return results;
1672     }
1673     catch(exception& e) {
1674         m->errorOut(e, "Utils", "getSetFromList");
1675         exit(1);
1676     }
1677 }
1678 //**********************************************************************************************************************
getStringFromVector(vector<double> & list,string delim)1679 string Utils::getStringFromVector(vector<double>& list, string delim){
1680     try {
1681         string result = "";
1682 
1683         if (list.size() == 0) { return result; }
1684 
1685         result = toString(list[0]);
1686 
1687         for (int i = 1; i < list.size(); i++) {
1688             if (m->getControl_pressed()) { break;  }
1689             string temp = toString(list[i]);
1690             result += delim + temp;
1691         }
1692 
1693         return result;
1694     }
1695     catch(exception& e) {
1696         m->errorOut(e, "Utils", "getStringFromVector");
1697         exit(1);
1698     }
1699 }
1700 //**********************************************************************************************************************
getStringFromSet(set<int> & list,string delim)1701 string Utils::getStringFromSet(set<int>& list, string delim){
1702     try {
1703         string result = "";
1704 
1705         if (list.size() == 0) { return result; }
1706 
1707         vector<int> vlist;
1708         for (set<int>::iterator it = list.begin(); it != list.end(); it++) {
1709             if (m->getControl_pressed()) { break;  }
1710             int value = *it;
1711             vlist.push_back(value);
1712         }
1713         result = getStringFromVector(vlist, delim);
1714 
1715         return result;
1716     }
1717     catch(exception& e) {
1718         m->errorOut(e, "Utils", "getStringFromVector");
1719         exit(1);
1720     }
1721 }
1722 //**********************************************************************************************************************
getStringFromSet(set<string> & list,string delim)1723 string Utils::getStringFromSet(set<string>& list, string delim){
1724     try {
1725         string result = "";
1726 
1727         if (list.size() == 0) { return result; }
1728 
1729         vector<string> vlist;
1730         for (set<string>::iterator it = list.begin(); it != list.end(); it++) {
1731             if (m->getControl_pressed()) { break;  }
1732             vlist.push_back(*it);
1733         }
1734         result = getStringFromVector(vlist, delim);
1735 
1736         return result;
1737     }
1738     catch(exception& e) {
1739         m->errorOut(e, "Utils", "getStringFromVector");
1740         exit(1);
1741     }
1742 }
1743 //**********************************************************************************************************************
1744 //NOTE: assumes questions.size() == qanswers.size(), issues.size() == ianswers.size(), howtos.size() == hanswers.size()
getFormattedHelp(vector<string> questions,vector<string> qanswers,vector<string> issues,vector<string> ianswers,vector<string> howtos,vector<string> hanswers)1745 string Utils::getFormattedHelp(vector<string> questions, vector<string> qanswers, vector<string> issues, vector<string> ianswers, vector<string> howtos,vector<string> hanswers) {
1746     try {
1747 
1748         string commonQuestions = ""; vector<string> headers;
1749         string header = "\nCommon Questions: \n"; headers.push_back(header);
1750         header = "\nCommon Issues: \n"; headers.push_back(header);
1751         header = "\nHow To: \n"; headers.push_back(header);
1752 
1753         commonQuestions += headers[0]+"\n";
1754 #if defined NON_WINDOWS
1755         cout << BOLDGREEN << headers[0]; cout << RESET << endl;
1756 #endif
1757 
1758         for (int i = 0; i < questions.size(); i++) {
1759             commonQuestions += toString(i+1) + ". " + questions[i]+"\n"+qanswers[i]+"\n";
1760 #if defined NON_WINDOWS
1761             cout << BOLDBLUE << toString(i+1)+". "+questions[i]; cout << RESET << endl << qanswers[i] << endl;
1762 #endif
1763         }
1764 
1765         if (questions.size() == 0) {
1766             commonQuestions += "Can't find your question? Please feel free to ask questions on our forum, https://forum.mothur.org.\n\n";
1767 #if defined NON_WINDOWS
1768             cout << RESET "Can't find your question? Please feel free to ask questions on our forum, https://forum.mothur.org.\n\n";
1769 #endif
1770 
1771         }
1772 
1773         commonQuestions += headers[1]+"\n";
1774 #if defined NON_WINDOWS
1775         cout << BOLDGREEN << headers[1]; cout << RESET << endl;
1776 #endif
1777 
1778         for (int i = 0; i < issues.size(); i++) {
1779             commonQuestions += toString(i+1)+". "+issues[i]+"\n"+ianswers[i]+"\n";
1780 #if defined NON_WINDOWS
1781             cout << BOLDBLUE << toString(i+1)+". "+issues[i]; cout << RESET << endl << ianswers[i] << endl;
1782 #endif
1783         }
1784 
1785         if (issues.size() == 0) {
1786             commonQuestions += "Can't find your issue? Please feel free to ask questions on our forum, https://forum.mothur.org or send bug reports to mothur.bugs@gmail.com.\n\n";
1787 #if defined NON_WINDOWS
1788             cout << RESET "Can't find your issue? Please feel free to ask questions on our forum, https://forum.mothur.org or send bug reports to mothur.bugs@gmail.com.\n\n";
1789 #endif
1790 
1791         }
1792 
1793 
1794         commonQuestions += headers[2]+"\n";
1795 #if defined NON_WINDOWS
1796         cout << BOLDGREEN << headers[2]; cout << RESET << endl;
1797 #endif
1798 
1799         for (int i = 0; i < howtos.size(); i++) {
1800             commonQuestions += toString(i+1) + ". " + howtos[i]+"\n"+hanswers[i]+"\n";
1801 #if defined NON_WINDOWS
1802             cout << BOLDBLUE << toString(i+1)+". "+howtos[i]; cout << RESET << endl << hanswers[i] << endl;
1803 #endif
1804         }
1805 
1806         if (howtos.size() == 0) {
1807             commonQuestions += "Not sure how to do what you want? Please feel free to ask questions on our forum, https://forum.mothur.org.\n\n";
1808 #if defined NON_WINDOWS
1809             cout << RESET "Not sure how to do what you want? Please feel free to ask questions on our forum, https://forum.mothur.org.\n\n";
1810 #endif
1811 
1812         }
1813 
1814 #if defined NON_WINDOWS
1815         m->mothurOutJustToLog(commonQuestions);
1816 
1817         cout << BOLDMAGENTA << "\nFor further assistance please refer to the Mothur manual on our wiki at http://www.mothur.org/wiki.\n"; cout << RESET << endl;
1818         m->mothurOutJustToLog("\nFor further assistance please refer to the Mothur manual on our wiki at http://www.mothur.org/wiki.\n");
1819 #else
1820         m->mothurOut(commonQuestions + "\nFor further assistance please refer to the Mothur manual on our wiki at http://www.mothur.org/wiki.\n");
1821 #endif
1822 
1823         return commonQuestions;
1824     }
1825     catch(exception& e) {
1826         m->errorOut(e, "Utils", "getFormattedHelp");
1827         exit(1);
1828     }
1829 }
1830 //**********************************************************************************************************************
removeNs(string seq)1831 string Utils::removeNs(string seq){
1832     try {
1833         string newSeq = "";
1834         for (int i = 0; i < seq.length(); i++) { if (seq[i] != 'N') {  newSeq += seq[i]; } }
1835         return newSeq;
1836     }
1837     catch(exception& e) {
1838         m->errorOut(e, "Utils", "removeNs");
1839         exit(1);
1840     }
1841 }
1842 /***********************************************************************/
getOTUNames(vector<string> & currentLabels,int numBins,string tagHeader)1843 int Utils::getOTUNames(vector<string>& currentLabels, int numBins, string tagHeader){
1844     try {
1845 
1846         if (currentLabels.size() == numBins) {  return 0; }
1847 
1848         int maxLabelNumber = 0;
1849         if (currentLabels.size() < numBins) {
1850             string snumBins = toString(numBins);
1851 
1852             for (int i = 0; i < numBins; i++) {
1853                 string binLabel = tagHeader;
1854                 if (i < currentLabels.size()) { //label exists
1855                     if (getLabelTag(currentLabels[i]) == tagHeader) { //adjust 0's??
1856                         string sbinNumber = getSimpleLabel(currentLabels[i]);
1857                         int tempBinNumber; mothurConvert(sbinNumber, tempBinNumber);
1858                         if (tempBinNumber > maxLabelNumber) { maxLabelNumber = tempBinNumber; }
1859                         if (sbinNumber.length() < snumBins.length()) {
1860                             int diff = snumBins.length() - sbinNumber.length();
1861                             for (int h = 0; h < diff; h++) { binLabel += "0"; }
1862                         }
1863                         binLabel += sbinNumber;
1864                         currentLabels[i] = binLabel;
1865                     }
1866                 }else{ //create new label
1867                     string sbinNumber = toString(maxLabelNumber+1); maxLabelNumber++;
1868                     if (sbinNumber.length() < snumBins.length()) {
1869                         int diff = snumBins.length() - sbinNumber.length();
1870                         for (int h = 0; h < diff; h++) { binLabel += "0"; }
1871                     }
1872                     binLabel += sbinNumber;
1873                     currentLabels.push_back(binLabel);
1874                 }
1875             }
1876         }
1877         return currentLabels.size();
1878 
1879     }
1880     catch(exception& e) {
1881         m->errorOut(e, "Utils", "getOTUNames");
1882         exit(1);
1883     }
1884 }
1885 /**************************************************************************************/
getCombos(vector<string> & groupComb,vector<string> userGroups,int & numComp)1886 void Utils::getCombos(vector<string>& groupComb, vector<string> userGroups, int& numComp) { //groupcomb, Groups, numcomb
1887     try {
1888         sort(userGroups.begin(), userGroups.end());
1889 
1890         //calculate number of comparisons i.e. with groups A,B,C = AB, AC, BC = 3;
1891         numComp = 0;
1892         for (int i=0; i< userGroups.size(); i++) {
1893             numComp += i;
1894             for (int l = 0; l < i; l++) {  //set group comparison labels
1895                 if (userGroups[i] > userGroups[l])  { groupComb.push_back(userGroups[l] + "-" + userGroups[i]);     }
1896                 else                                { groupComb.push_back(userGroups[i] + "-" + userGroups[l]);     }
1897             }
1898         }
1899     }
1900     catch(exception& e) {
1901         m->errorOut(e, "Utils", "getCombos");
1902         exit(1);
1903     }
1904 }
1905 /***********************************************************************/
dirCheckWritable(string & dirName)1906 bool Utils::dirCheckWritable(string& dirName){
1907     try {
1908 
1909         if (dirName == "") { return false; }
1910 
1911         //add / to name if needed
1912         string lastChar = dirName.substr(dirName.length()-1);
1913         if (lastChar != PATH_SEPARATOR) { dirName += PATH_SEPARATOR; }
1914 
1915         //test to make sure directory exists
1916         dirName = getFullPathName(dirName);
1917         string outTemp = dirName + "temp"+ toString(time(NULL));
1918         ofstream out;
1919         out.open(outTemp.c_str(), ios::trunc);
1920         if(!out) { m->mothurOut(dirName + " directory does not exist or is not writable.\n");  }
1921         else{ out.close(); mothurRemove(outTemp); return true; }
1922 
1923         return false;
1924     }
1925     catch(exception& e) {
1926         m->errorOut(e, "Utils", "dirCheckWritable");
1927         exit(1);
1928     }
1929 }
1930 /***********************************************************************/
dirCheckExists(string & dirName)1931 bool Utils::dirCheckExists(string& dirName){
1932     return (dirCheckExists(dirName, true));
1933 }
1934 /***********************************************************************/
dirCheckExists(string & dirName,bool reportError)1935 bool Utils::dirCheckExists(string& dirName, bool reportError){
1936     try {
1937 
1938         if (dirName == "") { return false; }
1939 
1940         //add / to name if needed
1941         string lastChar = dirName.substr(dirName.length()-1);
1942         if (lastChar != PATH_SEPARATOR) { dirName += PATH_SEPARATOR; }
1943 
1944         //test to make sure directory exists
1945         dirName = getFullPathName(dirName);
1946 
1947 #if defined USE_BOOST
1948 
1949         boost::filesystem::path p(dirName.c_str());
1950 
1951         if (exists(p))  { return true; }
1952         else { if (reportError) { m->mothurOut("[ERROR]: cannot access " + dirName + "\n"); } }
1953 
1954 #else
1955     #if defined NON_WINDOWS
1956 
1957         struct stat info;
1958 
1959         if(stat(dirName.c_str(), &info ) != 0 ) {
1960             if (reportError) { m->mothurOut("[ERROR]: cannot access " + dirName + "\n"); }
1961         }else if( info.st_mode & S_IFDIR ) { // S_ISDIR() doesn't exist on my windows
1962             return true;
1963         }else {
1964             if (reportError) { m->mothurOut("[ERROR]: cannot access " + dirName + "\n"); }
1965         }
1966 
1967     #else
1968         DWORD dwAttrib = GetFileAttributes(dirName.c_str());
1969 
1970          if (dwAttrib != INVALID_FILE_ATTRIBUTES &&
1971              (dwAttrib & FILE_ATTRIBUTE_DIRECTORY)) { return true; }
1972          else { if (reportError) { m->mothurOut("[ERROR]: cannot access " + dirName + "\n"); } }
1973 
1974     #endif
1975 #endif
1976         return false;
1977     }
1978     catch(exception& e) {
1979         m->errorOut(e, "Utils", "dirCheckExists");
1980         exit(1);
1981     }
1982 }
1983 /***********************************************************************/
1984 //returns true if it exists or if we can make it
mkDir(string & dirName)1985 bool Utils::mkDir(string& dirName){
1986     try {
1987         bool dirExist = dirCheckExists(dirName, false);
1988         if (dirExist) { return true; }
1989 
1990 #ifdef USE_BOOST
1991 
1992         boost::filesystem::path dir(dirName.c_str());
1993         if(boost::filesystem::create_directories(dir)) {}
1994         else { return false; }
1995 
1996 #else
1997     #if defined NON_WINDOWS
1998 
1999         if ((mkdir(dirName.c_str(), S_IRWXU | S_IRWXG | S_IRWXO )) == 0) {}
2000         else { return false; }
2001 
2002     #else
2003 
2004         if (CreateDirectory(dirName.c_str(), NULL) ||
2005             ERROR_ALREADY_EXISTS == GetLastError()) { }
2006         else { return false; }
2007 
2008     #endif
2009 #endif
2010 
2011         if (dirCheckWritable(dirName)) { return true; }
2012 
2013         return false;
2014     }
2015     catch(exception& e) {
2016         m->errorOut(e, "Utils", "mkDir");
2017         exit(1);
2018     }
2019 }
2020 //***********************************************************************
parseClasses(string classes)2021 map<string, vector<string> > Utils::parseClasses(string classes){
2022     try {
2023         map<string, vector<string> > parts;
2024 
2025         //treatment<Early|Late>-age<young|old>
2026         vector<string> pieces; splitAtDash(classes, pieces); // -> treatment<Early|Late>, age<young|old>
2027 
2028         for (int i = 0; i < pieces.size(); i++) {
2029             string category = ""; string value = "";
2030             bool foundOpen = false;
2031             for (int j = 0; j < pieces[i].length(); j++) {
2032                 if (m->getControl_pressed()) { return parts; }
2033 
2034                 if (pieces[i][j] == '<')        { foundOpen = true;         }
2035                 else if (pieces[i][j] == '>')   { j += pieces[i].length();  }
2036                 else {
2037                     if (!foundOpen) { category += pieces[i][j]; }
2038                     else { value += pieces[i][j]; }
2039                 }
2040             }
2041             vector<string> values; splitAtChar(value, values, '|');
2042             parts[category] = values;
2043         }
2044 
2045         return parts;
2046     }
2047     catch(exception& e) {
2048         m->errorOut(e, "Utils", "parseClasses");
2049         exit(1);
2050     }
2051 }
2052 /**************************************************************************************************/
2053 //returns {Bacteria, Bacteroidetes, ..} and scores is filled with {100, 98, ...} or {null, null, null}
parseTax(string tax,vector<string> & scores)2054 vector<string> Utils::parseTax(string tax, vector<string>& scores) {
2055     try {
2056 
2057         string taxon;
2058         vector<string> taxs;
2059 
2060         while (tax.find_first_of(';') != -1) {
2061 
2062             if (m->getControl_pressed()) { return taxs; }
2063 
2064             //get taxon
2065             taxon = tax.substr(0,tax.find_first_of(';'));
2066 
2067             int pos = taxon.find_last_of('(');
2068             if (pos != -1) {
2069                 //is it a number?
2070                 int pos2 = taxon.find_last_of(')');
2071                 if (pos2 != -1) {
2072                     string confidenceScore = taxon.substr(pos+1, (pos2-(pos+1)));
2073                     if (isNumeric1(confidenceScore)) {
2074                         taxon = taxon.substr(0, pos); //rip off confidence
2075                         scores.push_back(confidenceScore);
2076                     }else{ scores.push_back("null"); }
2077                 }
2078             }else{ scores.push_back("null"); }
2079 
2080             //strip "" if they are there
2081             pos = taxon.find("\"");
2082             if (pos != string::npos) {
2083                 string newTax = "";
2084                 for (int k = 0; k < taxon.length(); k++) {
2085                     if (taxon[k] != '\"') { newTax += taxon[k]; }
2086                 }
2087                 taxon = newTax;
2088             }
2089 
2090             //look for bootstrap value
2091             taxs.push_back(taxon);
2092             tax = tax.substr(tax.find_first_of(';')+1, tax.length());
2093         }
2094 
2095         return taxs;
2096     }
2097     catch(exception& e) {
2098         m->errorOut(e, "Utils", "parseTax");
2099         exit(1);
2100     }
2101 }
2102 
2103 /***********************************************************************/
hasPath(string longName)2104 string Utils::hasPath(string longName){
2105     try {
2106         string path = "";
2107         size_t found;
2108         found=longName.find_last_of("~/\\");
2109 
2110         if(found != longName.npos){ path = longName.substr(0, found+1); }
2111 
2112         return path;
2113     }
2114     catch(exception& e) {
2115         m->errorOut(e, "Utils", "hasPath");
2116         exit(1);
2117     }
2118 }
2119 /***********************************************************************/
getCurrentDate(string & thisYear,string & thisMonth,string & thisDay)2120 void Utils::getCurrentDate(string& thisYear, string& thisMonth, string& thisDay){
2121     try {
2122         time_t rawtime;
2123         struct tm * timeinfo;
2124 
2125         time (&rawtime);
2126         timeinfo = localtime(&rawtime);
2127 
2128         char buffer[80];
2129         strftime(buffer,sizeof(buffer),"%Y",timeinfo);
2130         string year(buffer); thisYear = year;
2131 
2132         strftime(buffer,sizeof(buffer),"%m",timeinfo);
2133         string Month(buffer); thisMonth = Month;
2134 
2135         strftime(buffer,sizeof(buffer),"%d",timeinfo);
2136         string Day(buffer); thisDay = Day;
2137     }
2138     catch(exception& e) {
2139         m->errorOut(e, "Utils", "getCurrentDate");
2140         exit(1);
2141     }
2142 }
2143 /***********************************************************************/
isASCII(string input)2144 bool Utils::isASCII(string input){
2145     try {
2146 
2147         for (int i = 0; i < input.length(); i++) {
2148             if (isascii(input[i]) == 0) { return false; } //non ascii
2149         }
2150         return true;
2151     }
2152     catch(exception& e) {
2153         m->errorOut(e, "Utils", "isASCII");
2154         exit(1);
2155     }
2156 }
2157 /***********************************************************************/
getExtension(string longName)2158 string Utils::getExtension(string longName){
2159     try {
2160         string extension = "";
2161 
2162         if(longName.find_last_of('.') != longName.npos){
2163             int pos = longName.find_last_of('.');
2164             extension = longName.substr(pos, longName.length());
2165         }
2166 
2167         return extension;
2168     }
2169     catch(exception& e) {
2170         m->errorOut(e, "Utils", "getExtension");
2171         exit(1);
2172     }
2173 }
2174 /***********************************************************************/
mothurInitialPrep(string & defaultPath,string & tools,string & mothurVersion,string & releaseDate,string & OS)2175 bool Utils::mothurInitialPrep(string& defaultPath, string& tools, string& mothurVersion, string& releaseDate, string& OS){
2176     try {
2177 
2178         string lastChar = "";
2179         #ifdef MOTHUR_FILES
2180             defaultPath = MOTHUR_FILES;
2181             defaultPath = removeQuotes(defaultPath);
2182             //add / to name if needed
2183             lastChar = defaultPath.substr(defaultPath.length()-1);
2184             if (lastChar != PATH_SEPARATOR) { defaultPath += PATH_SEPARATOR; }
2185 
2186             defaultPath = getFullPathName(defaultPath);
2187         #else
2188             defaultPath = "";
2189         #endif
2190 
2191         #ifdef MOTHUR_TOOLS
2192             tools = MOTHUR_TOOLS;
2193             tools = removeQuotes(tools);
2194             //add / to name if needed
2195             lastChar = tools.substr(tools.length()-1);
2196             if (lastChar != PATH_SEPARATOR) { tools += PATH_SEPARATOR; }
2197 
2198             tools = getFullPathName(tools);
2199         #else
2200             tools = "";
2201         #endif
2202 
2203         #ifdef LOGFILE_NAME
2204             string logfilename = LOGFILE_NAME;
2205             logfilename = getFullPathName(logfilename);
2206 
2207             m->appendLogBuffer("Using Static Logfile " + logfilename +  "\n");
2208 
2209             m->setLogFileName(logfilename, false);
2210             m->mothurOut("\n");
2211         #endif
2212 
2213         releaseDate = "";
2214         #ifdef RELEASE_DATE
2215             releaseDate = RELEASE_DATE;
2216         #else
2217             string year, month, day;
2218             getCurrentDate(year, month, day);
2219             releaseDate = month + "/" + day + "/" + year;
2220         #endif
2221 
2222         mothurVersion = VERSION;
2223 
2224 
2225         //version
2226 #if defined NON_WINDOWS
2227 #if defined (__APPLE__) || (__MACH__)
2228         m->appendLogBuffer("Mac version\n\n");
2229 #else
2230         m->appendLogBuffer("Linux version\n\n");
2231 #endif
2232 #else
2233         m->appendLogBuffer("Windows version\n\n");
2234 #endif
2235 
2236         string packagesUsed = "";
2237 #ifdef USE_READLINE
2238         packagesUsed += "ReadLine,";
2239 #endif
2240 
2241 #ifdef USE_BOOST
2242         packagesUsed += "Boost,";
2243 #endif
2244 
2245 #ifdef USE_HDF5
2246         packagesUsed += "HDF5,";
2247 #endif
2248 
2249 #ifdef USE_GSL
2250         packagesUsed += "GSL,";
2251 #endif
2252 
2253         if (packagesUsed != "") {
2254             //remove last comma
2255             packagesUsed = packagesUsed.substr(0,packagesUsed.length()-1);
2256             m->appendLogBuffer("Using " + packagesUsed + "\n");
2257         }
2258 
2259         #ifdef MOTHUR_FILES
2260             m->appendLogBuffer("\nUsing default search path for mothur input files: " + defaultPath + "\n\n");
2261         #endif
2262 
2263         #ifdef MOTHUR_TOOLS
2264             m->appendLogBuffer("\nUsing mothur tools location: " + tools + "\n\n");
2265         #endif
2266 
2267         //header
2268         m->appendLogBuffer("mothur v." + mothurVersion + "\n");
2269         m->appendLogBuffer("Last updated: " + releaseDate + "\n");
2270         m->appendLogBuffer("by\n");
2271         m->appendLogBuffer("Patrick D. Schloss\n\n");
2272         m->appendLogBuffer("Department of Microbiology & Immunology\n\n");
2273         m->appendLogBuffer("University of Michigan\n");
2274         m->appendLogBuffer("http://www.mothur.org\n\n");
2275         m->appendLogBuffer("When using, please cite:\n");
2276         m->appendLogBuffer("Schloss, P.D., et al., Introducing mothur: Open-source, platform-independent, community-supported software for describing and comparing microbial communities. Appl Environ Microbiol, 2009. 75(23):7537-41.\n\n");
2277         m->appendLogBuffer("Distributed under the GNU General Public License\n\n");
2278         m->appendLogBuffer("Type 'help()' for information on the commands that are available\n\n");
2279         m->appendLogBuffer("For questions and analysis support, please visit our forum at https://forum.mothur.org\n\n");
2280         m->appendLogBuffer("Type 'quit()' to exit program\n\n");
2281 
2282         m->setRandomSeed(19760620);
2283         m->appendLogBuffer("[NOTE]: Setting random seed to 19760620.\n\n");
2284 
2285         OS = "";
2286         //version
2287         #if defined NON_WINDOWS
2288             #if defined (__APPLE__) || (__MACH__)
2289             OS = "Mac ";
2290             #else
2291             OS = "Linux ";
2292             #endif
2293         #else
2294             OS = "Windows ";
2295         #endif
2296 
2297         return true;
2298     }
2299     catch(exception& e) {
2300         m->errorOut(e, "Utils", "mothurInitialPrep");
2301         exit(1);
2302     }
2303 }
2304 /***********************************************************************/
2305 /***********************************************************************/
isBlank(string fileName)2306 bool Utils::isBlank(string fileName){
2307     try {
2308 
2309         fileName = getFullPathName(fileName);
2310 
2311         ifstream fileHandle;
2312         fileHandle.open(fileName.c_str());
2313         if(!fileHandle) { m->mothurOut("[ERROR]: Could not open " + fileName + "\n");  }
2314         else {  //check for blank file
2315             zapGremlins(fileHandle);
2316             gobble(fileHandle);
2317             if (fileHandle.eof()) { fileHandle.close(); return true;  }
2318             fileHandle.close();
2319         }
2320         return false;
2321     }
2322     catch(exception& e) {
2323         m->errorOut(e, "Utils", "isBlank");
2324         exit(1);
2325     }
2326 }
2327 /***********************************************************************/
stringBlank(string input)2328 bool Utils::stringBlank(string input){
2329     try {
2330         for (int i = 0; i < input.length(); i++) { if (!isspace(input[i])) { return false; } }
2331         return true;
2332     }
2333     catch(exception& e) {
2334         m->errorOut(e, "Utils", "isBlank");
2335         exit(1);
2336     }
2337 }
2338 /**************************************************************************************************/
setFilePosFasta(string filename,long long & num,char delim)2339 vector<double> Utils::setFilePosFasta(string filename, long long& num, char delim) {
2340     try {
2341         vector<double> positions;
2342         ifstream inFASTA;
2343         string completeFileName = getFullPathName(filename);
2344         //inFASTA.open(completeFileName.c_str(), ios::binary);
2345         openInputFileBinary(completeFileName, inFASTA);
2346         int nameLine = 2;
2347         if (delim == '@') { nameLine = 4; }
2348         else if (delim == '>') { nameLine = 2; }
2349         else { m->mothurOut("[ERROR]: unknown file deliminator, quitting.\n"); m->setControl_pressed(true); }
2350 
2351         double count = 0;
2352         long long numLines = 0;
2353         while(!inFASTA.eof()){
2354             char c = inFASTA.get(); count++;
2355             string input = ""; input += c;
2356             while ((c != '\n') && (c != '\r') && (c != '\f') && (c != EOF)) {
2357                 c = inFASTA.get(); count++;
2358                 input += c;
2359             }
2360             numLines++;
2361             //gobble
2362             while(isspace(c=inFASTA.get()))		{ input += c; count++;}
2363             if(!inFASTA.eof()) { inFASTA.putback(c); count--;  }
2364 
2365             if (input.length() != 0) {
2366                 if((input[0] == delim) && (((numLines-1)%nameLine) == 0)){ //this is a name line
2367                     positions.push_back(count+numLines-input.length());
2368                 }else if (int(c) == -1) { break; }
2369                 else { input = ""; }
2370             }
2371         }
2372         inFASTA.close();
2373 
2374         num = positions.size();
2375 
2376         FILE * pFile;
2377         double size;
2378 
2379         //get num bytes in file
2380         pFile = fopen (completeFileName.c_str(),"rb");
2381         if (pFile==NULL) perror ("Error opening file");
2382         else{
2383             fseek (pFile, 0, SEEK_END);
2384             size=ftell (pFile);
2385             fclose (pFile);
2386         }
2387 
2388         positions.push_back(size);
2389         positions[0] = 0;
2390 
2391         return positions;
2392     }
2393     catch(exception& e) {
2394         m->errorOut(e, "Utils", "setFilePosFasta");
2395         exit(1);
2396     }
2397 }
2398 /**************************************************************************************************/
setFilePosFasta(string filename,long long & num)2399 vector<double> Utils::setFilePosFasta(string filename, long long& num) {
2400     try {
2401         vector<double> positions;
2402         ifstream inFASTA;
2403         //openInputFileBinary(filename, inFASTA);
2404         string completeFileName = getFullPathName(filename);
2405         //inFASTA.open(completeFileName.c_str(), ios::binary);
2406         openInputFileBinary(completeFileName, inFASTA);
2407 
2408         string input;
2409         double count = 0;
2410         while(!inFASTA.eof()){
2411             char c = inFASTA.get(); count++;
2412             if (c == '>') { positions.push_back(count-1); }
2413         }
2414         inFASTA.close();
2415 
2416         num = positions.size();
2417 
2418         FILE * pFile;
2419         double size;
2420 
2421         //get num bytes in file
2422         pFile = fopen (completeFileName.c_str(),"rb");
2423         if (pFile==NULL) perror ("Error opening file");
2424         else{
2425             fseek (pFile, 0, SEEK_END);
2426             size=ftell (pFile);
2427             fclose (pFile);
2428         }
2429 
2430         positions.push_back(size);
2431         positions[0] = 0;
2432 
2433         return positions;
2434     }
2435     catch(exception& e) {
2436         m->errorOut(e, "Utils", "setFilePosFasta");
2437         exit(1);
2438     }
2439 }
2440 //**********************************************************************************************************************
readConsTax(string inputfile,PhyloTree & tree)2441 vector<Taxonomy> Utils::readConsTax(string inputfile, PhyloTree& tree){
2442     try {
2443         //read headers
2444         ifstream in; openInputFile(inputfile, in); getline(in);
2445 
2446         vector<Taxonomy> taxes;
2447         while (!in.eof()) {
2448 
2449             if (m->getControl_pressed()) { break; }
2450 
2451             Taxonomy thisTax(in);
2452             taxes.push_back(thisTax);
2453 
2454             tree.addSeqToTree(thisTax.getName(), thisTax.getTaxons());
2455         }
2456         in.close();
2457 
2458         return taxes;
2459     }
2460     catch(exception& e) {
2461         m->errorOut(e, "Utils", "readConsTax");
2462         exit(1);
2463     }
2464 }
2465 //**********************************************************************************************************************
readConsTax(string inputfile)2466 vector<consTax> Utils::readConsTax(string inputfile){
2467     try {
2468 
2469         vector<consTax> taxes;
2470 
2471         ifstream in;
2472         openInputFile(inputfile, in);
2473 
2474         //read headers
2475         getline(in);
2476 
2477         while (!in.eof()) {
2478 
2479             if (m->getControl_pressed()) { break; }
2480 
2481             string otu = ""; string tax = "unknown";
2482             int size = 0;
2483 
2484             in >> otu; gobble(in);
2485             in >> size; gobble(in);
2486             tax = getline(in); gobble(in);
2487 
2488             consTax temp(otu, tax, size);
2489             taxes.push_back(temp);
2490         }
2491         in.close();
2492 
2493         return taxes;
2494     }
2495     catch(exception& e) {
2496         m->errorOut(e, "Utils", "readConsTax");
2497         exit(1);
2498     }
2499 }
2500 //**********************************************************************************************************************
readConsTax(string inputfile,vector<Taxonomy> & conTax)2501 void Utils::readConsTax(string inputfile, vector<Taxonomy>& conTax){
2502     try {
2503         conTax.clear();
2504 
2505         ifstream in; openInputFile(inputfile, in);
2506         getline(in); //read headers
2507 
2508         while (!in.eof()) {
2509 
2510             if (m->getControl_pressed()) { break; }
2511 
2512             string otu = ""; string tax = "unknown";
2513             int size = 0;
2514 
2515             in >> otu; gobble(in);
2516             in >> size; gobble(in);
2517             tax = getline(in); gobble(in);
2518 
2519             Taxonomy temp(otu, tax, size);
2520             conTax.push_back(temp);
2521         }
2522         in.close();
2523 
2524         return;
2525     }
2526     catch(exception& e) {
2527         m->errorOut(e, "Utils", "readConsTax");
2528         exit(1);
2529     }
2530 }
2531 //**********************************************************************************************************************
readConsTax(string inputfile,map<int,consTax2> & taxes)2532 int Utils::readConsTax(string inputfile, map<int, consTax2>& taxes){
2533     try {
2534         ifstream in;
2535         openInputFile(inputfile, in);
2536 
2537         //read headers
2538         getline(in);
2539 
2540         while (!in.eof()) {
2541 
2542             if (m->getControl_pressed()) { break; }
2543 
2544             string otu = ""; string tax = "unknown";
2545             int size = 0;
2546 
2547             in >> otu; gobble(in);
2548             in >> size; gobble(in);
2549             tax = getline(in); gobble(in);
2550 
2551             consTax2 temp(otu, tax, size);
2552             string simpleBin = getSimpleLabel(otu);
2553             int bin;
2554             convert(simpleBin, bin);
2555             taxes[bin] = temp;
2556         }
2557         in.close();
2558 
2559         return 0;
2560     }
2561     catch(exception& e) {
2562         m->errorOut(e, "Utils", "readConsTax");
2563         exit(1);
2564     }
2565 }
2566 /**************************************************************************************************/
setFilePosEachLine(string filename,long long & num)2567 vector<double> Utils::setFilePosEachLine(string filename, long long& num) {
2568     try {
2569         filename = getFullPathName(filename);
2570 
2571         vector<double> positions;
2572         ifstream in;
2573         //openInputFile(filename, in);
2574         openInputFileBinary(filename, in);
2575 
2576         string input;
2577         unsigned long long count = 0;
2578         positions.push_back(0);
2579 
2580         while(!in.eof()){
2581             //getline counting reads
2582             char d = in.get(); count++;
2583             while ((d != '\n') && (d != '\r') && (d != '\f') && (d != in.eof()))	{
2584                 //get next character
2585                 d = in.get();
2586                 count++;
2587             }
2588 
2589             if (!in.eof()) {
2590                 d=in.get(); count++;
2591                 while(isspace(d) && (d != in.eof()))		{ d=in.get(); count++;}
2592             }
2593             positions.push_back(count-1);
2594 
2595         }
2596         in.close();
2597 
2598         num = positions.size()-1;
2599 
2600         FILE * pFile;
2601         double size = 0;
2602 
2603         //get num bytes in file
2604         pFile = fopen (filename.c_str(),"rb");
2605         if (pFile==NULL) perror ("Error opening file");
2606         else{
2607             fseek (pFile, 0, SEEK_END);
2608             size=ftell (pFile);
2609             fclose (pFile);
2610         }
2611 
2612         positions[(positions.size()-1)] = size;
2613 
2614         return positions;
2615     }
2616     catch(exception& e) {
2617         m->errorOut(e, "Utils", "setFilePosEachLine");
2618         exit(1);
2619     }
2620 }
2621 /**************************************************************************************************/
setFilePosEachLine(string filename,unsigned long long & num)2622 vector<double> Utils::setFilePosEachLine(string filename, unsigned long long& num) {
2623     try {
2624         filename = getFullPathName(filename);
2625 
2626         vector<double> positions;
2627         ifstream in;
2628         //openInputFile(filename, in);
2629         openInputFileBinary(filename, in);
2630 
2631         string input;
2632         unsigned long long count = 0;
2633         positions.push_back(0);
2634 
2635         while(!in.eof()){
2636             //getline counting reads
2637             char d = in.get(); count++;
2638             while ((d != '\n') && (d != '\r') && (d != '\f') && (d != in.eof()))	{
2639                 //get next character
2640                 d = in.get();
2641                 count++;
2642             }
2643 
2644             if (!in.eof()) {
2645                 d=in.get(); count++;
2646                 while(isspace(d) && (d != in.eof()))		{ d=in.get(); count++;}
2647             }
2648             positions.push_back(count-1);
2649         }
2650         in.close();
2651 
2652         num = positions.size()-1;
2653 
2654         FILE * pFile;
2655         double size = 0;
2656 
2657         //get num bytes in file
2658         pFile = fopen (filename.c_str(),"rb");
2659         if (pFile==NULL) perror ("Error opening file");
2660         else{
2661             fseek (pFile, 0, SEEK_END);
2662             size=ftell (pFile);
2663             fclose (pFile);
2664         }
2665 
2666         positions[(positions.size()-1)] = size;
2667 
2668         return positions;
2669     }
2670     catch(exception& e) {
2671         m->errorOut(e, "Utils", "setFilePosEachLine");
2672         exit(1);
2673     }
2674 }
2675 
2676 /**************************************************************************************************/
2677 
divideFile(string filename,int & proc)2678 vector<double> Utils::divideFile(string filename, int& proc) {
2679     try{
2680         vector<double> filePos;
2681         filePos.push_back(0);
2682 
2683         FILE * pFile;
2684         double size = 0;
2685 
2686         filename = getFullPathName(filename);
2687 
2688         //get num bytes in file
2689         pFile = fopen (filename.c_str(),"rb");
2690         if (pFile==NULL) perror ("Error opening file");
2691         else{
2692             fseek (pFile, 0, SEEK_END);
2693             size=ftell (pFile);
2694             fclose (pFile);
2695         }
2696 
2697         if (proc == 1) { filePos.push_back(size); return filePos; }
2698 
2699 #if defined NON_WINDOWS
2700 
2701         //estimate file breaks
2702         double chunkSize = 0;
2703         chunkSize = size / proc;
2704 
2705         //file to small to divide by processors
2706         if (chunkSize == 0)  {  proc = 1;	filePos.push_back(size); return filePos;	}
2707 
2708         if (proc > 1) {
2709             //for each process seekg to closest file break and search for next '>' char. make that the filebreak
2710             for (int i = 0; i < proc; i++) {
2711                 double spot = (i+1) * chunkSize;
2712 
2713                 ifstream in;
2714                 openInputFile(filename, in);
2715                 in.seekg(spot);
2716 
2717                 //look for next '>'
2718                 double newSpot = spot;
2719                 while (!in.eof()) {
2720                     char c = in.get();
2721 
2722                     if (c == '>') {   in.putback(c); newSpot = in.tellg(); break;  }
2723                     else if (int(c) == -1) { break; }
2724 
2725                 }
2726 
2727                 //there was not another sequence before the end of the file
2728                 double sanityPos = in.tellg();
2729 
2730                 if (isEqual(sanityPos, -1)) {	break;  }
2731                 else {  filePos.push_back(newSpot);  }
2732 
2733                 in.close();
2734             }
2735         }
2736         //save end pos
2737         filePos.push_back(size);
2738 
2739         //sanity check filePos
2740         for (int i = 0; i < (filePos.size()-1); i++) {
2741             if (filePos[(i+1)] <= filePos[i]) {  filePos.erase(filePos.begin()+(i+1)); i--; }
2742         }
2743 
2744         proc = (filePos.size() - 1);
2745 #else
2746         m->mothurOut("[ERROR]: Windows version should not be calling the divideFile function.\n");
2747         proc=1;
2748         filePos.push_back(size);
2749 #endif
2750         return filePos;
2751     }
2752     catch(exception& e) {
2753         m->errorOut(e, "Utils", "divideFile");
2754         exit(1);
2755     }
2756 }
2757 /**************************************************************************************************/
2758 
// Chooses byte offsets that cut a file into pieces, each piece starting on a line that
// begins with delimChar and that is confirmed as a record name line.
// filename: file to divide; it is resolved with getFullPathName.
// proc: requested number of pieces on entry; on return it is set to the number of pieces
//       actually produced, except that a request of exactly 1 returns with proc untouched.
// delimChar: first character of a record's name line. When it is '@' the confirming
//       character is '+', otherwise the confirming character is '>'.
// Returns the piece boundaries: 0 first, the file size last.
// When NON_WINDOWS is not defined, an error is printed and the whole file is one piece.
vector<double> Utils::divideFile(string filename, int& proc, char delimChar) {
    try{
        vector<double> filePos;
        filePos.push_back(0);

        FILE * pFile;
        double size = 0;

        filename = getFullPathName(filename);

        //get num bytes in file; size stays 0 when the file cannot be opened
        pFile = fopen (filename.c_str(),"rb");
        if (pFile==NULL) perror ("Error opening file");
        else{
            fseek (pFile, 0, SEEK_END);
            size=ftell (pFile);
            fclose (pFile);
        }

        //character expected at the start of the line two below a real name line
        char secondaryDelim = '>';
        if (delimChar == '@') { secondaryDelim = '+'; }

        //one piece covers the whole file
        if (proc == 1) { filePos.push_back(size); return filePos; }

#if defined NON_WINDOWS

        //estimate file breaks
        double chunkSize = 0;
        chunkSize = size / proc;

        //file too small to divide by processors
        //NOTE(review): chunkSize is a double, so this only triggers when size is 0 - confirm that is intended
        if (chunkSize == 0)  {  proc = 1;	filePos.push_back(size); return filePos;	}

        //for each process seekg to closest file break and search for next delimChar char. make that the filebreak
        for (int i = 0; i < proc; i++) {
            double spot = (i+1) * chunkSize;

            ifstream in;
            openInputFile(filename, in);
            in.seekg(spot);

            getline(in); //get to end of line in case you jump into middle of line where the delim char happens to fall.

            //look for next delimChar
            double newSpot = spot;
            while (!in.eof()) {
                //read one line by hand, keeping its terminating character in input
                char c = in.get();
                string input = ""; input += c;
                while ((c != '\n') && (c != '\r') && (c != '\f') && (c != EOF)) {
                    c = in.get();
                    input += c;
                }

                if (input.length() != 0) {
                    if(input[0] == delimChar){ //this is a potential name line
                        //back up by the length of the line just read to get the offset where it began
                        newSpot = in.tellg();
                        newSpot -=input.length();
                        //get two lines and look for secondary delim
                        //in a fasta file this would be a new sequence, in fastq it will be the + line, if this was a nameline.
                        getline(in); gobble(in);
                        if (!in.eof()) {
                            string secondInput = getline(in); gobble(in);
                            if (secondInput[0] == secondaryDelim) { break; } //yes, it was a nameline so stop
                            else { input = ""; gobble(in); } //nope it was a delim at the beginning of a non nameline, keep looking.
                        }
                    }else if (int(c) == -1) { break; } //the line read ended at end of file
                    else {  input = ""; gobble(in); }
                }
            }

            //there was not another sequence before the end of the file
            double sanityPos = in.tellg();

            if (isEqual(sanityPos, -1)) {	break;  }
            else {  filePos.push_back(newSpot);  }

            in.close();
        }

        //save end pos
        filePos.push_back(size);

        //sanity check filePos: remove any boundary that is not greater than the one before it
        for (int i = 0; i < (filePos.size()-1); i++) {
            if (filePos[(i+1)] <= filePos[i]) {  filePos.erase(filePos.begin()+(i+1)); i--; }
        }

        proc = (filePos.size() - 1);
#else
        m->mothurOut("[ERROR]: Windows version should not be calling the divideFile function.\n");
        proc=1;
        filePos.push_back(size);
#endif
        return filePos;
    }
    catch(exception& e) {
        m->errorOut(e, "Utils", "divideFile");
        exit(1);
    }
}
2859 
2860 /**************************************************************************************************/
2861 
divideFilePerLine(string filename,int & proc)2862 vector<double> Utils::divideFilePerLine(string filename, int& proc) {
2863     try{
2864         vector<double> filePos;
2865         filePos.push_back(0);
2866 
2867         FILE * pFile;
2868         double size = 0;
2869 
2870         filename = getFullPathName(filename);
2871 
2872         //get num bytes in file
2873         pFile = fopen (filename.c_str(),"rb");
2874         if (pFile==NULL) perror ("Error opening file");
2875         else{
2876             fseek (pFile, 0, SEEK_END);
2877             size=ftell (pFile);
2878             fclose (pFile);
2879         }
2880 
2881 #if defined NON_WINDOWS
2882         //estimate file breaks
2883         double chunkSize = 0;
2884         chunkSize = size / proc;
2885 
2886         //file to small to divide by processors
2887         if (chunkSize == 0)  {  proc = 1;	filePos.push_back(size); return filePos;	}
2888 
2889         //for each process seekg to closest file break and search for next '>' char. make that the filebreak
2890         for (int i = 0; i < proc; i++) {
2891             double spot = (i+1) * chunkSize;
2892 
2893             ifstream in;
2894             openInputFile(filename, in);
2895             in.seekg(spot);
2896 
2897             //look for next line break
2898             double newSpot = spot;
2899             while (!in.eof()) {
2900                 char c = in.get();
2901 
2902                 if ((c == '\n') || (c == '\r') || (c == '\f'))	{ gobble(in); newSpot = in.tellg(); break; }
2903                 else if (int(c) == -1) { break; }
2904             }
2905 
2906             //there was not another line before the end of the file
2907             double sanityPos = in.tellg();
2908 
2909             if (sanityPos == -1) {	break;  }
2910             else {  filePos.push_back(newSpot);  }
2911 
2912             in.close();
2913         }
2914 
2915         //save end pos
2916         filePos.push_back(size);
2917 
2918         //sanity check filePos
2919         for (int i = 0; i < (filePos.size()-1); i++) {
2920             if (filePos[(i+1)] <= filePos[i]) {  filePos.erase(filePos.begin()+(i+1)); i--; }
2921         }
2922 
2923         proc = (filePos.size() - 1);
2924 #else
2925         m->mothurOut("[ERROR]: Windows version should not be calling the divideFile function.\n");
2926         proc=1;
2927         filePos.push_back(size);
2928 #endif
2929         return filePos;
2930     }
2931     catch(exception& e) {
2932         m->errorOut(e, "Utils", "divideFile");
2933         exit(1);
2934     }
2935 }
2936 /**************************************************************************************************/
divideFile(string filename,int & proc,vector<string> & files)2937 int Utils::divideFile(string filename, int& proc, vector<string>& files) {
2938     try{
2939 
2940         vector<double> filePos = divideFile(filename, proc);
2941 
2942         for (int i = 0; i < (filePos.size()-1); i++) {
2943 
2944             //read file chunk
2945             ifstream in;
2946             openInputFile(filename, in);
2947             in.seekg(filePos[i]);
2948             unsigned long long size = filePos[(i+1)] - filePos[i];
2949             char* chunk = new char[size];
2950             in.read(chunk, size);
2951             in.close();
2952 
2953             //open new file
2954             string fileChunkName = filename + "." + toString(i) + ".tmp";
2955             ofstream out;
2956             openOutputFile(fileChunkName, out);
2957 
2958             out << chunk << endl;
2959             out.close();
2960             delete[] chunk;
2961 
2962             //save name
2963             files.push_back(fileChunkName);
2964         }
2965 
2966         return 0;
2967     }
2968     catch(exception& e) {
2969         m->errorOut(e, "Utils", "divideFile");
2970         exit(1);
2971     }
2972 }
2973 /***********************************************************************/
2974 
isTrue(string f)2975 bool Utils::isTrue(string f){
2976     try {
2977 
2978         for (int i = 0; i < f.length(); i++) { f[i] = toupper(f[i]); }
2979 
2980         if ((f == "TRUE") || (f == "T")) {	return true;	}
2981         else {	return false;  }
2982     }
2983     catch(exception& e) {
2984         m->errorOut(e, "Utils", "isTrue");
2985         exit(1);
2986     }
2987 }
2988 
2989 /***********************************************************************/
2990 
roundDist(float dist,int precision)2991 float Utils::roundDist(float dist, int precision){
2992     try {
2993         return int(dist * precision + 0.5)/float(precision);
2994     }
2995     catch(exception& e) {
2996         m->errorOut(e, "Utils", "roundDist");
2997         exit(1);
2998     }
2999 }
3000 /***********************************************************************/
3001 
ceilDist(float dist,int precision)3002 float Utils::ceilDist(float dist, int precision){
3003     try {
3004         return int(ceil(dist * precision))/float(precision);
3005     }
3006     catch(exception& e) {
3007         m->errorOut(e, "Utils", "ceilDist");
3008         exit(1);
3009     }
3010 }
3011 /***********************************************************************/
3012 
splitWhiteSpace(string & rest,char buffer[],int size)3013 vector<string> Utils::splitWhiteSpace(string& rest, char buffer[], int size){
3014     try {
3015         vector<string> pieces;
3016 
3017         for (int i = 0; i < size; i++) {
3018             if (!isspace(buffer[i]))  { rest += buffer[i];  }
3019             else {
3020                 if (rest != "") { pieces.push_back(rest);  rest = ""; }
3021                 while (i < size) {  //gobble white space
3022                     if (isspace(buffer[i])) { i++; }
3023                     else { rest = buffer[i];  break; }
3024                 }
3025             }
3026         }
3027 
3028         return pieces;
3029     }
3030     catch(exception& e) {
3031         m->errorOut(e, "Utils", "splitWhiteSpace");
3032         exit(1);
3033     }
3034 }
3035 /***********************************************************************/
trimWhiteSpace(string input)3036 string Utils::trimWhiteSpace(string input){
3037     try {
3038 
3039         int start, end; start = 0; end = input.length();
3040 
3041         //no spaces
3042         if (input.find_first_of(' ') == string::npos) { return input; }
3043 
3044         for (int i = 0; i < input.length(); i++) {
3045             if (input[i] != ' ') { start = i; break; }
3046         }
3047 
3048         end = start;
3049         for (int i = input.length()-1; i > start; i--) {
3050             if (input[i] != ' ') { end = i; break; }
3051         }
3052 
3053         string trimmed = input.substr(start, end-start+1);
3054 
3055         return trimmed;
3056     }
3057     catch(exception& e) {
3058         m->errorOut(e, "Utils", "trimWhiteSpace");
3059         exit(1);
3060     }
3061 }
3062 /***********************************************************************/
splitWhiteSpace(string input)3063 vector<string> Utils::splitWhiteSpace(string input){
3064     try {
3065         vector<string> pieces;
3066         string rest = "";
3067 
3068         for (int i = 0; i < input.length(); i++) {
3069             if (!isspace(input[i]))  { rest += input[i];  }
3070             else {
3071                 if (rest != "") { pieces.push_back(rest);  rest = ""; }
3072                 while (i < input.length()) {  //gobble white space
3073                     if (isspace(input[i])) { i++; }
3074                     else { rest = input[i];  break; }
3075                 }
3076             }
3077         }
3078 
3079         if (rest != "") { pieces.push_back(rest); }
3080 
3081         return pieces;
3082     }
3083     catch(exception& e) {
3084         m->errorOut(e, "Utils", "splitWhiteSpace");
3085         exit(1);
3086     }
3087 }
3088 /***********************************************************************/
splitWhiteSpace(string input,vector<float> & pieces,int index)3089 int Utils::splitWhiteSpace(string input, vector<float>& pieces, int index){
3090     try {
3091         pieces.clear();
3092         string rest = "";
3093         int count = 0;
3094 
3095         for (int i = 0; i < input.length(); i++) {
3096             if (!isspace(input[i]))  { rest += input[i];  }
3097             else {
3098                 if (rest != "") { float tdist; mothurConvert(rest, tdist); pieces.push_back(tdist); count++; rest = ""; }
3099                 while (i < input.length()) {  //gobble white space
3100                     if (isspace(input[i])) { i++; }
3101                     else { rest = input[i];  break; }
3102                 }
3103                 if (count > index) { return 0; }
3104             }
3105         }
3106 
3107         if (rest != "") { float tdist; mothurConvert(rest, tdist); count++; pieces.push_back(tdist); }
3108 
3109         return 0;
3110     }
3111     catch(exception& e) {
3112         m->errorOut(e, "Utils", "splitWhiteSpace");
3113         exit(1);
3114     }
3115 }
3116 /***********************************************************************/
// Splits input on white space while keeping quoted text together in one piece.
// If input contains neither ' nor ", the work is handed to splitWhiteSpace(input).
// Otherwise, from a quote character the text is copied, white space included, up to and
// including the next ' or " (the two kinds are not required to match). The quote
// characters stay in the returned piece.
// NOTE(review): when no closing quote is found, i is not advanced, so the outer loop
// appears to visit the already copied characters again - confirm whether unterminated
// quotes can reach this function.
vector<string> Utils::splitWhiteSpaceWithQuotes(string input){
    try {
        vector<string> pieces;
        string rest = "";

        int pos = input.find('\'');
        int pos2 = input.find('\"');

        if ((pos == string::npos) && (pos2 == string::npos)) { return splitWhiteSpace(input); } //no quotes to worry about
        else {
            for (int i = 0; i < input.length(); i++) {

                //the rest == quote tests catch a quote that the white space skipping below already stored in rest
                if ((input[i] == '\'') || (input[i] == '\"') || (rest == "\'") || (rest == "\"")) { //grab everything til end or next ' or "
                    rest += input[i];
                    for (int j = i+1; j < input.length(); j++) {
                        if ((input[j] == '\'') || (input[j] == '\"')) {  //then quit
                            rest += input[j];
                            i = j; //outer loop resumes after the closing quote
                            j+=input.length(); //pushes j past the end to leave the inner loop
                        }else { rest += input[j]; }
                    }
                }else if (!isspace(input[i]))  { rest += input[i];  }
                else {
                    //white space ends the current piece
                    if (rest != "") { pieces.push_back(rest);  rest = ""; }
                    while (i < input.length()) {  //gobble white space
                        if (isspace(input[i])) { i++; }
                        else { rest = input[i];  break; } //first character of the next piece
                    }
                }
            }

            //text after the last white space is a piece too
            if (rest != "") { pieces.push_back(rest); }
        }
        return pieces;
    }
    catch(exception& e) {
        //NOTE(review): the function name reported here is "splitWhiteSpace", not this function's name
        m->errorOut(e, "Utils", "splitWhiteSpace");
        exit(1);
    }
}
3157 //**********************************************************************************************************************
readTax(string taxfile,map<string,string> & taxMap,bool removeConfidence)3158 int Utils::readTax(string taxfile, map<string, string>& taxMap, bool removeConfidence) {
3159     try {
3160         //open input file
3161         ifstream in;
3162         openInputFile(taxfile, in);
3163 
3164         bool error = false;
3165         string name, taxonomy;
3166 
3167         while (!in.eof()) {
3168             if (m->getControl_pressed()) { break; }
3169 
3170             in >> name; gobble(in);
3171             taxonomy = getline(in); gobble(in);
3172 
3173             checkName(name);
3174 
3175             //are there confidence scores, if so remove them
3176             if (removeConfidence) {  if (taxonomy.find_first_of('(') != -1) {  removeConfidences(taxonomy);	} }
3177             map<string, string>::iterator itTax = taxMap.find(name);
3178 
3179             if(itTax == taxMap.end()) {
3180                 bool ignore = false;
3181                 if (taxonomy != "") { if (taxonomy[taxonomy.length()-1] != ';') { m->mothurOut("[ERROR]: " + name + " is missing the final ';', ignoring.\n"); ignore=true; }
3182                 }
3183                 if (!ignore) { taxMap[name] = taxonomy; }
3184             }else { m->mothurOut("[ERROR]: " + name + " is already in your taxonomy file, names must be unique.\n"); error = true; }
3185         }
3186         in.close();
3187 
3188         if (error) { m->setControl_pressed(true); }
3189 
3190         return taxMap.size();
3191 
3192     }
3193     catch(exception& e) {
3194         m->errorOut(e, "Utils", "readTax");
3195         exit(1);
3196     }
3197 }
3198 /**********************************************************************************************************************/
3199 //nameMap is filled with redundant names mapped to unique name
readNames(string namefile,map<string,string> & nameMap,bool redund)3200 int Utils::readNames(string namefile, map<string, string>& nameMap, bool redund) {
3201     try {
3202         //open input file
3203         ifstream in;
3204         openInputFile(namefile, in);
3205 
3206         string rest = "";
3207         char buffer[4096];
3208         bool pairDone = false;
3209         bool columnOne = true;
3210         string firstCol, secondCol;
3211 
3212         while (!in.eof()) {
3213             if (m->getControl_pressed()) { break; }
3214 
3215             in.read(buffer, 4096);
3216             vector<string> pieces = splitWhiteSpace(rest, buffer, in.gcount());
3217 
3218             for (int i = 0; i < pieces.size(); i++) {
3219                 if (columnOne) {  firstCol = pieces[i]; columnOne=false; }
3220                 else  { secondCol = pieces[i]; pairDone = true; columnOne=true; }
3221 
3222                 if (pairDone) {
3223                     checkName(firstCol);
3224                     checkName(secondCol);
3225 
3226                     //parse names into vector
3227                     vector<string> theseNames;
3228                     splitAtComma(secondCol, theseNames);
3229                     for (int i = 0; i < theseNames.size(); i++) {  nameMap[theseNames[i]] = firstCol;  }
3230                     pairDone = false;
3231                 }
3232             }
3233         }
3234         in.close();
3235 
3236         if (rest != "") {
3237             vector<string> pieces = splitWhiteSpace(rest);
3238 
3239             for (int i = 0; i < pieces.size(); i++) {
3240                 if (columnOne) {  firstCol = pieces[i]; columnOne=false; }
3241                 else  { secondCol = pieces[i]; pairDone = true; columnOne=true; }
3242 
3243                 if (pairDone) {
3244                     checkName(firstCol);
3245                     checkName(secondCol);
3246 
3247                     //parse names into vector
3248                     vector<string> theseNames;
3249                     splitAtComma(secondCol, theseNames);
3250                     for (int i = 0; i < theseNames.size(); i++) {   nameMap[theseNames[i]] = firstCol;  }
3251                     pairDone = false;
3252                 }
3253             }
3254         }
3255 
3256         return nameMap.size();
3257 
3258     }
3259     catch(exception& e) {
3260         m->errorOut(e, "Utils", "readNames");
3261         exit(1);
3262     }
3263 }
3264 /**********************************************************************************************************************/
readNames(string namefile,map<string,string> & nameMap,int flip)3265 int Utils::readNames(string namefile, map<string, string>& nameMap, int flip) {
3266     try {
3267         //open input file
3268         ifstream in;
3269         openInputFile(namefile, in);
3270 
3271         string rest = "";
3272         char buffer[4096];
3273         bool pairDone = false;
3274         bool columnOne = true;
3275         string firstCol, secondCol;
3276 
3277         while (!in.eof()) {
3278             if (m->getControl_pressed()) { break; }
3279 
3280             in.read(buffer, 4096);
3281             vector<string> pieces = splitWhiteSpace(rest, buffer, in.gcount());
3282 
3283             for (int i = 0; i < pieces.size(); i++) {
3284                 if (columnOne) {  firstCol = pieces[i]; columnOne=false; }
3285                 else  { secondCol = pieces[i]; pairDone = true; columnOne=true; }
3286 
3287                 if (pairDone) {
3288                     checkName(firstCol);
3289                     checkName(secondCol);
3290                     nameMap[secondCol] = firstCol;
3291                     pairDone = false;
3292                 }
3293             }
3294         }
3295         in.close();
3296 
3297         if (rest != "") {
3298             vector<string> pieces = splitWhiteSpace(rest);
3299 
3300             for (int i = 0; i < pieces.size(); i++) {
3301                 if (columnOne) {  firstCol = pieces[i]; columnOne=false; }
3302                 else  { secondCol = pieces[i]; pairDone = true; columnOne=true; }
3303 
3304                 if (pairDone) {
3305                     checkName(firstCol);
3306                     checkName(secondCol);
3307                     nameMap[secondCol] = firstCol;
3308                     pairDone = false;
3309                 }
3310             }
3311         }
3312 
3313         return nameMap.size();
3314 
3315     }
3316     catch(exception& e) {
3317         m->errorOut(e, "Utils", "readNames");
3318         exit(1);
3319     }
3320 }
3321 /**********************************************************************************************************************/
readNames(string namefile,map<string,string> & nameMap,map<string,int> & nameCount)3322 int Utils::readNames(string namefile, map<string, string>& nameMap, map<string, int>& nameCount) {
3323     try {
3324         nameMap.clear(); nameCount.clear();
3325         //open input file
3326         ifstream in;
3327         openInputFile(namefile, in);
3328 
3329         string rest = "";
3330         char buffer[4096];
3331         bool pairDone = false;
3332         bool columnOne = true;
3333         string firstCol, secondCol;
3334 
3335         while (!in.eof()) {
3336             if (m->getControl_pressed()) { break; }
3337 
3338             in.read(buffer, 4096);
3339             vector<string> pieces = splitWhiteSpace(rest, buffer, in.gcount());
3340 
3341             for (int i = 0; i < pieces.size(); i++) {
3342                 if (columnOne) {  firstCol = pieces[i]; columnOne=false; }
3343                 else  { secondCol = pieces[i]; pairDone = true; columnOne=true; }
3344 
3345                 if (pairDone) {
3346                     checkName(firstCol);
3347                     checkName(secondCol);
3348                     //parse names into vector
3349                     vector<string> theseNames;
3350                     splitAtComma(secondCol, theseNames);
3351                     for (int i = 0; i < theseNames.size(); i++) {  nameMap[theseNames[i]] = firstCol;  }
3352                     nameCount[firstCol] = theseNames.size();
3353                     pairDone = false;
3354                 }
3355             }
3356         }
3357         in.close();
3358 
3359         if (rest != "") {
3360             vector<string> pieces = splitWhiteSpace(rest);
3361 
3362             for (int i = 0; i < pieces.size(); i++) {
3363                 if (columnOne) {  firstCol = pieces[i]; columnOne=false; }
3364                 else  { secondCol = pieces[i]; pairDone = true; columnOne=true; }
3365 
3366                 if (pairDone) {
3367                     checkName(firstCol);
3368                     checkName(secondCol);
3369                     //parse names into vector
3370                     vector<string> theseNames;
3371                     splitAtComma(secondCol, theseNames);
3372                     for (int i = 0; i < theseNames.size(); i++) {  nameMap[theseNames[i]] = firstCol;  }
3373                     nameCount[firstCol] = theseNames.size();
3374                     pairDone = false;
3375                 }
3376             }
3377 
3378         }
3379         return nameMap.size();
3380 
3381     }
3382     catch(exception& e) {
3383         m->errorOut(e, "Utils", "readNames");
3384         exit(1);
3385     }
3386 }
3387 /**********************************************************************************************************************/
readNames(string namefile,map<string,string> & nameMap)3388 int Utils::readNames(string namefile, map<string, string>& nameMap) {
3389     try {
3390         //open input file
3391         ifstream in;
3392         openInputFile(namefile, in);
3393 
3394         string rest = "";
3395         char buffer[4096];
3396         bool pairDone = false;
3397         bool columnOne = true;
3398         string firstCol, secondCol;
3399 
3400         while (!in.eof()) {
3401             if (m->getControl_pressed()) { break; }
3402 
3403             in.read(buffer, 4096);
3404             vector<string> pieces = splitWhiteSpace(rest, buffer, in.gcount());
3405 
3406             for (int i = 0; i < pieces.size(); i++) {
3407                 if (columnOne) {  firstCol = pieces[i]; columnOne=false; }
3408                 else  { secondCol = pieces[i]; pairDone = true; columnOne=true; }
3409 
3410                 if (pairDone) {
3411                     checkName(firstCol);
3412                     checkName(secondCol);
3413                     nameMap[firstCol] = secondCol; pairDone = false; }
3414             }
3415         }
3416         in.close();
3417 
3418         if (rest != "") {
3419             vector<string> pieces = splitWhiteSpace(rest);
3420 
3421             for (int i = 0; i < pieces.size(); i++) {
3422                 if (columnOne) {  firstCol = pieces[i]; columnOne=false; }
3423                 else  { secondCol = pieces[i]; pairDone = true; columnOne=true; }
3424 
3425                 if (pairDone) {
3426                     checkName(firstCol);
3427                     checkName(secondCol);
3428                     nameMap[firstCol] = secondCol; pairDone = false; }
3429             }
3430         }
3431 
3432         return nameMap.size();
3433 
3434     }
3435     catch(exception& e) {
3436         m->errorOut(e, "Utils", "readNames");
3437         exit(1);
3438     }
3439 }
3440 /**********************************************************************************************************************/
readNames(string namefile,map<string,string> & nameMap,set<string> & namesToInclude)3441 int Utils::readNames(string namefile, map<string, string>& nameMap, set<string>& namesToInclude) {
3442     try {
3443         //open input file
3444         ifstream in;
3445         openInputFile(namefile, in);
3446 
3447         string firstCol, secondCol;
3448 
3449         while (!in.eof()) {
3450             if (m->getControl_pressed()) { break; }
3451 
3452             in >> firstCol; gobble(in);
3453             in >> secondCol; gobble(in);
3454 
3455             checkName(firstCol);
3456             checkName(secondCol);
3457 
3458             vector<string> secondNames; splitAtComma(secondCol, secondNames);
3459 
3460             secondCol = ""; firstCol = "";
3461 
3462             for (int i = 0; i < secondNames.size(); i++) {
3463                 if (namesToInclude.count(secondNames[i]) != 0) { //we want to include you
3464                     secondCol += secondNames[i] + ",";
3465                     if (firstCol == "") {   firstCol = secondNames[i]; }
3466                 }
3467             }
3468 
3469             if (secondCol != "") {
3470                 //remove last comma
3471                 secondCol = secondCol.substr(0,secondCol.length()-1);
3472 
3473                 nameMap[firstCol] = secondCol;
3474             }
3475 
3476         }
3477         in.close();
3478 
3479 
3480         return nameMap.size();
3481 
3482     }
3483     catch(exception& e) {
3484         m->errorOut(e, "Utils", "readNames");
3485         exit(1);
3486     }
3487 }
3488 
3489 /**********************************************************************************************************************/
readNames(string namefile,map<string,vector<string>> & nameMap)3490 int Utils::readNames(string namefile, map<string, vector<string> >& nameMap) {
3491     try {
3492         //open input file
3493         ifstream in;
3494         openInputFile(namefile, in);
3495 
3496         string rest = "";
3497         char buffer[4096];
3498         bool pairDone = false;
3499         bool columnOne = true;
3500         string firstCol, secondCol;
3501 
3502         while (!in.eof()) {
3503             if (m->getControl_pressed()) { break; }
3504 
3505             in.read(buffer, 4096);
3506             vector<string> pieces = splitWhiteSpace(rest, buffer, in.gcount());
3507 
3508             for (int i = 0; i < pieces.size(); i++) {
3509                 if (columnOne) {  firstCol = pieces[i]; columnOne=false; }
3510                 else  { secondCol = pieces[i]; pairDone = true; columnOne=true; }
3511 
3512                 if (pairDone) {
3513                     checkName(firstCol);
3514                     checkName(secondCol);
3515                     vector<string> temp;
3516                     splitAtComma(secondCol, temp);
3517                     nameMap[firstCol] = temp;
3518                     pairDone = false;
3519                 }
3520             }
3521         }
3522         in.close();
3523 
3524         if (rest != "") {
3525             vector<string> pieces = splitWhiteSpace(rest);
3526 
3527             for (int i = 0; i < pieces.size(); i++) {
3528                 if (columnOne) {  firstCol = pieces[i]; columnOne=false; }
3529                 else  { secondCol = pieces[i]; pairDone = true; columnOne=true; }
3530 
3531                 if (pairDone) {
3532                     checkName(firstCol);
3533                     checkName(secondCol);
3534                     vector<string> temp;
3535                     splitAtComma(secondCol, temp);
3536                     nameMap[firstCol] = temp;
3537                     pairDone = false;
3538                 }
3539             }
3540         }
3541 
3542         return nameMap.size();
3543     }
3544     catch(exception& e) {
3545         m->errorOut(e, "Utils", "readNames");
3546         exit(1);
3547     }
3548 }
3549 /**********************************************************************************************************************/
readNames(string namefile)3550 map<string, int> Utils::readNames(string namefile) {
3551     try {
3552         map<string, int> nameMap;
3553 
3554         //open input file
3555         ifstream in;
3556         openInputFile(namefile, in);
3557 
3558 
3559         string firstCol, secondCol;
3560 
3561         while (!in.eof()) {
3562             if (m->getControl_pressed()) { break; }
3563 
3564             in >> firstCol; gobble(in);
3565             in >> secondCol; gobble(in);
3566 
3567             checkName(firstCol);
3568             checkName(secondCol);
3569             int num = getNumNames(secondCol);
3570             nameMap[firstCol] = num;
3571         }
3572         in.close();
3573 
3574         return nameMap;
3575 
3576     }
3577     catch(exception& e) {
3578         m->errorOut(e, "Utils", "readNames");
3579         exit(1);
3580     }
3581 }
3582 /**********************************************************************************************************************/
scanNames(string namefile)3583 int Utils::scanNames(string namefile) {
3584     try {
3585 
3586         //open input file
3587         ifstream in;
3588         openInputFile(namefile, in);
3589 
3590         int total = 0;
3591         string firstCol, secondCol;
3592 
3593         while (!in.eof()) {
3594             if (m->getControl_pressed()) { break; }
3595 
3596             in >> firstCol; gobble(in);
3597             in >> secondCol; gobble(in);
3598 
3599             total += getNumNames(secondCol);
3600         }
3601         in.close();
3602 
3603         return total;
3604 
3605     }
3606     catch(exception& e) {
3607         m->errorOut(e, "Utils", "scanNames");
3608         exit(1);
3609     }
3610 }
3611 /**********************************************************************************************************************/
readNames(string namefile,map<string,long long> & nameMap)3612 void Utils::readNames(string namefile, map<string, long long>& nameMap) {
3613     try {
3614         //open input file
3615         ifstream in; openInputFile(namefile, in);
3616 
3617         string rest = "";
3618         char buffer[4096];
3619         bool pairDone = false;
3620         bool columnOne = true;
3621         string firstCol, secondCol;
3622 
3623         while (!in.eof()) {
3624             if (m->getControl_pressed()) { break; }
3625 
3626             in.read(buffer, 4096);
3627             vector<string> pieces = splitWhiteSpace(rest, buffer, in.gcount());
3628 
3629             for (int i = 0; i < pieces.size(); i++) {
3630                 if (columnOne) {  firstCol = pieces[i]; columnOne=false; }
3631                 else  { secondCol = pieces[i]; pairDone = true; columnOne=true; }
3632 
3633                 if (pairDone) {
3634                     checkName(firstCol);
3635                     checkName(secondCol);
3636                     long long num = getNumNames(secondCol);
3637                     nameMap[firstCol] = num;
3638                     pairDone = false;
3639                 }
3640             }
3641         }
3642         in.close();
3643 
3644         if (rest != "") {
3645             vector<string> pieces = splitWhiteSpace(rest);
3646             for (int i = 0; i < pieces.size(); i++) {
3647                 if (columnOne) {  firstCol = pieces[i]; columnOne=false; }
3648                 else  { secondCol = pieces[i]; pairDone = true; columnOne=true; }
3649 
3650                 if (pairDone) {
3651                     checkName(firstCol);
3652                     checkName(secondCol);
3653                     long long num = getNumNames(secondCol);
3654                     nameMap[firstCol] = num;
3655                     pairDone = false;
3656                 }
3657             }
3658         }
3659     }
3660     catch(exception& e) {
3661         m->errorOut(e, "Utils", "readNames");
3662         exit(1);
3663     }
3664 }
3665 
3666 /**********************************************************************************************************************/
readNames(string namefile,unsigned long int & numSeqs)3667 map<string, int> Utils::readNames(string namefile, unsigned long int& numSeqs) {
3668     try {
3669         map<string, int> nameMap;
3670         numSeqs = 0;
3671 
3672         //open input file
3673         ifstream in;
3674         openInputFile(namefile, in);
3675 
3676         string rest = "";
3677         char buffer[4096];
3678         bool pairDone = false;
3679         bool columnOne = true;
3680         string firstCol, secondCol;
3681 
3682         while (!in.eof()) {
3683             if (m->getControl_pressed()) { break; }
3684 
3685             in.read(buffer, 4096);
3686             vector<string> pieces = splitWhiteSpace(rest, buffer, in.gcount());
3687 
3688             for (int i = 0; i < pieces.size(); i++) {
3689                 if (columnOne) {  firstCol = pieces[i]; columnOne=false; }
3690                 else  { secondCol = pieces[i]; pairDone = true; columnOne=true; }
3691 
3692                 if (pairDone) {
3693                     checkName(firstCol);
3694                     checkName(secondCol);
3695                     int num = getNumNames(secondCol);
3696                     nameMap[firstCol] = num;
3697                     pairDone = false;
3698                     numSeqs += num;
3699                 }
3700             }
3701         }
3702         in.close();
3703 
3704         if (rest != "") {
3705             vector<string> pieces = splitWhiteSpace(rest);
3706             for (int i = 0; i < pieces.size(); i++) {
3707                 if (columnOne) {  firstCol = pieces[i]; columnOne=false; }
3708                 else  { secondCol = pieces[i]; pairDone = true; columnOne=true; }
3709 
3710                 if (pairDone) {
3711                     checkName(firstCol);
3712                     checkName(secondCol);
3713                     int num = getNumNames(secondCol);
3714                     nameMap[firstCol] = num;
3715                     pairDone = false;
3716                     numSeqs += num;
3717                 }
3718             }
3719         }
3720 
3721         return nameMap;
3722 
3723     }
3724     catch(exception& e) {
3725         m->errorOut(e, "Utils", "readNames");
3726         exit(1);
3727     }
3728 }
3729 //**********************************************************************************************************************
printVsearchFile(vector<seqPriorityNode> & nameMapCount,string filename,string tag,string tag2)3730 int Utils::printVsearchFile(vector<seqPriorityNode>& nameMapCount, string filename, string tag, string tag2){
3731     try {
3732 
3733         sort(nameMapCount.begin(), nameMapCount.end(), compareSeqPriorityNodes);
3734 
3735         ofstream out;
3736         openOutputFile(filename, out);
3737 
3738         //print new file in order of
3739         for (int i = 0; i < nameMapCount.size(); i++) {
3740             if (m->getControl_pressed()) {break;}
3741             out << ">" << nameMapCount[i].name  << tag << nameMapCount[i].numIdentical << tag2 << endl << nameMapCount[i].seq << endl;
3742         }
3743         out.close();
3744 
3745         return 0;
3746     }
3747     catch(exception& e) {
3748         m->errorOut(e, "Utils", "printVsearchFile");
3749         exit(1);
3750     }
3751 }
3752 /************************************************************/
checkName(string & name)3753 int Utils::checkName(string& name) {
3754     try {
3755         if (modifyNames) {
3756             for (int i = 0; i < name.length(); i++) {
3757                 if (name[i] == ':') { name[i] = '_'; m->setChangedSeqNames(true); }
3758             }
3759         }
3760         return 0;
3761     }
3762     catch(exception& e) {
3763         m->errorOut(e, "Utils", "checkName");
3764         exit(1);
3765     }
3766 }
3767 /************************************************************/
checkGroupName(string name)3768 bool Utils::checkGroupName(string name) {
3769     try {
3770 
3771         bool goodName = true;
3772         for (int i = 0; i < name.length(); i++) {
3773             if (name[i] == ':') {  goodName = false; break;  }
3774             else if (name[i] == '-') {  goodName = false; break;  }
3775             else if (name[i] == '/') {  goodName = false; break;  }
3776         }
3777 
3778         if (!goodName) {
3779             m->mothurOut("\n[WARNING]: group " + name + " contains illegal characters in the name. Group names should not include :, -, or / characters.  The ':' character is a special character used in trees. Using ':' will result in your tree being unreadable by tree reading software.  The '-' character is a special character used by mothur to parse group names.  Using the '-' character will prevent you from selecting groups. The '/' character will created unreadable filenames when mothur includes the group in an output filename.\n\n");
3780         }
3781 
3782         return goodName;
3783     }
3784     catch(exception& e) {
3785         m->errorOut(e, "Utils", "checkGroupName");
3786         exit(1);
3787     }
3788 }
3789 /**********************************************************************************************************************/
readNames(string namefile,vector<seqPriorityNode> & nameVector,map<string,string> & fastamap)3790 int Utils::readNames(string namefile, vector<seqPriorityNode>& nameVector, map<string, string>& fastamap) {
3791     try {
3792         int error = 0;
3793 
3794         //open input file
3795         ifstream in;
3796         openInputFile(namefile, in);
3797 
3798         string rest = "";
3799         char buffer[4096];
3800         bool pairDone = false;
3801         bool columnOne = true;
3802         string firstCol, secondCol;
3803 
3804         while (!in.eof()) {
3805             if (m->getControl_pressed()) { break; }
3806 
3807             in.read(buffer, 4096);
3808             vector<string> pieces = splitWhiteSpace(rest, buffer, in.gcount());
3809 
3810             for (int i = 0; i < pieces.size(); i++) {
3811                 if (columnOne) {  firstCol = pieces[i]; columnOne=false; }
3812                 else  { secondCol = pieces[i]; pairDone = true; columnOne=true; }
3813 
3814                 if (pairDone) {
3815                     checkName(firstCol);
3816                     checkName(secondCol);
3817                     int num = getNumNames(secondCol);
3818 
3819                     map<string, string>::iterator it = fastamap.find(firstCol);
3820                     if (it == fastamap.end()) {
3821                         error = 1;
3822                         m->mothurOut("[ERROR]: " + firstCol + " is not in your fastafile, but is in your namesfile, please correct.\n");
3823                     }else {
3824                         seqPriorityNode temp(num, it->second, firstCol);
3825                         nameVector.push_back(temp);
3826                     }
3827 
3828                     pairDone = false;
3829                 }
3830             }
3831         }
3832         in.close();
3833 
3834         if (rest != "") {
3835             vector<string> pieces = splitWhiteSpace(rest);
3836 
3837             for (int i = 0; i < pieces.size(); i++) {
3838                 if (columnOne) {  firstCol = pieces[i]; columnOne=false; }
3839                 else  { secondCol = pieces[i]; pairDone = true; columnOne=true; }
3840 
3841                 if (pairDone) {
3842                     checkName(firstCol);
3843                     checkName(secondCol);
3844                     int num = getNumNames(secondCol);
3845 
3846                     map<string, string>::iterator it = fastamap.find(firstCol);
3847                     if (it == fastamap.end()) {
3848                         error = 1;
3849                         m->mothurOut("[ERROR]: " + firstCol + " is not in your fastafile, but is in your namesfile, please correct.\n");
3850                     }else {
3851                         seqPriorityNode temp(num, it->second, firstCol);
3852                         nameVector.push_back(temp);
3853                     }
3854 
3855                     pairDone = false;
3856                 }
3857             }
3858         }
3859         return error;
3860     }
3861     catch(exception& e) {
3862         m->errorOut(e, "Utils", "readNames");
3863         exit(1);
3864     }
3865 }
3866 //**********************************************************************************************************************
readAccnos(string accnosfile)3867 set<string> Utils::readAccnos(string accnosfile){
3868     try {
3869         set<string> names;
3870         ifstream in;
3871         bool ableToOpen = openInputFile(accnosfile, in, "");
3872         if (!ableToOpen) {  m->mothurOut("[ERROR]: Could not open " + accnosfile + "\n"); return names; }
3873         string name;
3874 
3875         while (!in.eof()) {
3876             if (m->getControl_pressed()) { break; }
3877 
3878             in >> name; gobble(in);
3879 
3880             checkName(name);
3881             names.insert(name);
3882         }
3883         in.close();
3884 
3885         return names;
3886     }
3887     catch(exception& e) {
3888         m->errorOut(e, "Utils", "readAccnos");
3889         exit(1);
3890     }
3891 }
3892 //**********************************************************************************************************************
printAccnos(string accnosfile,vector<string> & names)3893 void Utils::printAccnos(string accnosfile, vector<string>& names){
3894     try {
3895         ofstream out; openOutputFile(accnosfile, out);
3896 
3897         //output to .accnos file
3898         for (int i = 0; i < names.size(); i++) {
3899 
3900             if (m->getControl_pressed()) { break; }
3901 
3902             out << names[i] << endl;
3903         }
3904         out.close();
3905     }
3906     catch(exception& e) {
3907         m->errorOut(e, "Utils", "printAccnos");
3908         exit(1);
3909     }
3910 }
3911 //**********************************************************************************************************************
printAccnos(string accnosfile,set<string> & names)3912 void Utils::printAccnos(string accnosfile, set<string>& names){
3913     try {
3914         ofstream out; openOutputFile(accnosfile, out);
3915 
3916         //output to .accnos file
3917         for (set<string>::iterator it = names.begin(); it != names.end(); it++) {
3918 
3919             if (m->getControl_pressed()) { break; }
3920 
3921             out << *it << endl;
3922         }
3923         out.close();
3924     }
3925     catch(exception& e) {
3926         m->errorOut(e, "Utils", "printAccnos");
3927         exit(1);
3928     }
3929 }
3930 //**********************************************************************************************************************
readAccnos(string accnosfile,vector<string> & names)3931 int Utils::readAccnos(string accnosfile, vector<string>& names){
3932     try {
3933         names.clear();
3934         ifstream in;
3935         openInputFile(accnosfile, in);
3936         string name;
3937 
3938         string rest = "";
3939         char buffer[4096];
3940 
3941         while (!in.eof()) {
3942             if (m->getControl_pressed()) { break; }
3943 
3944             in.read(buffer, 4096);
3945             vector<string> pieces = splitWhiteSpace(rest, buffer, in.gcount());
3946 
3947             for (int i = 0; i < pieces.size(); i++) {  checkName(pieces[i]); names.push_back(pieces[i]);  }
3948         }
3949         in.close();
3950 
3951         if (rest != "") {
3952             vector<string> pieces = splitWhiteSpace(rest);
3953             for (int i = 0; i < pieces.size(); i++) {  checkName(pieces[i]); names.push_back(pieces[i]);  }
3954         }
3955 
3956         return 0;
3957     }
3958     catch(exception& e) {
3959         m->errorOut(e, "Utils", "readAccnos");
3960         exit(1);
3961     }
3962 }
3963 //**********************************************************************************************************************
readAccnos(string accnosfile,vector<string> & names,string noerror)3964 int Utils::readAccnos(string accnosfile, vector<string>& names, string noerror){
3965     try {
3966         names.clear();
3967         ifstream in;
3968         openInputFile(accnosfile, in, noerror);
3969         string name;
3970 
3971         while (!in.eof()) {
3972             if (m->getControl_pressed()) { break; }
3973 
3974             string line = trimWhiteSpace(getline(in));
3975             checkName(line);
3976             if (line != "") { names.push_back(line); }
3977         }
3978         in.close();
3979 
3980         return 0;
3981     }
3982     catch(exception& e) {
3983         m->errorOut(e, "Utils", "readAccnos");
3984         exit(1);
3985     }
3986 }
3987 /***********************************************************************/
3988 
getNumNames(string names)3989 int Utils::getNumNames(string names){
3990     try {
3991         int count = 0;
3992 
3993         if(names != ""){
3994             count = 1;
3995             for(int i=0;i<names.size();i++){
3996                 if(names[i] == ','){
3997                     count++;
3998                 }
3999             }
4000         }
4001 
4002         return count;
4003     }
4004     catch(exception& e) {
4005         m->errorOut(e, "Utils", "getNumNames");
4006         exit(1);
4007     }
4008 }
4009 /***********************************************************************/
4010 
getNumChar(string line,char c)4011 int Utils::getNumChar(string line, char c){
4012     try {
4013         int count = 0;
4014 
4015         if(line != ""){
4016             for(int i=0;i<line.size();i++){
4017                 if(line[i] == c){
4018                     count++;
4019                 }
4020             }
4021         }
4022 
4023         return count;
4024     }
4025     catch(exception& e) {
4026         m->errorOut(e, "Utils", "getNumChar");
4027         exit(1);
4028     }
4029 }
4030 /***********************************************************************/
getSimpleLabel(string label)4031 string Utils::getSimpleLabel(string label){
4032     try {
4033         string simple = "";
4034 
4035         //remove OTU or phylo tag
4036         string newLabel1 = "";
4037         for (int i = 0; i < label.length(); i++) {
4038             if(label[i]>47 && label[i]<58) { //is a digit
4039                 newLabel1 += label[i];
4040             }
4041         }
4042 
4043         int num1;
4044 
4045         mothurConvert(newLabel1, num1);
4046 
4047         simple = toString(num1);
4048 
4049         return simple;
4050     }
4051     catch(exception& e) {
4052         m->errorOut(e, "Utils", "getSimpleLabel");
4053         exit(1);
4054     }
4055 }
4056 /***********************************************************************/
4057 
isLabelEquivalent(string label1,string label2)4058 bool Utils::isLabelEquivalent(string label1,  string label2){
4059     try {
4060         bool same = false;
4061 
4062         //remove OTU or phylo tag
4063         string newLabel1 = "";
4064         for (int i = 0; i < label1.length(); i++) {
4065             if(label1[i]>47 && label1[i]<58) { //is a digit
4066                 newLabel1 += label1[i];
4067             }
4068         }
4069 
4070         string newLabel2 = "";
4071         for (int i = 0; i < label2.length(); i++) {
4072             if(label2[i]>47 && label2[i]<58) { //is a digit
4073                 newLabel2 += label2[i];
4074             }
4075         }
4076 
4077         int num1, num2;
4078         mothurConvert(newLabel1, num1);
4079         mothurConvert(newLabel2, num2);
4080 
4081         if (num1 == num2) { same = true; }
4082 
4083         return same;
4084     }
4085     catch(exception& e) {
4086         m->errorOut(e, "Utils", "isLabelEquivalent");
4087         exit(1);
4088     }
4089 }
4090 //**********************************************************************************************************************
isSubset(vector<string> bigset,vector<string> subset)4091 bool Utils::isSubset(vector<string> bigset, vector<string> subset) {
4092     try {
4093 
4094 
4095         if (subset.size() > bigset.size()) { return false;  }
4096 
4097         //check if each guy in subset is also in bigset
4098         for (int i = 0; i < subset.size(); i++) {
4099             bool match = false;
4100             for (int j = 0; j < bigset.size(); j++) {
4101                 if (subset[i] == bigset[j]) { match = true; break; }
4102             }
4103 
4104             //you have a guy in subset that had no match in bigset
4105             if (!match) { return false; }
4106         }
4107 
4108         return true;
4109 
4110     }
4111     catch(exception& e) {
4112         m->errorOut(e, "Utils", "isSubset");
4113         exit(1);
4114     }
4115 }
4116 /***********************************************************************/
mothurRemove(string filename)4117 bool Utils::mothurRemove(string filename){
4118     try {
4119         filename = getFullPathName(filename);
4120         int error = remove(filename.c_str());
4121         return error;
4122     }
4123     catch(exception& e) {
4124         m->errorOut(e, "Utils", "mothurRemove");
4125         exit(1);
4126     }
4127 }
4128 /***********************************************************************/
mothurConvert(string item)4129 char* Utils::mothurConvert(string item){
4130     try {
4131         char* converted = new char[item.length()+1];
4132 
4133         *converted = '\0'; strncat(converted, item.c_str(), item.length());
4134 
4135         //size_t size = item.length()+1;
4136 
4137         //strncat(converted, item.c_str(), size-strlen(converted)-1);
4138 
4139         //converted[size-1] = '\0';
4140 
4141         //if (m->getDebug()) { m->mothurOut("[DEBUG]: converting string " + item + " to char* " + converted + "\n"); }
4142 
4143         return converted;
4144     }
4145     catch(exception& e) {
4146         m->errorOut(e, "Utils", "mothurConvert-char*");
4147         exit(1);
4148     }
4149 }
4150 /***********************************************************************/
mothurConvert(string item,int & num)4151 bool Utils::mothurConvert(string item, int& num){
4152     try {
4153         bool error = false;
4154 
4155         if (isNumeric1(item)) { convert(item, num); }
4156         else {
4157             num = 0;
4158             error = true;
4159             m->mothurOut("[ERROR]: cannot convert " + item + " to an integer.\n");
4160             m->setControl_pressed(true);
4161         }
4162 
4163         return error;
4164     }
4165     catch(exception& e) {
4166         m->errorOut(e, "Utils", "mothurConvert-int");
4167         exit(1);
4168     }
4169 }
4170 /***********************************************************************/
mothurConvert(char item,int & num)4171 bool Utils::mothurConvert(char item, int& num){
4172     try {
4173         bool error = false;
4174 
4175         if (isdigit(item)) {
4176             string mystring; mothurConvert(item, mystring);
4177             mothurConvert(mystring, num);
4178         }else {
4179             num = 0;
4180             error = true;
4181             m->mothurOut("[ERROR]: cannot convert " + toString(item) + " to an integer.\n");
4182             m->setControl_pressed(true);
4183         }
4184 
4185         return error;
4186     }
4187     catch(exception& e) {
4188         m->errorOut(e, "Utils", "mothurConvert-int");
4189         exit(1);
4190     }
4191 }
4192 /***********************************************************************/
mothurConvert(char item,string & output)4193 bool Utils::mothurConvert(char item, string& output){
4194     try {
4195 
4196         stringstream ss;
4197         ss << item;
4198         ss >> output;
4199         return true;
4200 
4201     }
4202     catch(exception& e) {
4203         m->errorOut(e, "Utils", "mothurConvert-char");
4204         exit(1);
4205     }
4206 }
4207 /***********************************************************************/
mothurConvert(string item,intDist & num)4208 bool Utils::mothurConvert(string item, intDist& num){
4209     try {
4210         bool error = false;
4211 
4212         if (isNumeric1(item)) {
4213             convert(item, num);
4214         }else {
4215             num = 0;
4216             error = true;
4217             m->mothurOut("[ERROR]: cannot convert " + item + " to an integer.\n");
4218             m->setControl_pressed(true);
4219         }
4220 
4221         return error;
4222     }
4223     catch(exception& e) {
4224         m->errorOut(e, "Utils", "mothurConvert-intDist");
4225         exit(1);
4226     }
4227 }
4228 /***********************************************************************/
mothurConvert(vector<long long> & input)4229 set<long long> Utils::mothurConvert(vector<long long>& input){
4230     try {
4231         set<long long> output(input.begin(), input.end());
4232 
4233 
4234         return output;
4235     }
4236     catch(exception& e) {
4237         m->errorOut(e, "Utils", "mothurConvert-vectorToSet");
4238         exit(1);
4239     }
4240 }
4241 /***********************************************************************/
mothurConvert(set<long long> & input)4242 vector<long long> Utils::mothurConvert(set<long long>& input){
4243     try {
4244         vector<long long> output(input.begin(), input.end());
4245 
4246 
4247         return output;
4248     }
4249     catch(exception& e) {
4250         m->errorOut(e, "Utils", "mothurConvert-SetToVector");
4251         exit(1);
4252     }
4253 }
4254 /***********************************************************************/
mothurConvert(vector<string> & input)4255 set<string> Utils::mothurConvert(vector<string>& input){
4256     try {
4257         set<string> output(input.begin(), input.end());
4258 
4259 
4260         return output;
4261     }
4262     catch(exception& e) {
4263         m->errorOut(e, "Utils", "mothurConvert-vectorToSet");
4264         exit(1);
4265     }
4266 }
4267 /***********************************************************************/
mothurConvert(set<string> & input)4268 vector<string> Utils::mothurConvert(set<string>& input){
4269     try {
4270         vector<string> output(input.begin(), input.end());
4271 
4272 
4273         return output;
4274     }
4275     catch(exception& e) {
4276         m->errorOut(e, "Utils", "mothurConvert-SetToVector");
4277         exit(1);
4278     }
4279 }
4280 /**************************************************************************************************/
addUnclassifieds(string tax,int maxlevel,bool probs)4281 string Utils::addUnclassifieds(string tax, int maxlevel, bool probs) {
4282     try{
4283         string newTax, taxon;
4284 
4285         string savedTax = tax;
4286         vector<string> taxons; splitAtChar(tax, taxons, ';'); taxons.pop_back();
4287         vector<int> confidences;
4288 
4289         if (taxons.size() == maxlevel) { return savedTax; }
4290 
4291         int index = 0;
4292         int confidence = 0;
4293         int level = 1;
4294         for (int i = 0; i < taxons.size(); i++) {
4295             index = i;
4296             string thisTax = taxons[i]+";";
4297             confidence = removeConfidences(thisTax);
4298             confidences.push_back(confidence);
4299 
4300             if (thisTax == "unclassified;"){ index--; break; }
4301             else{ newTax += taxons[i] + ";";  }
4302         }
4303         level = index+1;
4304 
4305         string thisTax = taxons[index]+";";
4306 
4307         removeConfidences(thisTax);
4308         taxon = thisTax.substr(0, thisTax.length()-1);
4309 
4310         string cTax = "";
4311         if (probs)  { cTax = taxon + "_unclassified(" + toString(confidences[index]) + ");";     }
4312         else        { cTax = taxon + "_unclassified;";          }
4313 
4314         //add "unclassified" until you reach maxLevel
4315         while (level < maxlevel) {
4316             newTax += cTax;
4317             level++;
4318         }
4319 
4320         return newTax;
4321     }
4322     catch(exception& e) {
4323         m->errorOut(e, "Utils", "addUnclassifieds");
4324         exit(1);
4325     }
4326 }
4327 /**************************************************************************************************/
trimTax(string tax,int trimLevel)4328 string Utils::trimTax(string tax, int trimLevel) {
4329     try{
4330         string newTax = "";
4331         string savedTax = tax;
4332         vector<string> taxons; splitAtChar(tax, taxons, ';'); taxons.pop_back();
4333 
4334         if (taxons.size() == trimLevel) { return savedTax; }
4335         else {
4336             int level = 0;
4337             for (int i = 0; i < taxons.size(); i++) {
4338                 newTax += taxons[i] +";";
4339                 level++;
4340                 if (level == trimLevel) { break; }
4341             }
4342         }
4343 
4344         return newTax;
4345     }
4346     catch(exception& e) {
4347         m->errorOut(e, "Utils", "trimTax");
4348         exit(1);
4349     }
4350 }
4351 /**************************************************************************************************/
toUpper(string item)4352 string Utils::toUpper(string item) {
4353     try{
4354         string newItem = "";
4355 
4356         for (int i = 0; i < item.length(); i++) {
4357             newItem += toupper(item[i]);
4358         }
4359         return newItem;
4360     }
4361     catch(exception& e) {
4362         m->errorOut(e, "Utils", "toUpper");
4363         exit(1);
4364     }
4365 }
4366 /**************************************************************************************************/
toLower(string item)4367 string Utils::toLower(string item) {
4368     try{
4369         string newItem = "";
4370 
4371         for (int i = 0; i < item.length(); i++) {
4372             newItem += tolower(item[i]);
4373         }
4374         return newItem;
4375     }
4376     catch(exception& e) {
4377         m->errorOut(e, "Utils", "toLower");
4378         exit(1);
4379     }
4380 }
4381 /***********************************************************************/
isNumeric1(string stringToCheck)4382 bool Utils::isNumeric1(string stringToCheck){
4383     try {
4384         bool numeric = false;
4385 
4386         if (stringToCheck == "") { numeric = false;  }
4387         else if(stringToCheck.find_first_not_of("0123456789.-") == string::npos) { numeric = true; }
4388 
4389         return numeric;
4390     }
4391     catch(exception& e) {
4392         m->errorOut(e, "Utils", "isNumeric1");
4393         exit(1);
4394     }
4395 
4396 }
4397 /***********************************************************************/
isPositiveNumeric(string stringToCheck)4398 bool Utils::isPositiveNumeric(string stringToCheck){
4399     try {
4400         bool numeric = false;
4401 
4402         if (stringToCheck == "") { numeric = false;  }
4403         else if(stringToCheck.find_first_not_of("0123456789.") == string::npos) { numeric = true; }
4404 
4405         return numeric;
4406     }
4407     catch(exception& e) {
4408         m->errorOut(e, "Utils", "isPositiveNumeric");
4409         exit(1);
4410     }
4411 
4412 }
4413 /***********************************************************************/
isEqual(float num1,float num2)4414 bool Utils::isEqual(float num1, float num2){
4415     try {
4416         bool equal = false;
4417 
4418         if (fabs(num1-num2) <= fabs(num1 * 0.001)) { equal = true; }
4419 
4420         return equal;
4421     }
4422     catch(exception& e) {
4423         m->errorOut(e, "Utils", "isEqual");
4424         exit(1);
4425     }
4426 }
4427 /***********************************************************************/
isEqual(double num1,double num2)4428 bool Utils::isEqual(double num1, double num2){
4429     try {
4430         bool equal = false;
4431 
4432         if (fabs(num1-num2) <= fabs(num1 * 0.001)) { equal = true; }
4433 
4434         return equal;
4435     }
4436     catch(exception& e) {
4437         m->errorOut(e, "Utils", "isEqual");
4438         exit(1);
4439     }
4440 }
4441 /***********************************************************************/
allSpaces(string stringToCheck)4442 bool Utils::allSpaces(string stringToCheck){
4443     try {
4444 
4445         for (int i = 0; i < stringToCheck.length(); i++) {
4446             char c = stringToCheck[i];
4447             if (!isspace(c)) { return false; }
4448         }
4449 
4450         return true;
4451     }
4452     catch(exception& e) {
4453         m->errorOut(e, "Utils", "isNumeric1");
4454         exit(1);
4455     }
4456 
4457 }
4458 /***********************************************************************/
isInteger(string stringToCheck)4459 bool Utils::isInteger(string stringToCheck){
4460     try {
4461         bool isInt = false;
4462 
4463         if(stringToCheck.find_first_not_of("0123456789-") == string::npos) { isInt = true; }
4464 
4465         return isInt;
4466     }
4467     catch(exception& e) {
4468         m->errorOut(e, "Utils", "isInteger");
4469         exit(1);
4470     }
4471 
4472 }
4473 /***********************************************************************/
containsAlphas(string stringToCheck)4474 bool Utils::containsAlphas(string stringToCheck){
4475     try {
4476         bool containsAlpha = false;
4477 
4478         if(stringToCheck.find_first_of("AaBbCcDdEeFfGgHhIiJjKkLlMmNnOopPQqRrSsTtUuVvWwXxYyZz") != string::npos) { containsAlpha = true; }
4479 
4480         return containsAlpha;
4481     }
4482     catch(exception& e) {
4483         m->errorOut(e, "Utils", "containsAlphas");
4484         exit(1);
4485     }
4486 
4487 }
4488 /***********************************************************************/
isAllAlphas(string stringToCheck)4489 bool Utils::isAllAlphas(string stringToCheck){
4490     try {
4491         bool allAlphas = true;
4492 
4493         if(stringToCheck.find_first_not_of("AaBbCcDdEeFfGgHhIiJjKkLlMmNnOopPQqRrSsTtUuVvWwXxYyZz") != string::npos) { allAlphas = false; }
4494 
4495         return allAlphas;
4496     }
4497     catch(exception& e) {
4498         m->errorOut(e, "Utils", "isAllAlphas");
4499         exit(1);
4500     }
4501 
4502 }
4503 /***********************************************************************/
isAllAlphaNumerics(string stringToCheck)4504 bool Utils::isAllAlphaNumerics(string stringToCheck){
4505     try {
4506         bool allAlphaNumerics = true;
4507 
4508         if(stringToCheck.find_first_not_of("AaBbCcDdEeFfGgHhIiJjKkLlMmNnOopPQqRrSsTtUuVvWwXxYyZz0123456789") != string::npos) { allAlphaNumerics = false; }
4509 
4510         return allAlphaNumerics;
4511     }
4512     catch(exception& e) {
4513         m->errorOut(e, "Utils", "isAllAlphas");
4514         exit(1);
4515     }
4516 
4517 }
4518 /***********************************************************************/
mothurConvert(string item,float & num)4519 bool Utils::mothurConvert(string item, float& num){
4520     try {
4521         bool error = false;
4522 
4523         if (isNumeric1(item)) {
4524             convert(item, num);
4525         }else {
4526             try {
4527                 num = atof(item.c_str());
4528             }catch(exception& e) {
4529                 num = 0;
4530                 error = true;
4531                 m->mothurOut("[ERROR]: cannot convert " + item + " to a float.\n");
4532                 m->setControl_pressed(true);
4533             }
4534         }
4535 
4536         return error;
4537     }
4538     catch(exception& e) {
4539         m->errorOut(e, "Utils", "mothurConvert-float");
4540         exit(1);
4541     }
4542 }
4543 /***********************************************************************/
mothurConvert(string item,double & num)4544 bool Utils::mothurConvert(string item, double& num){
4545     try {
4546         bool error = false;
4547 
4548         if (isNumeric1(item)) {
4549             convert(item, num);
4550         }else {
4551             try {
4552                 num = atof(item.c_str());
4553             }catch(exception& e) {
4554                 num = 0;
4555                 error = true;
4556                 m->mothurOut("[ERROR]: cannot convert " + item + " to a double.\n");
4557                 m->setControl_pressed(true);
4558             }
4559         }
4560 
4561         return error;
4562     }
4563     catch(exception& e) {
4564         m->errorOut(e, "Utils", "mothurConvert-double");
4565         exit(1);
4566     }
4567 }
4568 /**************************************************************************************************/
4569 
binomial(int maxOrder)4570 vector<vector<double> > Utils::binomial(int maxOrder){
4571     try {
4572         vector<vector<double> > binomial(maxOrder+1);
4573 
4574         for(int i=0;i<=maxOrder;i++){
4575             binomial[i].resize(maxOrder+1);
4576             binomial[i][0]=1;
4577             binomial[0][i]=0;
4578         }
4579         binomial[0][0]=1;
4580 
4581         binomial[1][0]=1;
4582         binomial[1][1]=1;
4583 
4584         for(int i=2;i<=maxOrder;i++){
4585             binomial[1][i]=0;
4586         }
4587 
4588         for(int i=2;i<=maxOrder;i++){
4589             for(int j=1;j<=maxOrder;j++){
4590                 if(i==j){	binomial[i][j]=1;									}
4591                 if(j>i)	{	binomial[i][j]=0;									}
4592                 else	{	binomial[i][j]=binomial[i-1][j-1]+binomial[i-1][j];	}
4593             }
4594         }
4595 
4596         return binomial;
4597 
4598     }
4599     catch(exception& e) {
4600         m->errorOut(e, "Utils", "binomial");
4601         exit(1);
4602     }
4603 }
4604 /**************************************************************************************************/
fromBase36(string base36)4605 unsigned int Utils::fromBase36(string base36){
4606     try {
4607         unsigned int num = 0;
4608 
4609         map<char, int> converts;
4610         converts['A'] = 0;
4611         converts['a'] = 0;
4612         converts['B'] = 1;
4613         converts['b'] = 1;
4614         converts['C'] = 2;
4615         converts['c'] = 2;
4616         converts['D'] = 3;
4617         converts['d'] = 3;
4618         converts['E'] = 4;
4619         converts['e'] = 4;
4620         converts['F'] = 5;
4621         converts['f'] = 5;
4622         converts['G'] = 6;
4623         converts['g'] = 6;
4624         converts['H'] = 7;
4625         converts['h'] = 7;
4626         converts['I'] = 8;
4627         converts['i'] = 8;
4628         converts['J'] = 9;
4629         converts['j'] = 9;
4630         converts['K'] = 10;
4631         converts['k'] = 10;
4632         converts['L'] = 11;
4633         converts['l'] = 11;
4634         converts['M'] = 12;
4635         converts['m'] = 12;
4636         converts['N'] = 13;
4637         converts['n'] = 13;
4638         converts['O'] = 14;
4639         converts['o'] = 14;
4640         converts['P'] = 15;
4641         converts['p'] = 15;
4642         converts['Q'] = 16;
4643         converts['q'] = 16;
4644         converts['R'] = 17;
4645         converts['r'] = 17;
4646         converts['S'] = 18;
4647         converts['s'] = 18;
4648         converts['T'] = 19;
4649         converts['t'] = 19;
4650         converts['U'] = 20;
4651         converts['u'] = 20;
4652         converts['V'] = 21;
4653         converts['v'] = 21;
4654         converts['W'] = 22;
4655         converts['w'] = 22;
4656         converts['X'] = 23;
4657         converts['x'] = 23;
4658         converts['Y'] = 24;
4659         converts['y'] = 24;
4660         converts['Z'] = 25;
4661         converts['z'] = 25;
4662         converts['0'] = 26;
4663         converts['1'] = 27;
4664         converts['2'] = 28;
4665         converts['3'] = 29;
4666         converts['4'] = 30;
4667         converts['5'] = 31;
4668         converts['6'] = 32;
4669         converts['7'] = 33;
4670         converts['8'] = 34;
4671         converts['9'] = 35;
4672 
4673         int i = 0;
4674         while (i < base36.length()) {
4675             char c = base36[i];
4676             num = 36 * num + converts[c];
4677             i++;
4678         }
4679 
4680         return num;
4681 
4682     }
4683     catch(exception& e) {
4684         m->errorOut(e, "Utils", "fromBase36");
4685         exit(1);
4686     }
4687 }
4688 /***********************************************************************/
findEdianness()4689 string  Utils::findEdianness() {
4690     try {
4691         // find real endian type
4692         string endianType = "unknown";
4693         int num = 1;
4694         if(*(char *)&num == 1)
4695         {
4696             endianType = "LITTLE_ENDIAN";
4697         }
4698         else
4699         {
4700             endianType = "BIG_ENDIAN";
4701         }
4702         return endianType;
4703     }
4704     catch(exception& e) {
4705         m->errorOut(e, "Utils", "findEdianness");
4706         exit(1);
4707     }
4708 }
4709 /***********************************************************************/
median(vector<double> x)4710 double  Utils::median(vector<double> x) {
4711     try {
4712         double value = 0.0;
4713 
4714         if (x.size() == 0) { } //error
4715         else {
4716             //For example, if a < b < c, then the median of the list {a, b, c} is b, and, if a < b < c < d, then the median of the list {a, b, c, d} is the mean of b and c; i.e., it is (b + c)/2.
4717             sort(x.begin(), x.end());
4718             //is x.size even?
4719             if ((x.size()%2) == 0) { //size() is even. median = average of 2 midpoints
4720                 int midIndex1 = (x.size()/2)-1;
4721                 int midIndex2 = (x.size()/2);
4722                 value = (x[midIndex1]+ x[midIndex2]) / 2.0;
4723             }else {
4724                 int midIndex = (x.size()/2);
4725                 value = x[midIndex];
4726             }
4727         }
4728         return value;
4729     }
4730     catch(exception& e) {
4731         m->errorOut(e, "Utils", "median");
4732         exit(1);
4733     }
4734 }
4735 /***********************************************************************/
median(vector<int> x)4736 int  Utils::median(vector<int> x) {
4737     try {
4738         double value = 0;
4739 
4740         if (x.size() == 0) { } //error
4741         else {
4742             //For example, if a < b < c, then the median of the list {a, b, c} is b, and, if a < b < c < d, then the median of the list {a, b, c, d} is the mean of b and c; i.e., it is (b + c)/2.
4743             sort(x.begin(), x.end());
4744             //is x.size even?
4745             if ((x.size()%2) == 0) { //size() is even. median = average of 2 midpoints
4746                 int midIndex1 = (x.size()/2)-1;
4747                 int midIndex2 = (x.size()/2);
4748                 value = (x[midIndex1]+ x[midIndex2]) / 2.0;
4749             }else {
4750                 int midIndex = (x.size()/2);
4751                 value = x[midIndex];
4752             }
4753         }
4754         return (int) value;
4755     }
4756     catch(exception& e) {
4757         m->errorOut(e, "Utils", "median - int");
4758         exit(1);
4759     }
4760 }
4761 /***********************************************************************/
average(vector<int> x)4762 int  Utils::average(vector<int> x) {
4763     try {
4764         int value = 0;
4765 
4766         for (int i = 0; i < x.size(); i++) {
4767             if (m->getControl_pressed()) { break; }
4768             value += x[i];
4769         }
4770 
4771         return ((int) value / x.size());
4772     }
4773     catch(exception& e) {
4774         m->errorOut(e, "Utils", "average - int");
4775         exit(1);
4776     }
4777 }
factorial(int num)4778 int Utils::factorial(int num){
4779     try {
4780         int total = 1;
4781 
4782         for (int i = 1; i <= num; i++) {
4783             total *= i;
4784         }
4785 
4786         return total;
4787     }
4788     catch(exception& e) {
4789         m->errorOut(e, "Utils", "factorial");
4790         exit(1);
4791     }
4792 }
4793 /***********************************************************************/
getAlignmentLength(string file)4794 int Utils::getAlignmentLength(string file){
4795     try {
4796         ifstream in; openInputFile(file, in);
4797 
4798         Sequence seq(in);
4799 
4800         in.close();
4801 
4802         return seq.getAlignLength();
4803     }
4804     catch(exception& e) {
4805         m->errorOut(e, "Utils", "getAlignmentLength");
4806         exit(1);
4807     }
4808 }
4809 
4810 /***********************************************************************/
4811 
getNumSeqs(ifstream & file)4812 int Utils::getNumSeqs(ifstream& file){
4813     try {
4814         int numSeqs = count(istreambuf_iterator<char>(file),istreambuf_iterator<char>(), '>');
4815         file.seekg(0);
4816         return numSeqs;
4817     }
4818     catch(exception& e) {
4819         m->errorOut(e, "Utils", "getNumSeqs");
4820         exit(1);
4821     }
4822 }
4823 /***********************************************************************/
getNumSeqs(ifstream & file,int & numSeqs)4824 void Utils::getNumSeqs(ifstream& file, int& numSeqs){
4825     try {
4826         string input;
4827         numSeqs = 0;
4828         while(!file.eof()){
4829             input = getline(file);
4830             if (input.length() != 0) {
4831                 if(input[0] == '>'){ numSeqs++;	}
4832             }
4833         }
4834     }
4835     catch(exception& e) {
4836         m->errorOut(e, "Utils", "getNumSeqs");
4837         exit(1);
4838     }
4839 }
4840 /***********************************************************************/
4841 
4842 //This function parses the estimator options and puts them in a vector
splitAtChar(string & estim,vector<string> & container,char symbol)4843 void Utils::splitAtChar(string& estim, vector<string>& container, char symbol) {
4844     try {
4845 
4846         if (symbol == '-') { splitAtDash(estim, container); return; }
4847 
4848         string individual = "";
4849         int estimLength = estim.size();
4850         for(int i=0;i<estimLength;i++){
4851             if(estim[i] == symbol){
4852                 container.push_back(individual);
4853                 individual = "";
4854             }
4855             else{
4856                 individual += estim[i];
4857             }
4858         }
4859         container.push_back(individual);
4860 
4861     }
4862     catch(exception& e) {
4863         m->errorOut(e, "Utils", "splitAtChar");
4864         exit(1);
4865     }
4866 }
4867 /***********************************************************************/
4868 
4869 //This function parses the estimator options and puts them in a vector
splitAtChar(string & estim,set<string> & container,char symbol)4870 void Utils::splitAtChar(string& estim, set<string>& container, char symbol) {
4871     try {
4872 
4873         if (symbol == '-') { splitAtDash(estim, container); return; }
4874 
4875         string individual = "";
4876         int estimLength = estim.size();
4877         for(int i=0;i<estimLength;i++){
4878             if(estim[i] == symbol){
4879                 container.insert(individual);
4880                 individual = "";
4881             }
4882             else{
4883                 individual += estim[i];
4884             }
4885         }
4886         container.insert(individual);
4887 
4888     }
4889     catch(exception& e) {
4890         m->errorOut(e, "Utils", "splitAtChar");
4891         exit(1);
4892     }
4893 }
4894 
4895 /***********************************************************************/
4896 
4897 //This function parses the estimator options and puts them in a vector
splitAtDash(string & estim,vector<string> & container)4898 void Utils::splitAtDash(string& estim, vector<string>& container) {
4899     try {
4900         string individual = "";
4901         int estimLength = estim.size();
4902         bool prevEscape = false;
4903 
4904         for(int i=0;i<estimLength;i++){
4905             if(estim[i] == '-'){
4906                 if (prevEscape) {  individual += estim[i]; prevEscape = false;  } //add in dash because it was escaped.
4907                 else {
4908                     container.push_back(individual);
4909                     individual = "";
4910                 }
4911             }else if(estim[i] == '\\'){
4912                 if (i < estimLength-1) {
4913                     if (estim[i+1] == '-') { prevEscape=true; }  //are you a backslash before a dash, if yes ignore
4914                     else { individual += estim[i]; prevEscape = false;  } //if no, add in
4915                 }else { individual += estim[i]; }
4916             }else {
4917                 individual += estim[i];
4918             }
4919         }
4920 
4921 
4922 
4923         container.push_back(individual);
4924     }
4925     catch(exception& e) {
4926         m->errorOut(e, "Utils", "splitAtDash");
4927         exit(1);
4928     }
4929 }
4930 
4931 /***********************************************************************/
4932 //This function parses the label options and puts them in a set
splitAtDash(string & estim,set<string> & container)4933 void Utils::splitAtDash(string& estim, set<string>& container) {
4934     try {
4935         string individual = "";
4936         int estimLength = estim.size();
4937         bool prevEscape = false;
4938 
4939         for(int i=0;i<estimLength;i++){
4940             if(estim[i] == '-'){
4941                 if (prevEscape) {  individual += estim[i]; prevEscape = false;  } //add in dash because it was escaped.
4942                 else {
4943                     container.insert(individual);
4944                     individual = "";
4945                 }
4946             }else if(estim[i] == '\\'){
4947                 if (i < estimLength-1) {
4948                     if (estim[i+1] == '-') { prevEscape=true; }  //are you a backslash before a dash, if yes ignore
4949                     else { individual += estim[i]; prevEscape = false;  } //if no, add in
4950                 }else { individual += estim[i]; }
4951             }else {
4952                 individual += estim[i];
4953             }
4954         }
4955         container.insert(individual);
4956 
4957     }
4958     catch(exception& e) {
4959         m->errorOut(e, "Utils", "splitAtDash");
4960         exit(1);
4961     }
4962 }
4963 /***********************************************************************/
4964 //This function parses the line options and puts them in a set
splitAtDash(string & estim,set<int> & container)4965 void Utils::splitAtDash(string& estim, set<int>& container) {
4966     try {
4967         string individual = "";
4968         int lineNum;
4969         int estimLength = estim.size();
4970         bool prevEscape = false;
4971 
4972         for(int i=0;i<estimLength;i++){
4973             if(estim[i] == '-'){
4974                 if (prevEscape) {  individual += estim[i]; prevEscape = false;  } //add in dash because it was escaped.
4975                 else {
4976                     convert(individual, lineNum); //convert the string to int
4977                     container.insert(lineNum);
4978                     individual = "";
4979                 }
4980             }else if(estim[i] == '\\'){
4981                 if (i < estimLength-1) {
4982                     if (estim[i+1] == '-') { prevEscape=true; }  //are you a backslash before a dash, if yes ignore
4983                     else { individual += estim[i]; prevEscape = false;  } //if no, add in
4984                 }else { individual += estim[i]; }
4985             }else {
4986                 individual += estim[i];
4987             }
4988         }
4989 
4990         convert(individual, lineNum); //convert the string to int
4991         container.insert(lineNum);
4992     }
4993     catch(exception& e) {
4994         m->errorOut(e, "Utils", "splitAtDash");
4995         exit(1);
4996     }
4997 }
4998 
4999 /***********************************************************************/
makeList(vector<string> & names)5000 string Utils::makeList(vector<string>& names) {
5001     try {
5002         string list = "";
5003 
5004         if (names.size() == 0) { return list; }
5005 
5006         for (int i = 0; i < names.size()-1; i++) { list += names[i] + ",";  }
5007 
5008         //get last name
5009         list += names[names.size()-1];
5010 
5011         return list;
5012     }
5013     catch(exception& e) {
5014         m->errorOut(e, "Utils", "makeList");
5015         exit(1);
5016     }
5017 }
5018 
5019 /***********************************************************************/
5020 //This function parses the a string and puts peices in a vector
splitAtComma(string & estim,vector<string> & container)5021 void Utils::splitAtComma(string& estim, vector<string>& container) {
5022     try {
5023         string individual = "";
5024         int estimLength = estim.size();
5025         for(int i=0;i<estimLength;i++){
5026             if(estim[i] == ','){
5027                 container.push_back(individual);
5028                 individual = "";
5029             }
5030             else{
5031                 individual += estim[i];
5032             }
5033         }
5034         container.push_back(individual);
5035 
5036     }
5037     catch(exception& e) {
5038         m->errorOut(e, "Utils", "splitAtComma");
5039         exit(1);
5040     }
5041 }
5042 /***********************************************************************/
5043 //This function parses the a string and puts peices in a vector
splitAtComma(string & estim,vector<int> & convertedContainer)5044 void Utils::splitAtComma(string& estim, vector<int>& convertedContainer) {
5045     try {
5046         string individual = "";
5047         vector<string> container;
5048         int estimLength = estim.size();
5049         for(int i=0;i<estimLength;i++){
5050             if(estim[i] == ','){
5051                 container.push_back(individual);
5052                 individual = "";
5053             }
5054             else{
5055                 individual += estim[i];
5056             }
5057         }
5058         container.push_back(individual);
5059 
5060         for (int i = 0; i < container.size(); i++) {
5061             int temp;
5062             if (mothurConvert(container[i], temp)) { convertedContainer.push_back(temp); }
5063         }
5064 
5065     }
5066     catch(exception& e) {
5067         m->errorOut(e, "Utils", "splitAtComma");
5068         exit(1);
5069     }
5070 }
5071 /***********************************************************************/
5072 //This function splits up the various option parameters
splitAtChar(string & prefix,string & suffix,char c)5073 void Utils::splitAtChar(string& prefix, string& suffix, char c){
5074     try {
5075 
5076         string individual = "";
5077         int estimLength = prefix.size();
5078         for(int i=0;i<estimLength;i++){
5079             if(prefix[i] == c){
5080                 suffix = prefix.substr(i+1);
5081                 prefix = individual;
5082                 break;
5083             }
5084             else{
5085                 individual += prefix[i];
5086             }
5087         }
5088 
5089     }
5090     catch(exception& e) {
5091         m->errorOut(e, "Utils", "splitAtChar");
5092         exit(1);
5093     }
5094 }
5095 
5096 /***********************************************************************/
5097 
5098 //This function splits up the various option parameters
splitAtComma(string & prefix,string & suffix)5099 void Utils::splitAtComma(string& prefix, string& suffix){
5100     try {
5101         prefix = suffix.substr(0,suffix.find_first_of(','));
5102         if ((suffix.find_first_of(',')+2) <= suffix.length()) {  //checks to make sure you don't have comma at end of string
5103             suffix = suffix.substr(suffix.find_first_of(',')+1, suffix.length());
5104             string space = " ";
5105             while(suffix.at(0) == ' ')
5106                 suffix = suffix.substr(1, suffix.length());
5107         }else {  suffix = "";  }
5108 
5109     }
5110     catch(exception& e) {
5111         m->errorOut(e, "Utils", "splitAtComma");
5112         exit(1);
5113     }
5114 }
5115 /***********************************************************************/
5116 
5117 //This function separates the key value from the option value i.e. dist=96_...
splitAtEquals(string & key,string & value)5118 void Utils::splitAtEquals(string& key, string& value){
5119     try {
5120         if(value.find_first_of('=') != -1){
5121             key = value.substr(0,value.find_first_of('='));
5122             if ((value.find_first_of('=')+1) <= value.length()) {
5123                 value = value.substr(value.find_first_of('=')+1, value.length());
5124             }
5125         }else{
5126             key = value;
5127             value = 1;
5128         }
5129     }
5130     catch(exception& e) {
5131         m->errorOut(e, "Utils", "splitAtEquals");
5132         exit(1);
5133     }
5134 }
5135 
5136 /**************************************************************************************************/
5137 
inUsersGroups(string groupname,vector<string> Groups)5138 bool Utils::inUsersGroups(string groupname, vector<string> Groups) {
5139     try {
5140         for (int i = 0; i < Groups.size(); i++) {
5141             if (groupname == Groups[i]) { return true; }
5142         }
5143         return false;
5144     }
5145     catch(exception& e) {
5146         m->errorOut(e, "Utils", "inUsersGroups");
5147         exit(1);
5148     }
5149 }
5150 /**************************************************************************************************/
5151 
inUsersGroups(string groupname,set<string> Groups)5152 bool Utils::inUsersGroups(string groupname, set<string> Groups) {
5153     try {
5154         if (Groups.count(groupname) != 0) { return true; } //found it
5155         return false;
5156     }
5157     catch(exception& e) {
5158         m->errorOut(e, "Utils", "inUsersGroups");
5159         exit(1);
5160     }
5161 }
5162 
5163 /**************************************************************************************************/
5164 
inUsersGroups(vector<int> set,vector<vector<int>> sets)5165 bool Utils::inUsersGroups(vector<int> set, vector< vector<int> > sets) {
5166     try {
5167         for (int i = 0; i < sets.size(); i++) {
5168             if (set == sets[i]) { return true; }
5169         }
5170         return false;
5171     }
5172     catch(exception& e) {
5173         m->errorOut(e, "Utils", "inUsersGroups");
5174         exit(1);
5175     }
5176 }
5177 /**************************************************************************************************/
5178 
inUsersGroups(int groupname,vector<int> Groups)5179 bool Utils::inUsersGroups(int groupname, vector<int> Groups) {
5180     try {
5181         for (int i = 0; i < Groups.size(); i++) {
5182             if (groupname == Groups[i]) { return true; }
5183         }
5184         return false;
5185     }
5186     catch(exception& e) {
5187         m->errorOut(e, "Utils", "inUsersGroups");
5188         exit(1);
5189     }
5190 }
5191 
5192 /**************************************************************************************************/
5193 //returns true if any of the strings in first vector are in second vector
inUsersGroups(vector<string> groupnames,vector<string> Groups)5194 bool Utils::inUsersGroups(vector<string> groupnames, vector<string> Groups) {
5195     try {
5196 
5197         for (int i = 0; i < groupnames.size(); i++) {
5198             if (inUsersGroups(groupnames[i], Groups)) { return true; }
5199         }
5200         return false;
5201     }
5202     catch(exception& e) {
5203         m->errorOut(e, "Utils", "inUsersGroups");
5204         exit(1);
5205     }
5206 }
5207 
5208 /**************************************************************************************************/
getTag(string filename)5209 string Utils::getTag(string filename) {
5210     try {
5211         string tag = "Otu";
5212         int pos = filename.find_first_of(".tx.");
5213         if (pos != string::npos) { tag = "Phylo"; }
5214 
5215         int pos2 = filename.find_first_of(".asv.");
5216         if (pos2 != string::npos) { tag = "ASV"; }
5217 
5218         return tag;
5219     }
5220     catch(exception& e) {
5221         m->errorOut(e, "Utils", "getTag");
5222         exit(1);
5223     }
5224 }
5225 /**************************************************************************************************/
5226 //removes entries that are only white space
removeBlanks(vector<string> & tempVector)5227 int Utils::removeBlanks(vector<string>& tempVector) {
5228     try {
5229         vector<string> newVector;
5230         for (int i = 0; i < tempVector.size(); i++) {
5231             bool isBlank = true;
5232             for (int j = 0; j < tempVector[i].length(); j++) {
5233                 if (!isspace(tempVector[i][j])) { isBlank = false; j+= tempVector[i].length(); } //contains non space chars, break out and save
5234             }
5235             if (!isBlank) { newVector.push_back(tempVector[i]); }
5236         }
5237         tempVector = newVector;
5238         return 0;
5239     }
5240     catch(exception& e) {
5241         m->errorOut(e, "Utils", "removeBlanks");
5242         exit(1);
5243     }
5244 }
5245 /***********************************************************************/
getNextShared(InputData & input,bool allLines,set<string> & userLabels,set<string> & processedLabels,string & lastLabel,string optionOutput)5246 SharedRAbundVectors* Utils::getNextShared(InputData& input, bool allLines, set<string>& userLabels, set<string>& processedLabels, string& lastLabel, string optionOutput) {//input, allLines, userLabels, processedLabels
5247     try {
5248 
5249         SharedRAbundVectors* lookup = input.getSharedRAbundVectors();
5250 
5251         //as long as you are not at the end of the file or done wih the lines you want
5252         while((lookup != NULL) && ((allLines == 1) || (userLabels.size() != 0))) {
5253 
5254             if (m->getControl_pressed()) {  delete lookup;  return NULL; }
5255 
5256             if (lastLabel == "") {  lastLabel = lookup->getLabel();  }
5257 
5258             if(allLines == 1 || userLabels.count(lookup->getLabel()) == 1){ //process all lines or this is a line we want
5259 
5260                 m->mothurOut(lookup->getLabel()+ " " + optionOutput +"\n");
5261 
5262                 processedLabels.insert(lookup->getLabel()); userLabels.erase(lookup->getLabel());
5263 
5264                 return lookup;
5265             }
5266 
5267             if ((anyLabelsToProcess(lookup->getLabel(), userLabels, "") ) && (processedLabels.count(lastLabel) != 1)) { //use smart distancing to find previous small distance if user labels differ from the labels in file.
5268 
5269                 string saveLabel = lookup->getLabel();
5270 
5271                 delete lookup;
5272                 lookup = input.getSharedRAbundVectors(lastLabel);
5273                 m->mothurOut(lookup->getLabel()+"\n");
5274 
5275                 processedLabels.insert(lookup->getLabel()); userLabels.erase(lookup->getLabel());
5276 
5277                 lastLabel = saveLabel;
5278 
5279                 return lookup;
5280             }
5281 
5282             lastLabel = lookup->getLabel();
5283             //prevent memory leak
5284             delete lookup;
5285 
5286             if (m->getControl_pressed()) {  return NULL; }
5287 
5288             //get next line to process
5289             lookup = input.getSharedRAbundVectors();
5290         }
5291 
5292         if (m->getControl_pressed()) { delete lookup;  return NULL; }
5293 
5294         //output error messages about any remaining user labels
5295         set<string>::iterator it;
5296         bool needToRun = false;
5297         for (it = userLabels.begin(); it != userLabels.end(); it++) {
5298             m->mothurOut("Your file does not include the label " + *it);
5299             if (processedLabels.count(lastLabel) != 1) { m->mothurOut(". I will use " + lastLabel + ".\n"); needToRun = true; }
5300             else { m->mothurOut(". Please refer to " + lastLabel + ".\n");  }
5301         }
5302 
5303         //run last label if you need to
5304         if (needToRun )  {
5305             delete lookup;
5306             lookup = input.getSharedRAbundVectors(lastLabel);
5307             if (lookup != NULL) {
5308                 m->mothurOut(lookup->getLabel()+"\n");
5309                 processedLabels.insert(lookup->getLabel()); userLabels.erase(lookup->getLabel());
5310             }
5311             return lookup;
5312         }
5313 
5314         return lookup;
5315 
5316     }catch(exception& e) {
5317             m->errorOut(e, "Utils", "getNextShared");
5318             exit(1);
5319     }
5320 }
5321 /***********************************************************************/
getNextRelabund(InputData & input,bool allLines,set<string> & userLabels,set<string> & processedLabels,string & lastLabel)5322 SharedRAbundFloatVectors* Utils::getNextRelabund(InputData& input, bool allLines, set<string>& userLabels, set<string>& processedLabels, string& lastLabel) {//input, allLines, userLabels, processedLabels
5323     try {
5324 
5325         SharedRAbundFloatVectors* lookup = input.getSharedRAbundFloatVectors();
5326 
5327         //as long as you are not at the end of the file or done wih the lines you want
5328         while((lookup != NULL) && ((allLines == 1) || (userLabels.size() != 0))) {
5329 
5330             if (m->getControl_pressed()) {  delete lookup;  return NULL; }
5331 
5332             if (lastLabel == "") {  lastLabel = lookup->getLabel();  }
5333 
5334             if(allLines == 1 || userLabels.count(lookup->getLabel()) == 1){ //process all lines or this is a line we want
5335 
5336                 m->mothurOut(lookup->getLabel()+"\n");
5337 
5338                 processedLabels.insert(lookup->getLabel()); userLabels.erase(lookup->getLabel());
5339 
5340                 return lookup;
5341             }
5342 
5343             if ((anyLabelsToProcess(lookup->getLabel(), userLabels, "") ) && (processedLabels.count(lastLabel) != 1)) { //use smart distancing to find previous small distance if user labels differ from the labels in file.
5344 
5345                 string saveLabel = lookup->getLabel();
5346 
5347                 delete lookup;
5348                 lookup = input.getSharedRAbundFloatVectors(lastLabel);
5349                 m->mothurOut(lookup->getLabel()+"\n");
5350 
5351                 processedLabels.insert(lookup->getLabel()); userLabels.erase(lookup->getLabel());
5352 
5353                 lastLabel = saveLabel;
5354 
5355                 return lookup;
5356             }
5357 
5358             lastLabel = lookup->getLabel();
5359             //prevent memory leak
5360             delete lookup;
5361 
5362             if (m->getControl_pressed()) {   return NULL; }
5363 
5364             //get next line to process
5365             lookup = input.getSharedRAbundFloatVectors();
5366         }
5367 
5368         if (m->getControl_pressed()) { delete lookup;  return NULL; }
5369 
5370         //output error messages about any remaining user labels
5371         set<string>::iterator it;
5372         bool needToRun = false;
5373         for (it = userLabels.begin(); it != userLabels.end(); it++) {
5374             m->mothurOut("Your file does not include the label " + *it);
5375             if (processedLabels.count(lastLabel) != 1) { m->mothurOut(". I will use " + lastLabel + ".\n"); needToRun = true; }
5376             else { m->mothurOut(". Please refer to " + lastLabel + ".\n");  }
5377         }
5378 
5379         //run last label if you need to
5380         if (needToRun )  {
5381             delete lookup;
5382             lookup = input.getSharedRAbundFloatVectors(lastLabel);
5383             if (lookup != NULL) {
5384                 m->mothurOut(lookup->getLabel()+"\n");
5385                 processedLabels.insert(lookup->getLabel()); userLabels.erase(lookup->getLabel());
5386             }
5387             return lookup;
5388         }
5389 
5390         return lookup;
5391 
5392     }catch(exception& e) {
5393             m->errorOut(e, "Utils", "getNextRelabund");
5394             exit(1);
5395     }
5396 }
5397 /***********************************************************************/
getNextCLR(InputData & input,bool allLines,set<string> & userLabels,set<string> & processedLabels,string & lastLabel)5398 SharedCLRVectors* Utils::getNextCLR(InputData& input, bool allLines, set<string>& userLabels, set<string>& processedLabels, string& lastLabel) {//input, allLines, userLabels, processedLabels
5399     try {
5400 
5401         SharedCLRVectors* lookup = input.getSharedCLRVectors();
5402 
5403         //as long as you are not at the end of the file or done wih the lines you want
5404         while((lookup != NULL) && ((allLines == 1) || (userLabels.size() != 0))) {
5405 
5406             if (m->getControl_pressed()) {  delete lookup;  return NULL; }
5407 
5408             if (lastLabel == "") {  lastLabel = lookup->getLabel();  }
5409 
5410             if(allLines == 1 || userLabels.count(lookup->getLabel()) == 1){ //process all lines or this is a line we want
5411 
5412                 m->mothurOut(lookup->getLabel()+"\n");
5413 
5414                 processedLabels.insert(lookup->getLabel()); userLabels.erase(lookup->getLabel());
5415 
5416                 return lookup;
5417             }
5418 
5419             if ((anyLabelsToProcess(lookup->getLabel(), userLabels, "") ) && (processedLabels.count(lastLabel) != 1)) { //use smart distancing to find previous small distance if user labels differ from the labels in file.
5420 
5421                 string saveLabel = lookup->getLabel();
5422 
5423                 delete lookup;
5424                 lookup = input.getSharedCLRVectors(lastLabel);
5425                 m->mothurOut(lookup->getLabel()+"\n");
5426 
5427                 processedLabels.insert(lookup->getLabel()); userLabels.erase(lookup->getLabel());
5428 
5429                 lastLabel = saveLabel;
5430 
5431                 return lookup;
5432             }
5433 
5434             lastLabel = lookup->getLabel();
5435             //prevent memory leak
5436             delete lookup;
5437 
5438             if (m->getControl_pressed()) {  return NULL; }
5439 
5440             //get next line to process
5441             lookup = input.getSharedCLRVectors();
5442         }
5443 
5444         if (m->getControl_pressed()) { delete lookup;  return NULL; }
5445 
5446         //output error messages about any remaining user labels
5447         set<string>::iterator it;
5448         bool needToRun = false;
5449         for (it = userLabels.begin(); it != userLabels.end(); it++) {
5450             m->mothurOut("Your file does not include the label " + *it);
5451             if (processedLabels.count(lastLabel) != 1) { m->mothurOut(". I will use " + lastLabel + ".\n"); needToRun = true; }
5452             else { m->mothurOut(". Please refer to " + lastLabel + ".\n");  }
5453         }
5454 
5455         //run last label if you need to
5456         if (needToRun )  {
5457             delete lookup;
5458             lookup = input.getSharedCLRVectors(lastLabel);
5459             if (lookup != NULL) {
5460                 m->mothurOut(lookup->getLabel()+"\n");
5461                 processedLabels.insert(lookup->getLabel()); userLabels.erase(lookup->getLabel());
5462             }
5463             return lookup;
5464         }
5465 
5466         return lookup;
5467 
5468     }catch(exception& e) {
5469             m->errorOut(e, "Utils", "getNextCLR");
5470             exit(1);
5471     }
5472 }
5473 /***********************************************************************/
getNextList(InputData & input,bool allLines,set<string> & userLabels,set<string> & processedLabels,string & lastLabel)5474 ListVector* Utils::getNextList(InputData& input, bool allLines, set<string>& userLabels, set<string>& processedLabels, string& lastLabel) {//input, allLines, userLabels, processedLabels
5475     try {
5476 
5477         ListVector* list = input.getListVector();
5478 
5479         //as long as you are not at the end of the file or done wih the lines you want
5480         while((list != NULL) && ((allLines == 1) || (userLabels.size() != 0))) {
5481 
5482             if (m->getControl_pressed()) {  delete list;  return NULL; }
5483 
5484             if (lastLabel == "") {  lastLabel = list->getLabel();  }
5485 
5486             if(allLines == 1 || userLabels.count(list->getLabel()) == 1){ //process all lines or this is a line we want
5487 
5488                 m->mothurOut(list->getLabel()+"\n");
5489 
5490                 processedLabels.insert(list->getLabel()); userLabels.erase(list->getLabel());
5491 
5492                 return list;
5493             }
5494 
5495             if ((anyLabelsToProcess(list->getLabel(), userLabels, "") ) && (processedLabels.count(lastLabel) != 1)) { //use smart distancing to find previous small distance if user labels differ from the labels in file.
5496 
5497                 string saveLabel = list->getLabel();
5498 
5499                 delete list;
5500                 list = input.getListVector(lastLabel);
5501                 m->mothurOut(list->getLabel()+"\n");
5502 
5503                 processedLabels.insert(list->getLabel()); userLabels.erase(list->getLabel());
5504 
5505                 lastLabel = saveLabel;
5506 
5507                 return list;
5508             }
5509 
5510             lastLabel = list->getLabel();
5511             //prevent memory leak
5512             delete list;
5513 
5514             if (m->getControl_pressed()) {   return NULL; }
5515 
5516             //get next line to process
5517             list = input.getListVector();
5518         }
5519 
5520         if (m->getControl_pressed()) { delete list;  return NULL; }
5521 
5522         //output error messages about any remaining user labels
5523         set<string>::iterator it;
5524         bool needToRun = false;
5525         for (it = userLabels.begin(); it != userLabels.end(); it++) {
5526             m->mothurOut("Your file does not include the label " + *it);
5527             if (processedLabels.count(lastLabel) != 1) { m->mothurOut(". I will use " + lastLabel + ".\n"); needToRun = true; }
5528             else { m->mothurOut(". Please refer to " + lastLabel + ".\n");  }
5529         }
5530 
5531         //run last label if you need to
5532         if (needToRun )  {
5533             delete list;
5534             list = input.getListVector(lastLabel);
5535             if (list != NULL) {
5536                 m->mothurOut(list->getLabel()+"\n");
5537                 processedLabels.insert(list->getLabel()); userLabels.erase(list->getLabel());
5538             }
5539             return list;
5540         }
5541 
5542         return list;
5543 
5544     }catch(exception& e) {
5545             m->errorOut(e, "Utils", "getNextList");
5546             exit(1);
5547     }
5548 }
5549 /***********************************************************************/
getNextRAbund(InputData & input,bool allLines,set<string> & userLabels,set<string> & processedLabels,string & lastLabel)5550 RAbundVector* Utils::getNextRAbund(InputData& input, bool allLines, set<string>& userLabels, set<string>& processedLabels, string& lastLabel) {//input, allLines, userLabels, processedLabels
5551     try {
5552 
5553         RAbundVector* rabund = input.getRAbundVector();
5554 
5555         //as long as you are not at the end of the file or done wih the lines you want
5556         while((rabund != NULL) && ((allLines == 1) || (userLabels.size() != 0))) {
5557 
5558             if (m->getControl_pressed()) {  delete rabund;  return NULL; }
5559 
5560             if (lastLabel == "") {  lastLabel = rabund->getLabel();  }
5561 
5562             if(allLines == 1 || userLabels.count(rabund->getLabel()) == 1){ //process all lines or this is a line we want
5563 
5564                 m->mothurOut(rabund->getLabel()+"\n");
5565 
5566                 processedLabels.insert(rabund->getLabel()); userLabels.erase(rabund->getLabel());
5567 
5568                 return rabund;
5569             }
5570 
5571             if ((anyLabelsToProcess(rabund->getLabel(), userLabels, "") ) && (processedLabels.count(lastLabel) != 1)) { //use smart distancing to find previous small distance if user labels differ from the labels in file.
5572 
5573                 string saveLabel = rabund->getLabel();
5574 
5575                 delete rabund;
5576                 rabund = input.getRAbundVector(lastLabel);
5577                 m->mothurOut(rabund->getLabel()+"\n");
5578 
5579                 processedLabels.insert(rabund->getLabel()); userLabels.erase(rabund->getLabel());
5580 
5581                 lastLabel = saveLabel;
5582 
5583                 return rabund;
5584             }
5585 
5586             lastLabel = rabund->getLabel();
5587             //prevent memory leak
5588             delete rabund;
5589 
5590             if (m->getControl_pressed()) {  return NULL; }
5591 
5592             //get next line to process
5593             rabund = input.getRAbundVector();
5594         }
5595 
5596         if (m->getControl_pressed()) { delete rabund;  return NULL; }
5597 
5598         //output error messages about any remaining user labels
5599         set<string>::iterator it;
5600         bool needToRun = false;
5601         for (it = userLabels.begin(); it != userLabels.end(); it++) {
5602             m->mothurOut("Your file does not include the label " + *it);
5603             if (processedLabels.count(lastLabel) != 1) { m->mothurOut(". I will use " + lastLabel + ".\n"); needToRun = true; }
5604             else { m->mothurOut(". Please refer to " + lastLabel + ".\n");  }
5605         }
5606 
5607         //run last label if you need to
5608         if (needToRun )  {
5609             delete rabund;
5610             rabund = input.getRAbundVector(lastLabel);
5611             if (rabund != NULL) {
5612                 m->mothurOut(rabund->getLabel()+"\n");
5613                 processedLabels.insert(rabund->getLabel()); userLabels.erase(rabund->getLabel());
5614             }
5615             return rabund;
5616         }
5617 
5618         return rabund;
5619 
5620     }catch(exception& e) {
5621             m->errorOut(e, "Utils", "getNextRAbund");
5622             exit(1);
5623     }
5624 }
5625 /***********************************************************************/
getNextSAbund(InputData & input,bool allLines,set<string> & userLabels,set<string> & processedLabels,string & lastLabel)5626 SAbundVector* Utils::getNextSAbund(InputData& input, bool allLines, set<string>& userLabels, set<string>& processedLabels, string& lastLabel) {//input, allLines, userLabels, processedLabels
5627     try {
5628 
5629         SAbundVector* sabund = input.getSAbundVector();
5630 
5631         //as long as you are not at the end of the file or done wih the lines you want
5632         while((sabund != NULL) && ((allLines == 1) || (userLabels.size() != 0))) {
5633 
5634             if (m->getControl_pressed()) {  delete sabund;  return NULL; }
5635 
5636             if (lastLabel == "") {  lastLabel = sabund->getLabel();  }
5637 
5638             if(allLines == 1 || userLabels.count(sabund->getLabel()) == 1){ //process all lines or this is a line we want
5639 
5640                 m->mothurOut(sabund->getLabel()+"\n");
5641 
5642                 processedLabels.insert(sabund->getLabel()); userLabels.erase(sabund->getLabel());
5643 
5644                 return sabund;
5645             }
5646 
5647             if ((anyLabelsToProcess(sabund->getLabel(), userLabels, "") ) && (processedLabels.count(lastLabel) != 1)) { //use smart distancing to find previous small distance if user labels differ from the labels in file.
5648 
5649                 string saveLabel = sabund->getLabel();
5650 
5651                 delete sabund;
5652                 sabund = input.getSAbundVector(lastLabel);
5653                 m->mothurOut(sabund->getLabel()+"\n");
5654 
5655                 processedLabels.insert(sabund->getLabel()); userLabels.erase(sabund->getLabel());
5656 
5657                 lastLabel = saveLabel;
5658 
5659                 return sabund;
5660             }
5661 
5662             lastLabel = sabund->getLabel();
5663             //prevent memory leak
5664             delete sabund;
5665 
5666             if (m->getControl_pressed()) {  return NULL; }
5667 
5668             //get next line to process
5669             sabund = input.getSAbundVector();
5670         }
5671 
5672         if (m->getControl_pressed()) { delete sabund;  return NULL; }
5673 
5674         //output error messages about any remaining user labels
5675         set<string>::iterator it;
5676         bool needToRun = false;
5677         for (it = userLabels.begin(); it != userLabels.end(); it++) {
5678             m->mothurOut("Your file does not include the label " + *it);
5679             if (processedLabels.count(lastLabel) != 1) { m->mothurOut(". I will use " + lastLabel + ".\n"); needToRun = true; }
5680             else { m->mothurOut(". Please refer to " + lastLabel + ".\n");  }
5681         }
5682 
5683         //run last label if you need to
5684         if (needToRun )  {
5685             delete sabund;
5686             sabund = input.getSAbundVector(lastLabel);
5687             if (sabund != NULL) {
5688                 m->mothurOut(sabund->getLabel()+"\n");
5689                 processedLabels.insert(sabund->getLabel()); userLabels.erase(sabund->getLabel());
5690             }
5691             return sabund;
5692         }
5693 
5694         return sabund;
5695 
5696     }catch(exception& e) {
5697             m->errorOut(e, "Utils", "getNextSAbund");
5698             exit(1);
5699     }
5700 }
5701 /***********************************************************************/
getNextOrder(InputData & input,bool allLines,set<string> & userLabels,set<string> & processedLabels,string & lastLabel)5702 OrderVector* Utils::getNextOrder(InputData& input, bool allLines, set<string>& userLabels, set<string>& processedLabels, string& lastLabel) {//input, allLines, userLabels, processedLabels
5703     try {
5704 
5705         OrderVector* order = input.getOrderVector();
5706 
5707         //as long as you are not at the end of the file or done wih the lines you want
5708         while((order != NULL) && ((allLines == 1) || (userLabels.size() != 0))) {
5709 
5710             if (m->getControl_pressed()) {  delete order;  return NULL; }
5711 
5712             if (lastLabel == "") {  lastLabel = order->getLabel();  }
5713 
5714             if(allLines == 1 || userLabels.count(order->getLabel()) == 1){ //process all lines or this is a line we want
5715 
5716                 m->mothurOut(order->getLabel()+"\n");
5717 
5718                 processedLabels.insert(order->getLabel()); userLabels.erase(order->getLabel());
5719 
5720                 return order;
5721             }
5722 
5723             if ((anyLabelsToProcess(order->getLabel(), userLabels, "") ) && (processedLabels.count(lastLabel) != 1)) { //use smart distancing to find previous small distance if user labels differ from the labels in file.
5724 
5725                 string saveLabel = order->getLabel();
5726 
5727                 delete order;
5728                 order = input.getOrderVector(lastLabel);
5729                 m->mothurOut(order->getLabel()+"\n");
5730 
5731                 processedLabels.insert(order->getLabel()); userLabels.erase(order->getLabel());
5732 
5733                 lastLabel = saveLabel;
5734 
5735                 return order;
5736             }
5737 
5738             lastLabel = order->getLabel();
5739             //prevent memory leak
5740             delete order;
5741 
5742             if (m->getControl_pressed()) {   return NULL; }
5743 
5744             //get next line to process
5745             order = input.getOrderVector();
5746         }
5747 
5748         if (m->getControl_pressed()) { delete order;  return NULL; }
5749 
5750         //output error messages about any remaining user labels
5751         set<string>::iterator it;
5752         bool needToRun = false;
5753         for (it = userLabels.begin(); it != userLabels.end(); it++) {
5754             m->mothurOut("Your file does not include the label " + *it);
5755             if (processedLabels.count(lastLabel) != 1) { m->mothurOut(". I will use " + lastLabel + ".\n"); needToRun = true; }
5756             else { m->mothurOut(". Please refer to " + lastLabel + ".\n");  }
5757         }
5758 
5759         //run last label if you need to
5760         if (needToRun )  {
5761             delete order;
5762             order = input.getOrderVector(lastLabel);
5763             if (order != NULL) {
5764                 m->mothurOut(order->getLabel()+"\n");
5765                 processedLabels.insert(order->getLabel()); userLabels.erase(order->getLabel());
5766             }
5767             return order;
5768         }
5769 
5770         return order;
5771 
5772     }catch(exception& e) {
5773             m->errorOut(e, "Utils", "getNextOrder");
5774             exit(1);
5775     }
5776 }
5777 /***********************************************************************/
getNextSharedOrder(InputData & input,bool allLines,set<string> & userLabels,set<string> & processedLabels,string & lastLabel)5778 SharedOrderVector* Utils::getNextSharedOrder(InputData& input, bool allLines, set<string>& userLabels, set<string>& processedLabels, string& lastLabel) {//input, allLines, userLabels, processedLabels
5779     try {
5780 
5781         SharedOrderVector* order = input.getSharedOrderVector();
5782 
5783         //as long as you are not at the end of the file or done wih the lines you want
5784         while((order != NULL) && ((allLines == 1) || (userLabels.size() != 0))) {
5785 
5786             if (m->getControl_pressed()) {  delete order;  return NULL; }
5787 
5788             if (lastLabel == "") {  lastLabel = order->getLabel();  }
5789 
5790             if(allLines == 1 || userLabels.count(order->getLabel()) == 1){ //process all lines or this is a line we want
5791 
5792                 m->mothurOut(order->getLabel()+"\n");
5793 
5794                 processedLabels.insert(order->getLabel()); userLabels.erase(order->getLabel());
5795 
5796                 return order;
5797             }
5798 
5799             if ((anyLabelsToProcess(order->getLabel(), userLabels, "") ) && (processedLabels.count(lastLabel) != 1)) { //use smart distancing to find previous small distance if user labels differ from the labels in file.
5800 
5801                 string saveLabel = order->getLabel();
5802 
5803                 delete order;
5804                 order = input.getSharedOrderVector(lastLabel);
5805                 m->mothurOut(order->getLabel()+"\n");
5806 
5807                 processedLabels.insert(order->getLabel()); userLabels.erase(order->getLabel());
5808 
5809                 lastLabel = saveLabel;
5810 
5811                 return order;
5812             }
5813 
5814             lastLabel = order->getLabel();
5815             //prevent memory leak
5816             delete order;
5817 
5818             if (m->getControl_pressed()) {   return NULL; }
5819 
5820             //get next line to process
5821             order = input.getSharedOrderVector();
5822         }
5823 
5824         if (m->getControl_pressed()) { delete order;  return NULL; }
5825 
5826         //output error messages about any remaining user labels
5827         set<string>::iterator it;
5828         bool needToRun = false;
5829         for (it = userLabels.begin(); it != userLabels.end(); it++) {
5830             m->mothurOut("Your file does not include the label " + *it);
5831             if (processedLabels.count(lastLabel) != 1) { m->mothurOut(". I will use " + lastLabel + ".\n"); needToRun = true; }
5832             else { m->mothurOut(". Please refer to " + lastLabel + ".\n");  }
5833         }
5834 
5835         //run last label if you need to
5836         if (needToRun )  {
5837             delete order;
5838             order = input.getSharedOrderVector(lastLabel);
5839             if (order != NULL) {
5840                 m->mothurOut(order->getLabel()+"\n");
5841                 processedLabels.insert(order->getLabel()); userLabels.erase(order->getLabel());
5842             }
5843             return order;
5844         }
5845 
5846         return order;
5847 
5848     }catch(exception& e) {
5849             m->errorOut(e, "Utils", "getNextSharedOrder");
5850             exit(1);
5851     }
5852 }
5853 /***********************************************************************/
5854 //this function determines if the user has given us labels that are smaller than the given label.
5855 //if so then it returns true so that the calling function can run the previous valid distance.
5856 //it's a "smart" distance function.  It also checks for invalid labels.
anyLabelsToProcess(string label,set<string> & userLabels,string errorOff)5857 bool Utils::anyLabelsToProcess(string label, set<string>& userLabels, string errorOff) {
5858     try {
5859 
5860         set<string>::iterator it;
5861         vector<float> orderFloat;
5862         map<string, float> userMap;  //the conversion process removes trailing 0's which we need to put back
5863         map<string, float>::iterator it2;
5864         float labelFloat;
5865         bool smaller = false;
5866 
5867         //unique is the smallest line
5868         if (label == "unique") {  return false;  }
5869         else {
5870             if (convertTestFloat(label, labelFloat)) {
5871                 convert(label, labelFloat);
5872             }else { //cant convert
5873                 return false;
5874             }
5875         }
5876 
5877         //go through users set and make them floats
5878         for(it = userLabels.begin(); it != userLabels.end();) {
5879 
5880             float temp;
5881             if ((*it != "unique") && (convertTestFloat(*it, temp) )){
5882                 convert(*it, temp);
5883                 orderFloat.push_back(temp);
5884                 userMap[*it] = temp;
5885                 it++;
5886             }else if (*it == "unique") {
5887                 orderFloat.push_back(-1.0);
5888                 userMap["unique"] = -1.0;
5889                 it++;
5890             }else {
5891                 if (errorOff == "") {  cout << (*it + " is not a valid label.\n");  }
5892                 userLabels.erase(it++);
5893             }
5894         }
5895 
5896         //sort order
5897         sort(orderFloat.begin(), orderFloat.end());
5898 
5899         /*************************************************/
5900         //is this label bigger than any of the users labels
5901         /*************************************************/
5902 
5903         //loop through order until you find a label greater than label
5904         for (int i = 0; i < orderFloat.size(); i++) {
5905             if (orderFloat[i] < labelFloat) {
5906                 smaller = true;
5907                 if (isEqual(orderFloat[i], -1)) {
5908                     if (errorOff == "") { cout << ("Your file does not include the label unique.\n"); }
5909                     userLabels.erase("unique");
5910                 }
5911                 else {
5912                     if (errorOff == "") { cout << ("Your file does not include the label. \n");  }
5913                     string s = "";
5914                     for (it2 = userMap.begin(); it2!= userMap.end(); it2++) {
5915                         if (isEqual(it2->second, orderFloat[i])) {
5916                             s = it2->first;
5917                             //remove small labels
5918                             userLabels.erase(s);
5919                             break;
5920                         }
5921                     }
5922                     if (errorOff == "") {cout << ( s +  ". I will use the next smallest distance. \n"); }
5923                 }
5924                 //since they are sorted once you find a bigger one stop looking
5925             }else { break; }
5926         }
5927 
5928         return smaller;
5929 
5930     }
5931     catch(exception& e) {
5932         m->errorOut(e, "Utils", "anyLabelsToProcess");
5933         exit(1);
5934     }
5935 }
5936 /**************************************************************************************************/
5937 // query = v2.15.2 minversion = v2.13.5
isVsearchVersionValid(string query,string minversion)5938 bool Utils::isVsearchVersionValid(string query, string minversion) {
5939     try {
5940 
5941         bool good = true;
5942 
5943         vector<string> versionVector;
5944         splitAtChar(minversion, versionVector, '.');
5945 
5946         //check file version
5947         vector<string> queryVector;
5948         splitAtChar(query, queryVector, '.');
5949 
5950         if (versionVector.size() != queryVector.size()) { good = false; }
5951         else if (versionVector.size() != 3) { good = false; }
5952         else {
5953             if (versionVector[0] != queryVector[0]) { good = false; return good; } //major version - v2 or v1
5954 
5955             //minor version - 13 or 15
5956             int queryNum, minVersionNum;
5957             convert(versionVector[1], minVersionNum);
5958             convert(queryVector[1], queryNum);
5959 
5960             //if query minor version is older (smaller) than minversion
5961             if (minVersionNum > queryNum) {  good = false; }
5962             else if (minVersionNum == queryNum) { //if major and minor versions match, check patches
5963 
5964                 //patch version
5965                 convert(versionVector[2], minVersionNum);
5966                 convert(queryVector[2], queryNum);
5967 
5968                 if (minVersionNum > queryNum) {  good = false; }
5969             }
5970         }
5971 
5972         return good;
5973     }
5974     catch(exception& e) {
5975         m->errorOut(e, "Utils", "checkReleaseVersion");
5976         exit(1);
5977     }
5978 }
5979 /**************************************************************************************************/
checkReleaseVersion(string line,string version)5980 bool Utils::checkReleaseVersion(string line, string version) {
5981     try {
5982 
5983         bool good = true;
5984 
5985         //before we added this check
5986         if (line[0] != '#') {  good = false;  }
5987         else {
5988             //rip off #
5989             line = line.substr(1);
5990 
5991             vector<string> versionVector;
5992             splitAtChar(version, versionVector, '.');
5993 
5994             //check file version
5995             vector<string> linesVector;
5996             splitAtChar(line, linesVector, '.');
5997 
5998             if (versionVector.size() != linesVector.size()) { good = false; }
5999             else {
6000                 for (int j = 0; j < versionVector.size(); j++) {
6001                     int num1, num2;
6002                     convert(versionVector[j], num1);
6003                     convert(linesVector[j], num2);
6004 
6005                     //if mothurs version is newer than this files version, then we want to remake it
6006                     if (num1 > num2) {  good = false; break;  }
6007                 }
6008             }
6009 
6010         }
6011         return good;
6012     }
6013     catch(exception& e) {
6014         m->errorOut(e, "Utils", "checkReleaseVersion");
6015         exit(1);
6016     }
6017 }
6018 /**************************************************************************************************/
getTimeStamp(string filename)6019 int Utils::getTimeStamp(string filename) {
6020     try {
6021         int timeStamp = 0;
6022 
6023 #if defined NON_WINDOWS
6024         struct stat st;
6025         int errorCode = stat (filename.c_str(), &st);
6026         if (errorCode != 0) {
6027             m->mothurOut("[ERROR]: Can't find timestamp for " + filename + "\n"); m->setControl_pressed(true);
6028         }else {
6029             timeStamp = st.st_mtime;
6030         }
6031 #else
6032         HANDLE hFile;
6033 
6034         hFile = CreateFile(filename.c_str(), GENERIC_READ, FILE_SHARE_READ, NULL,
6035                            OPEN_EXISTING, 0, NULL);
6036 
6037         if(hFile == INVALID_HANDLE_VALUE) {
6038             m->mothurOut("[ERROR]: Can't find timestamp for " + filename + "\n"); m->setControl_pressed(true);
6039             CloseHandle(hFile); return timeStamp;
6040         }
6041 
6042         FILETIME ftCreate, ftAccess, ftWrite;
6043         SYSTEMTIME stUTC;
6044         DWORD dwRet;
6045 
6046         // Retrieve the file times for the file.
6047         bool success = GetFileTime(hFile, &ftCreate, &ftAccess, &ftWrite);
6048 
6049         if (success) {
6050             FileTimeToSystemTime(&ftWrite, &stUTC);
6051 
6052             tm time;
6053             time.tm_sec = stUTC.wSecond;
6054             time.tm_min = stUTC.wMinute;
6055             time.tm_hour = stUTC.wHour;
6056             time.tm_mday = stUTC.wDay;
6057             time.tm_mon = stUTC.wMonth - 1;
6058             time.tm_year = stUTC.wYear - 1900;
6059             time.tm_isdst = -1;
6060             time_t t = mktime(&time);
6061 
6062             timeStamp = t;
6063         }
6064         else { m->mothurOut("[ERROR]: Can't find timestamp for " + filename + "\n"); m->setControl_pressed(true); }
6065         CloseHandle(hFile);
6066 #endif
6067 
6068         return timeStamp;
6069     }
6070     catch(exception& e) {
6071         m->errorOut(e, "Utils", "getTimeStamp");
6072         exit(1);
6073     }
6074 }
6075 /**************************************************************************************************/
6076 //Referenced - https://genome.sph.umich.edu/w/images/d/d5/Biostat615-Fall2011-lecture03-handout.pdf
geometricMean(vector<float> & abunds,double zeroReplacementValue)6077 double Utils::geometricMean(vector<float>& abunds, double zeroReplacementValue) {
6078     try{
6079         double sum = 0;
6080         for (int j = 0; j < abunds.size(); j++) {
6081             if (isEqual(abunds[j], 0)) { abunds[j] += zeroReplacementValue; }
6082             sum += log(abunds[j]);
6083         }
6084         sum /= abunds.size();
6085         sum = exp(sum);
6086 
6087         return sum;
6088     }
6089     catch(exception& e) {
6090         m->errorOut(e, "Utils", "geometricMean");
6091         exit(1);
6092     }
6093 }
6094 /**************************************************************************************************/
getAverages(vector<vector<double>> & dists)6095 vector<double> Utils::getAverages(vector< vector<double> >& dists) {
6096     try{
6097         vector<double> averages; //averages.resize(numComp, 0.0);
6098         for (int i = 0; i < dists[0].size(); i++) { averages.push_back(0.0); }
6099 
6100         for (int thisIter = 0; thisIter < dists.size(); thisIter++) {
6101             for (int i = 0; i < dists[thisIter].size(); i++) {
6102                 averages[i] += dists[thisIter][i];
6103             }
6104         }
6105 
6106         //finds average.
6107         for (int i = 0; i < averages.size(); i++) {  averages[i] /= (double) dists.size(); }
6108 
6109         return averages;
6110     }
6111     catch(exception& e) {
6112         m->errorOut(e, "Utils", "getAverages");
6113         exit(1);
6114     }
6115 }
6116 /**************************************************************************************************/
getAverage(vector<double> dists)6117 double Utils::getAverage(vector<double> dists) {
6118     try{
6119         double average = 0;
6120 
6121         for (int i = 0; i < dists.size(); i++) {
6122             average += dists[i];
6123         }
6124 
6125         //finds average.
6126         average /= (double) dists.size();
6127 
6128         return average;
6129     }
6130     catch(exception& e) {
6131         m->errorOut(e, "Utils", "getAverage");
6132         exit(1);
6133     }
6134 }
6135 
6136 /**************************************************************************************************/
getStandardDeviation(vector<vector<double>> & dists)6137 vector<double> Utils::getStandardDeviation(vector< vector<double> >& dists) {
6138     try{
6139 
6140         vector<double> averages = getAverages(dists);
6141 
6142         //find standard deviation
6143         vector<double> stdDev; //stdDev.resize(numComp, 0.0);
6144         for (int i = 0; i < dists[0].size(); i++) { stdDev.push_back(0.0); }
6145 
6146         for (int thisIter = 0; thisIter < dists.size(); thisIter++) { //compute the difference of each dist from the mean, and square the result of each
6147             for (int j = 0; j < dists[thisIter].size(); j++) {
6148                 stdDev[j] += ((dists[thisIter][j] - averages[j]) * (dists[thisIter][j] - averages[j]));
6149             }
6150         }
6151         for (int i = 0; i < stdDev.size(); i++) {
6152             stdDev[i] /= (double) dists.size();
6153             stdDev[i] = sqrt(stdDev[i]);
6154         }
6155 
6156         return stdDev;
6157     }
6158     catch(exception& e) {
6159         m->errorOut(e, "Utils", "getAverages");
6160         exit(1);
6161     }
6162 }
6163 /**************************************************************************************************/
getStandardDeviation(vector<vector<double>> & dists,vector<double> & averages)6164 vector<double> Utils::getStandardDeviation(vector< vector<double> >& dists, vector<double>& averages) {
6165     try{
6166         //find standard deviation
6167         vector<double> stdDev; //stdDev.resize(numComp, 0.0);
6168         for (int i = 0; i < dists[0].size(); i++) { stdDev.push_back(0.0); }
6169 
6170         for (int thisIter = 0; thisIter < dists.size(); thisIter++) { //compute the difference of each dist from the mean, and square the result of each
6171             for (int j = 0; j < dists[thisIter].size(); j++) {
6172                 stdDev[j] += ((dists[thisIter][j] - averages[j]) * (dists[thisIter][j] - averages[j]));
6173             }
6174         }
6175         for (int i = 0; i < stdDev.size(); i++) {
6176             stdDev[i] /= (double) dists.size();
6177             stdDev[i] = sqrt(stdDev[i]);
6178         }
6179 
6180         return stdDev;
6181     }
6182     catch(exception& e) {
6183         m->errorOut(e, "Utils", "getStandardDeviation");
6184         exit(1);
6185     }
6186 }
6187 /**************************************************************************************************/
getAverages(vector<vector<vector<seqDist>>> & calcDistsTotals,string mode)6188 vector< vector<seqDist> > Utils::getAverages(vector< vector< vector<seqDist> > >& calcDistsTotals, string mode) {
6189     try{
6190 
6191         vector< vector<seqDist>  > calcAverages; //calcAverages.resize(calcDistsTotals[0].size());
6192         for (int i = 0; i < calcDistsTotals[0].size(); i++) {  //initialize sums to zero.
6193             //calcAverages[i].resize(calcDistsTotals[0][i].size());
6194             vector<seqDist> temp;
6195             for (int j = 0; j < calcDistsTotals[0][i].size(); j++) {
6196                 seqDist tempDist;
6197                 tempDist.seq1 = calcDistsTotals[0][i][j].seq1;
6198                 tempDist.seq2 = calcDistsTotals[0][i][j].seq2;
6199                 tempDist.dist = 0.0;
6200                 temp.push_back(tempDist);
6201             }
6202             calcAverages.push_back(temp);
6203         }
6204 
6205         if (mode == "average") {
6206             for (int thisIter = 0; thisIter < calcDistsTotals.size(); thisIter++) { //sum all groups dists for each calculator
6207                 for (int i = 0; i < calcAverages.size(); i++) {  //initialize sums to zero.
6208                     for (int j = 0; j < calcAverages[i].size(); j++) {
6209                         calcAverages[i][j].dist += calcDistsTotals[thisIter][i][j].dist;
6210                     }
6211                 }
6212             }
6213 
6214             for (int i = 0; i < calcAverages.size(); i++) {  //finds average.
6215                 for (int j = 0; j < calcAverages[i].size(); j++) {
6216                     calcAverages[i][j].dist /= (float) calcDistsTotals.size();
6217                 }
6218             }
6219         }else { //find median
6220             for (int i = 0; i < calcAverages.size(); i++) { //for each calc
6221                 for (int j = 0; j < calcAverages[i].size(); j++) {  //for each comparison
6222                     vector<double> dists;
6223                     for (int thisIter = 0; thisIter < calcDistsTotals.size(); thisIter++) { //for each subsample
6224                         dists.push_back(calcDistsTotals[thisIter][i][j].dist);
6225                     }
6226                     sort(dists.begin(), dists.end());
6227                     calcAverages[i][j].dist = dists[(calcDistsTotals.size()/2)];
6228                 }
6229             }
6230         }
6231 
6232         return calcAverages;
6233     }
6234     catch(exception& e) {
6235         m->errorOut(e, "Utils", "getAverages");
6236         exit(1);
6237     }
6238 }
6239 /**************************************************************************************************/
getAverages(vector<vector<vector<seqDist>>> & calcDistsTotals)6240 vector< vector<seqDist> > Utils::getAverages(vector< vector< vector<seqDist> > >& calcDistsTotals) {
6241     try{
6242 
6243         vector< vector<seqDist>  > calcAverages; //calcAverages.resize(calcDistsTotals[0].size());
6244         for (int i = 0; i < calcDistsTotals[0].size(); i++) {  //initialize sums to zero.
6245             //calcAverages[i].resize(calcDistsTotals[0][i].size());
6246             vector<seqDist> temp;
6247             for (int j = 0; j < calcDistsTotals[0][i].size(); j++) {
6248                 seqDist tempDist;
6249                 tempDist.seq1 = calcDistsTotals[0][i][j].seq1;
6250                 tempDist.seq2 = calcDistsTotals[0][i][j].seq2;
6251                 tempDist.dist = 0.0;
6252                 temp.push_back(tempDist);
6253             }
6254             calcAverages.push_back(temp);
6255         }
6256 
6257 
6258         for (int thisIter = 0; thisIter < calcDistsTotals.size(); thisIter++) { //sum all groups dists for each calculator
6259             for (int i = 0; i < calcAverages.size(); i++) {  //initialize sums to zero.
6260                 for (int j = 0; j < calcAverages[i].size(); j++) {
6261                     calcAverages[i][j].dist += calcDistsTotals[thisIter][i][j].dist;
6262                 }
6263             }
6264         }
6265 
6266         for (int i = 0; i < calcAverages.size(); i++) {  //finds average.
6267             for (int j = 0; j < calcAverages[i].size(); j++) {
6268                 calcAverages[i][j].dist /= (float) calcDistsTotals.size();
6269             }
6270         }
6271 
6272         return calcAverages;
6273     }
6274     catch(exception& e) {
6275         m->errorOut(e, "Utils", "getAverages");
6276         exit(1);
6277     }
6278 }
6279 /**************************************************************************************************/
getStandardDeviation(vector<vector<vector<seqDist>>> & calcDistsTotals)6280 vector< vector<seqDist> > Utils::getStandardDeviation(vector< vector< vector<seqDist> > >& calcDistsTotals) {
6281     try{
6282 
6283         vector< vector<seqDist> > calcAverages = getAverages(calcDistsTotals);
6284 
6285         //find standard deviation
6286         vector< vector<seqDist>  > stdDev;
6287         for (int i = 0; i < calcDistsTotals[0].size(); i++) {  //initialize sums to zero.
6288             vector<seqDist> temp;
6289             for (int j = 0; j < calcDistsTotals[0][i].size(); j++) {
6290                 seqDist tempDist;
6291                 tempDist.seq1 = calcDistsTotals[0][i][j].seq1;
6292                 tempDist.seq2 = calcDistsTotals[0][i][j].seq2;
6293                 tempDist.dist = 0.0;
6294                 temp.push_back(tempDist);
6295             }
6296             stdDev.push_back(temp);
6297         }
6298 
6299         for (int thisIter = 0; thisIter < calcDistsTotals.size(); thisIter++) { //compute the difference of each dist from the mean, and square the result of each
6300             for (int i = 0; i < stdDev.size(); i++) {
6301                 for (int j = 0; j < stdDev[i].size(); j++) {
6302                     stdDev[i][j].dist += ((calcDistsTotals[thisIter][i][j].dist - calcAverages[i][j].dist) * (calcDistsTotals[thisIter][i][j].dist - calcAverages[i][j].dist));
6303                 }
6304             }
6305         }
6306 
6307         for (int i = 0; i < stdDev.size(); i++) {  //finds average.
6308             for (int j = 0; j < stdDev[i].size(); j++) {
6309                 stdDev[i][j].dist /= (float) calcDistsTotals.size();
6310                 stdDev[i][j].dist = sqrt(stdDev[i][j].dist);
6311             }
6312         }
6313 
6314         return stdDev;
6315     }
6316     catch(exception& e) {
6317         m->errorOut(e, "Utils", "getAverages");
6318         exit(1);
6319     }
6320 }
6321 /**************************************************************************************************/
getStandardDeviation(vector<vector<vector<seqDist>>> & calcDistsTotals,vector<vector<seqDist>> & calcAverages)6322 vector< vector<seqDist> > Utils::getStandardDeviation(vector< vector< vector<seqDist> > >& calcDistsTotals, vector< vector<seqDist> >& calcAverages) {
6323     try{
6324         //find standard deviation
6325         vector< vector<seqDist>  > stdDev;
6326         for (int i = 0; i < calcDistsTotals[0].size(); i++) {  //initialize sums to zero.
6327             vector<seqDist> temp;
6328             for (int j = 0; j < calcDistsTotals[0][i].size(); j++) {
6329                 seqDist tempDist;
6330                 tempDist.seq1 = calcDistsTotals[0][i][j].seq1;
6331                 tempDist.seq2 = calcDistsTotals[0][i][j].seq2;
6332                 tempDist.dist = 0.0;
6333                 temp.push_back(tempDist);
6334             }
6335             stdDev.push_back(temp);
6336         }
6337 
6338         for (int thisIter = 0; thisIter < calcDistsTotals.size(); thisIter++) { //compute the difference of each dist from the mean, and square the result of each
6339             for (int i = 0; i < stdDev.size(); i++) {
6340                 for (int j = 0; j < stdDev[i].size(); j++) {
6341                     stdDev[i][j].dist += ((calcDistsTotals[thisIter][i][j].dist - calcAverages[i][j].dist) * (calcDistsTotals[thisIter][i][j].dist - calcAverages[i][j].dist));
6342                 }
6343             }
6344         }
6345 
6346         for (int i = 0; i < stdDev.size(); i++) {  //finds average.
6347             for (int j = 0; j < stdDev[i].size(); j++) {
6348                 stdDev[i][j].dist /= (float) calcDistsTotals.size();
6349                 stdDev[i][j].dist = sqrt(stdDev[i][j].dist);
6350             }
6351         }
6352 
6353         return stdDev;
6354     }
6355     catch(exception& e) {
6356         m->errorOut(e, "Utils", "getAverages");
6357         exit(1);
6358     }
6359 }
6360 
6361 /**************************************************************************************************/
isContainingOnlyDigits(string input)6362 bool Utils::isContainingOnlyDigits(string input) {
6363     try{
6364 
6365         //are you a digit in ascii code
6366         for (int i = 0;i < input.length(); i++){
6367             if( input[i]>47 && input[i]<58){}
6368             else { return false; }
6369         }
6370 
6371         return true;
6372     }
6373     catch(exception& e) {
6374         m->errorOut(e, "Utils", "isContainingOnlyDigits");
6375         exit(1);
6376     }
6377 }
6378 /**************************************************************************************************/
6379 /*M02352_41_000000000-AT06G_1_2104_18738_21630 Eukaryota(100);Archaeplastida(100);Chloroplastida(100);Chlorophyta(100);Mamiellophyceae(100);Mamiellales(100);Ostreococcus(100);Ostreococcus tauri(100);
6380 
6381  When I run remove.lineage with:
6382  taxon=Chloroplast-Mitochondria-unknown-Bacteria-Archaea-Metazoa-Charophyta
6383 
6384  The word "Chloroplast" in the taxon string gets matched to the lineage Chloroplastida in the taxonomy (above) and wipes out all of the green algae.*/
6385 
findTaxon(vector<Taxon> tax,vector<Taxon> stax)6386 bool Utils::findTaxon(vector<Taxon> tax, vector<Taxon> stax) {
6387     try {
6388         removeQuotes(tax); removeQuotes(stax);
6389 
6390         //looking to find something like "unknown" or "Proteobacteria"
6391         if (stax.size() == 1) {
6392             string searchTax = stax[0].name;
6393             auto it = find_if(tax.begin(), tax.end(), [&searchTax](const Taxon& obj) { return obj.name == searchTax;});
6394 
6395             if (it != tax.end()) { return true; }
6396             else { return false; }
6397 
6398         }else { //looking to find something like "Bacteria;Proteobacteria;Alphaproteobacteria;Rickettsiales;Anaplasmataceae;Wolbachia;"
6399 
6400             if (stax.size() > tax.size()) { return false; } //we are looking for a more specific taxonomy, not a match
6401             else {
6402                 for (int i = 0; i < stax.size(); i++) {
6403                     if (stax[i].name != tax[i].name) { return false; }
6404                 }
6405                 return true;
6406             }
6407         }
6408 
6409         return false;
6410     }
6411     catch(exception& e) {
6412         m->errorOut(e, "Utils", "findTaxon");
6413         exit(1);
6414     }
6415 }
6416 /**************************************************************************************************/
searchTax(vector<Taxon> userTaxons,vector<bool> taxonsHasConfidence,vector<vector<Taxon>> searchTaxons)6417 bool Utils::searchTax(vector<Taxon> userTaxons, vector<bool> taxonsHasConfidence, vector< vector<Taxon> > searchTaxons) {
6418     try {
6419         bool userDataHasConfidence = hasConfidenceScore(userTaxons);
6420 
6421         for (int j = 0; j < searchTaxons.size(); j++) {
6422 
6423             bool foundTaxonMatch = findTaxon(userTaxons, searchTaxons[j]);
6424 
6425             if (foundTaxonMatch) {
6426                 //searchTaxon or user taxons don't include confidence scores so ingnore them
6427                 if (!taxonsHasConfidence[j] || !userDataHasConfidence) {
6428                     return true;  //since you belong to at least one of the taxons we want you are included so no need to search for other
6429                 }else {
6430                     bool good = true;
6431 
6432                     //the usersTaxon is most likely longer than the searchTaxons, and searchTaxon[0] may relate to userTaxon[4]
6433                     //we want to "line them up", so we will find the the index where the searchstring starts
6434                     int index = 0;
6435                     for (int i = 0; i < userTaxons.size(); i++) {
6436 
6437                         if (userTaxons[i].name == searchTaxons[j][0].name) {
6438                             index = i;
6439                             int spot = 0;
6440                             bool goodspot = true;
6441                             //is this really the start, or are we dealing with a taxon of the same name?
6442                             while ((spot < searchTaxons[j].size()) && ((i+spot) < userTaxons.size())) {
6443                                 if (userTaxons[i+spot].name != searchTaxons[j][spot].name) { goodspot = false; break; }
6444                                 else { spot++; }
6445                             }
6446 
6447                             if (goodspot) { break; }
6448                         }
6449                     }
6450 
6451                     for (int i = 0; i < searchTaxons[j].size(); i++) {
6452 
6453                         if ((i+index) < userTaxons.size()) { //just in case, should never be false
6454                             if (userTaxons[i+index].confidence < searchTaxons[j][i].confidence) { //is the users cutoff less than the search taxons
6455                                 good = false;
6456                                 break;
6457                             }
6458                         }else { good = false; break; }
6459                     }
6460 
6461                     //passed the test so add you
6462                     if (good) { return true; }
6463                 }
6464             }
6465         }
6466 
6467         return false;
6468     }
6469     catch(exception& e) {
6470         m->errorOut(e, "Utils", "searchTax");
6471         exit(1);
6472     }
6473 }
6474 
6475 /**************************************************************************************************/
getTaxons(string tax,bool & hasConfidence)6476 vector<Taxon> Utils::getTaxons(string tax, bool& hasConfidence) {
6477     try {
6478 
6479         vector<Taxon> t;
6480         string taxon = "";
6481         int taxLength = tax.length();
6482 
6483         for(int i=0;i<taxLength;i++){
6484             if(tax[i] == ';'){
6485                 string newtaxon = taxon; float confidence = 0;
6486                 hasConfidence = hasConfidenceScore(newtaxon, confidence);
6487 
6488                 Taxon temp(newtaxon, confidence); t.push_back(temp);
6489                 taxon = "";
6490             }
6491             else{ taxon += tax[i]; }
6492         }
6493 
6494         if (taxon != "") {
6495             float confidence = 0;
6496             hasConfidence = hasConfidenceScore(taxon, confidence);
6497 
6498             Taxon temp(taxon, confidence); t.push_back(temp);
6499         }
6500 
6501         return t;
6502     }
6503     catch(exception& e) {
6504         m->errorOut(e, "Utils", "getTaxons");
6505         exit(1);
6506     }
6507 }
6508 /**************************************************************************************************/
hasConfidenceScore(vector<Taxon> taxons)6509 bool Utils::hasConfidenceScore(vector<Taxon> taxons) {
6510     try {
6511 
6512         for (int i = 0; i < taxons.size(); i++) {
6513             if (m->getControl_pressed()) { break; }
6514 
6515             if (taxons[i].confidence > 0) { return true; }
6516         }
6517 
6518         return false;
6519     }
6520     catch(exception& e) {
6521         m->errorOut(e, "Utils", "hasConfidenceScore");
6522         exit(1);
6523     }
6524 }
6525 /**************************************************************************************************/
hasConfidenceScore(string & taxon,float & confidence)6526 bool Utils::hasConfidenceScore(string& taxon, float& confidence) {
6527     try {
6528         int openParen = taxon.find_last_of('(');
6529         int closeParen = taxon.find_last_of(')');
6530 
6531         if ((openParen != string::npos) && (closeParen != string::npos)) {
6532             string confidenceScore = taxon.substr(openParen+1, (closeParen-(openParen+1)));
6533             if (isPositiveNumeric(confidenceScore)) {  //its a confidence
6534                 taxon = taxon.substr(0, openParen); //rip off confidence
6535                 mothurConvert(confidenceScore, confidence);
6536                 return true;
6537             }else {
6538                 confidence = 0; //its part of the taxon
6539             }
6540         }else{ confidence = 0;  }
6541 
6542         return false;
6543     }
6544     catch(exception& e) {
6545         m->errorOut(e, "Utils", "hasConfidenceScore");
6546         exit(1);
6547     }
6548 }
6549 /**************************************************************************************************/
removeConfidences(string & tax)6550 float Utils::removeConfidences(string& tax) {
6551     try {
6552         string temp = tax; float dummy; if (!hasConfidenceScore(temp, dummy)) { return 0; }
6553 
6554         string taxon;
6555         string newTax = "";
6556         string confidenceScore = "0";
6557 
6558         //remove last ";"
6559         if (tax.length() > 1) { tax = tax.substr(0, tax.length()-1); }
6560         vector<string> taxons; splitAtChar(tax, taxons, ';');
6561 
6562         for (int i = 0; i < taxons.size(); i++) {
6563 
6564             if (m->getControl_pressed()) { return 0; }
6565 
6566             taxon = taxons[i];
6567 
6568             int pos = taxon.find_last_of('(');
6569             if (pos != -1) {
6570                 //is it a number?
6571                 int pos2 = taxon.find_last_of(')');
6572                 if (pos2 != -1) {
6573                     string temp = taxon.substr(pos+1, (pos2-(pos+1)));
6574                     if (isPositiveNumeric(temp)) {
6575                         taxon = taxon.substr(0, pos); //rip off confidence
6576                         confidenceScore = temp;
6577                     }
6578                 }
6579             }
6580             taxon += ";";
6581 
6582             newTax += taxon;
6583         }
6584 
6585         tax = newTax;
6586 
6587         float confidence = 0; mothurConvert(confidenceScore, confidence);
6588 
6589         return confidence;
6590     }
6591     catch(exception& e) {
6592         m->errorOut(e, "Utils", "removeConfidences");
6593         exit(1);
6594     }
6595 }
6596 /**************************************************************************************************/
removeQuotes(vector<Taxon> & tax)6597 void Utils::removeQuotes(vector<Taxon>& tax) {
6598     try {
6599 
6600         string taxon;
6601         string newTax = "";
6602 
6603         for (int i = 0; i < tax.size(); i++) {
6604 
6605             if (m->getControl_pressed()) { return; }
6606 
6607             tax[i].name = removeQuotes(tax[i].name);
6608         }
6609 
6610         return;
6611     }
6612     catch(exception& e) {
6613         m->errorOut(e, "Utils", "removeQuotes");
6614         exit(1);
6615     }
6616 }
6617 /**************************************************************************************************/
removeQuotes(string tax)6618 string Utils::removeQuotes(string tax) {
6619     try {
6620 
6621         string taxon;
6622         string newTax = "";
6623 
6624         for (int i = 0; i < tax.length(); i++) {
6625 
6626             if (m->getControl_pressed()) { return newTax; }
6627 
6628             if ((tax[i] != '\'') && (tax[i] != '\"')) { newTax += tax[i]; }
6629 
6630         }
6631 
6632         return newTax;
6633     }
6634     catch(exception& e) {
6635         m->errorOut(e, "Utils", "removeQuotes");
6636         exit(1);
6637     }
6638 }
6639 /**************************************************************************************************/
6640 // function for calculating standard deviation
getStandardDeviation(vector<int> & featureVector)6641 double Utils::getStandardDeviation(vector<int>& featureVector){
6642     try {
6643         //finds sum
6644         double average = 0;
6645         for (int i = 0; i < featureVector.size(); i++) { average += featureVector[i]; }
6646         average /= (double) featureVector.size();
6647 
6648         //find standard deviation
6649         double stdDev = 0;
6650         for (int i = 0; i < featureVector.size(); i++) { //compute the difference of each dist from the mean, and square the result of each
6651             stdDev += ((featureVector[i] - average) * (featureVector[i] - average));
6652         }
6653 
6654         stdDev /= (double) featureVector.size();
6655         stdDev = sqrt(stdDev);
6656 
6657         return stdDev;
6658     }
6659     catch(exception& e) {
6660         m->errorOut(e, "Utils", "getStandardDeviation");
6661         exit(1);
6662     }
6663 }
6664 /*****************************************************************/
6665 //this code is a mess and should be rethought...-slw
parseTreeFile(string filename)6666 vector<string> Utils::parseTreeFile(string filename) {
6667 
6668     //only takes names from the first tree and assumes that all trees use the same names.
6669     try {
6670         //string filename = current->getTreeFile();
6671         ifstream filehandle;
6672         Utils util; util.openInputFile(filename, filehandle);
6673         int comment;
6674         char c;
6675         comment = 0;
6676 
6677         vector<string> Treenames;
6678         if((c = filehandle.peek()) != '#') {  //ifyou are not a nexus file
6679 
6680             while ((c = filehandle.peek()) != ';') {
6681                 if (m->getControl_pressed()) {  filehandle.close(); return Treenames; }
6682                 // get past comments
6683                 if(c == '[')    { comment = 1; }
6684                 if(c == ']')    { comment = 0; }
6685                 if((c == '(') && (comment != 1)){ break; }
6686                 filehandle.get();
6687             }
6688 
6689             Treenames = readTreeString(filehandle);
6690 
6691         }else if((c = filehandle.peek()) == '#') { //ifyou are a nexus file
6692             string holder = "";
6693 
6694             // get past comments
6695             while(holder != "translate" && holder != "Translate"){
6696                 if (m->getControl_pressed()) {  filehandle.close(); return Treenames; }
6697                 if(holder == "[" || holder == "[!") { comment = 1; }
6698                 if(holder == "]")                   { comment = 0; }
6699                 filehandle >> holder;
6700 
6701                 //if there is no translate then you must read tree string otherwise use translate to get names
6702                 if((holder == "tree") && (comment != 1)){
6703                     //pass over the "tree rep.6878900 = "
6704                     while (((c = filehandle.get()) != '(') && ((c = filehandle.peek()) != EOF)) {;}
6705 
6706                     if(c == EOF) { break; }
6707                     filehandle.putback(c);  //put back first ( of tree.
6708                     Treenames = readTreeString(filehandle);
6709 
6710                     break;
6711                 }
6712 
6713                 if (filehandle.eof()) { break; }
6714             }
6715 
6716             //use nexus translation rather than parsing tree to save time
6717             if((holder == "translate") || (holder == "Translate")) {
6718 
6719                 string number, name, h;
6720                 h = ""; // so it enters the loop the first time
6721                 while((h != ";") && (number != ";")) {
6722                     if (m->getControl_pressed()) {  filehandle.close(); return Treenames; }
6723                     filehandle >> number;
6724                     filehandle >> name;
6725 
6726                     //c = , until done with translation then c = ;
6727                     h = name.substr(name.length()-1, name.length());
6728                     name.erase(name.end()-1);  //erase the comma
6729                     Treenames.push_back(number);
6730                 }
6731                 if(number == ";") { Treenames.pop_back(); }  //in case ';' from translation is on next line instead of next to last name
6732             }
6733         }
6734         filehandle.close();
6735 
6736         return Treenames;
6737     }
6738     catch(exception& e) {
6739         m->errorOut(e, "Utils", "parseTreeFile");
6740         exit(1);
6741     }
6742 }
6743 /*******************************************************/
readTreeString(ifstream & filehandle)6744 vector<string> Utils::readTreeString(ifstream& filehandle)	{
6745     try {
6746         char c;
6747         string name;  //, k
6748         vector<string> Treenames;
6749 
6750         while((c = filehandle.peek()) != ';') {
6751             if (m->getControl_pressed()) {  return Treenames; }
6752 
6753             if(c == ')')  {
6754                 //to pass over labels in trees
6755                 c=filehandle.get();
6756                 while((c!=',') && (c != -1) && (c!= ':') && (c!=';')){ c=filehandle.get(); }
6757                 filehandle.putback(c);
6758             }
6759             if(c == ';') { return Treenames; }
6760             if(c == -1) { return Treenames; }
6761             //if you are a name
6762             if((c != '(') && (c != ')') && (c != ',') && (c != ':') && (c != '\n') && (c != '\t') && (c != 32)) { //32 is space
6763                 name = "";
6764                 c = filehandle.get();
6765                 while ((c != '(') && (c != ')') && (c != ',') && (c != ':')  && (c != '\n') && (c != 32) && (c != '\t')) {
6766                     name += c;
6767                     c = filehandle.get();
6768                 }
6769 
6770                 if (name != "\r" ) { Treenames.push_back(name);   }
6771                 filehandle.putback(c);
6772             }
6773 
6774             if(c  == ':') { //read until you reach the end of the branch length
6775                 while ((c != '(') && (c != ')') && (c != ',') && (c != ';') && (c != '\n') && (c != '\t') && (c != 32)) { c = filehandle.get(); }
6776                 filehandle.putback(c);
6777             }
6778             c = filehandle.get();
6779             if(c == ';') { return Treenames; }
6780             if(c == ')') { filehandle.putback(c); }
6781             if (filehandle.eof()) { break; }
6782         }
6783         return Treenames;
6784     }
6785     catch(exception& e) {
6786         m->errorOut(e, "Utils", "readTreeString");
6787         exit(1);
6788     }
6789 }
6790 /*********************************************************************************************/
6791