1 //
2 // utils.cpp
3 // Mothur
4 //
5 // Created by Sarah Westcott on 11/13/17.
6 // Copyright © 2017 Schloss Lab. All rights reserved.
7 //
8
9 #include "utils.hpp"
10 #include "ordervector.hpp"
11 #include "sharedordervector.h"
12 #include "phylotree.h"
13 #include "taxonomy.hpp"
14 #include "inputdata.h"
15 #include "sharedclrvectors.hpp"
16 #include "sharedrabundfloatvectors.hpp"
17
18 /***********************************************************************/
// Returns the non-numeric part of an OTU / phylotype label, e.g. "Otu0012" -> "Otu".
// Every ASCII digit ('0'-'9') is dropped wherever it appears; all other
// characters are kept in their original order.
string getLabelTag(string label){

    string tag = "";

    //remove OTU or phylo tag
    for (size_t i = 0; i < label.length(); i++) {
        const bool isDigit = (label[i] >= '0') && (label[i] <= '9');
        if (!isDigit) { tag += label[i]; }
    }

    return tag;
}
32 /***********************************************************************/
Utils()33 Utils::Utils(){
34 try {
35
36 m = MothurOut::getInstance(); modifyNames = m->getChangedSeqNames();
37 long long s = m->getRandomSeed();
38 mersenne_twister_engine.seed(s); srand(s);
39 homePath = m->getHomePath(); currentWorkingDirectory = "";
40 paths = m->getPaths();
41 }
42 catch(exception& e) {
43 m->errorOut(e, "Utils", "mothurRandomShuffle");
44 exit(1);
45 }
46 }
47 /***********************************************************************/
randomUniform()48 float Utils::randomUniform() {
49 try {
50 uniform_real_distribution<float> unif;
51 return (unif(mersenne_twister_engine));
52 }
53 catch(exception& e) {
54 m->errorOut(e, "Utils", "randomUniform");
55 exit(1);
56 }
57 }
58 /***********************************************************************/
randomExp()59 float Utils::randomExp() {
60 try {
61 exponential_distribution<float> unif;
62 return (unif(mersenne_twister_engine));
63 }
64 catch(exception& e) {
65 m->errorOut(e, "Utils", "randomExp");
66 exit(1);
67 }
68 }
69 /***********************************************************************/
randomNorm()70 float Utils::randomNorm() {
71 try {
72 normal_distribution<float> unif;
73 return (unif(mersenne_twister_engine));
74 }
75 catch(exception& e) {
76 m->errorOut(e, "Utils", "randomNorm");
77 exit(1);
78 }
79 }
80 /***********************************************************************/
randomGamma(float range)81 float Utils::randomGamma(float range) {
82 try {
83 gamma_distribution<float> unif(range, range);
84 return (unif(mersenne_twister_engine));
85 }
86 catch(exception& e) {
87 m->errorOut(e, "Utils", "randomGamma");
88 exit(1);
89 }
90 }
91 /***********************************************************************/
randomDirichlet(vector<float> alphas)92 vector<float> Utils::randomDirichlet(vector<float> alphas) {
93 try {
94 int nAlphas = (int)alphas.size();
95 vector<float> dirs(nAlphas, 0);
96
97 float sum = 0.0000;
98 for(int i=0;i<nAlphas;i++){
99 dirs[i] = randomGamma(alphas[i]);
100 while(isinf(dirs[i])) { dirs[i] = randomGamma(alphas[i]); }
101 sum += dirs[i];
102 }
103
104 for(int i=0;i<nAlphas;i++){ dirs[i] /= sum; }
105
106 return dirs;
107 }
108 catch(exception& e) {
109 m->errorOut(e, "Utils", "randomDirichlet");
110 exit(1);
111 }
112 }
113 /***********************************************************************/
mothurRandomShuffle(vector<int> & randomize)114 void Utils::mothurRandomShuffle(vector<int>& randomize){
115 try {
116 shuffle (randomize.begin(), randomize.end(), mersenne_twister_engine);
117 }
118 catch(exception& e) {
119 m->errorOut(e, "Utils", "mothurRandomShuffle");
120 exit(1);
121 }
122
123 }
124 /***********************************************************************/
mothurRandomShuffle(vector<weightedSeq> & randomize)125 void Utils::mothurRandomShuffle(vector<weightedSeq>& randomize){
126 try {
127 shuffle (randomize.begin(), randomize.end(), mersenne_twister_engine);
128 }
129 catch(exception& e) {
130 m->errorOut(e, "Utils", "mothurRandomShuffle");
131 exit(1);
132 }
133
134 }
135 /***********************************************************************/
mothurRandomShuffle(vector<long long> & randomize)136 void Utils::mothurRandomShuffle(vector<long long>& randomize){
137 try {
138 shuffle (randomize.begin(), randomize.end(), mersenne_twister_engine);
139 }
140 catch(exception& e) {
141 m->errorOut(e, "Utils", "mothurRandomShuffle");
142 exit(1);
143 }
144
145 }
146 /***********************************************************************/
mothurRandomShuffle(OrderVector & randomize)147 void Utils::mothurRandomShuffle(OrderVector& randomize){
148 try {
149 shuffle (randomize.begin(), randomize.end(), mersenne_twister_engine);
150 }
151 catch(exception& e) {
152 m->errorOut(e, "Utils", "mothurRandomShuffle");
153 exit(1);
154 }
155
156 }
157 /***********************************************************************/
mothurRandomShuffle(vector<SharedRAbundVector * > & randomize)158 void Utils::mothurRandomShuffle(vector<SharedRAbundVector*>& randomize){
159 try {
160 shuffle (randomize.begin(), randomize.end(), mersenne_twister_engine);
161 }
162 catch(exception& e) {
163 m->errorOut(e, "Utils", "mothurRandomShuffle");
164 exit(1);
165 }
166
167 }
168 /***********************************************************************/
mothurRandomShuffle(SharedOrderVector & randomize)169 void Utils::mothurRandomShuffle(SharedOrderVector& randomize){
170 try {
171 shuffle (randomize.begin(), randomize.end(), mersenne_twister_engine);
172 }
173 catch(exception& e) {
174 m->errorOut(e, "Utils", "mothurRandomShuffle");
175 exit(1);
176 }
177
178 }
179 /***********************************************************************/
mothurRandomShuffle(vector<string> & randomize)180 void Utils::mothurRandomShuffle(vector<string>& randomize){
181 try {
182 shuffle (randomize.begin(), randomize.end(), mersenne_twister_engine);
183 }
184 catch(exception& e) {
185 m->errorOut(e, "Utils", "mothurRandomShuffle");
186 exit(1);
187 }
188
189 }
190 /***********************************************************************/
mothurRandomShuffle(vector<item> & randomize)191 void Utils::mothurRandomShuffle(vector<item>& randomize){
192 try {
193 shuffle (randomize.begin(), randomize.end(), mersenne_twister_engine);
194 }
195 catch(exception& e) {
196 m->errorOut(e, "Utils", "mothurRandomShuffle");
197 exit(1);
198 }
199
200 }
201 /***********************************************************************/
mothurRandomShuffle(vector<PCell * > & randomize)202 void Utils::mothurRandomShuffle(vector<PCell*>& randomize){
203 try {
204 shuffle (randomize.begin(), randomize.end(), mersenne_twister_engine);
205 }
206 catch(exception& e) {
207 m->errorOut(e, "Utils", "mothurRandomShuffle");
208 exit(1);
209 }
210
211 }
212 /***********************************************************************/
mothurRandomShuffle(vector<colDist> & randomize)213 void Utils::mothurRandomShuffle(vector<colDist>& randomize){
214 try {
215 shuffle (randomize.begin(), randomize.end(), mersenne_twister_engine);
216 }
217 catch(exception& e) {
218 m->errorOut(e, "Utils", "mothurRandomShuffle");
219 exit(1);
220 }
221
222 }
223 /***********************************************************************/
mothurRandomShuffle(vector<PDistCellMin> & randomize)224 void Utils::mothurRandomShuffle(vector<PDistCellMin>& randomize){
225 try {
226 shuffle (randomize.begin(), randomize.end(), mersenne_twister_engine);
227 }
228 catch(exception& e) {
229 m->errorOut(e, "Utils", "mothurRandomShuffle");
230 exit(1);
231 }
232
233 }
234 /***********************************************************************/
mothurRandomShuffle(vector<vector<double>> & randomize)235 void Utils::mothurRandomShuffle(vector< vector<double> >& randomize){
236 try {
237 shuffle (randomize.begin(), randomize.end(), mersenne_twister_engine);
238 }
239 catch(exception& e) {
240 m->errorOut(e, "Utils", "mothurRandomShuffle");
241 exit(1);
242 }
243
244 }
245 /***********************************************************************/
getRandomIndex(long long highest)246 long long Utils::getRandomIndex(long long highest){
247 try {
248
249 if (highest == 0) { return 0; }
250
251 uniform_int_distribution<long long> dis(0, highest);
252
253 long long random = dis(mersenne_twister_engine);
254
255 return random;
256 }
257 catch(exception& e) {
258 m->errorOut(e, "Utils", "getRandomIndex");
259 exit(1);
260 }
261 }
262 /***********************************************************************/
263
getRandomIndex(int highest)264 int Utils::getRandomIndex(int highest){
265 try {
266 if (highest == 0) { return 0; }
267
268 uniform_int_distribution<int> dis(0, highest);
269 int random = dis(mersenne_twister_engine);
270 return random;
271 }
272 catch(exception& e) {
273 m->errorOut(e, "Utils", "getRandomIndex");
274 exit(1);
275 }
276
277 }
278 /***********************************************************************/
getRandomNumber()279 int Utils::getRandomNumber(){
280 try {
281 uniform_int_distribution<int> dis;
282
283 int random = dis(mersenne_twister_engine);
284
285 return random;
286 }
287 catch(exception& e) {
288 m->errorOut(e, "Utils", "getRandomNumber");
289 exit(1);
290 }
291
292 }
293 /***********************************************************************/
getRandomDouble0to1()294 double Utils::getRandomDouble0to1(){
295 try {
296 uniform_real_distribution<double> dis(0, 1);
297
298 double random = dis(mersenne_twister_engine);
299
300 return random;
301 }
302 catch(exception& e) {
303 m->errorOut(e, "Utils", "getRandomNumber");
304 exit(1);
305 }
306
307 }
308
309 /*********************************************************************************************/
getRAMUsed()310 double Utils::getRAMUsed() {
311 try {
312
313 #if defined (__APPLE__) || (__MACH__)
314 /* Mac: ru_maxrss gives the size in bytes */
315 struct rusage r_usage;
316 getrusage(RUSAGE_SELF, & r_usage);
317
318 return r_usage.ru_maxrss;
319 #elif (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
320 /* Linux: ru_maxrss gives the size in kilobytes */
321 struct rusage r_usage;
322 getrusage(RUSAGE_SELF, & r_usage);
323 return r_usage.ru_maxrss * 1024;
324 #else
325 MEMORYSTATUSEX status;
326 status.dwLength = sizeof(status);
327 GlobalMemoryStatusEx(&status);
328 return (size_t)(status.ullTotalPhys - status.ullAvailPhys);
329 #endif
330 }
331 catch(exception& e) {
332 m->errorOut(e, "Utils", "getMemoryUsed");
333 exit(1);
334 }
335 }
336 /*********************************************************************************************/
getTotalRAM()337 double Utils::getTotalRAM() {
338 try {
339
340 #if defined NON_WINDOWS
341 #if defined _SC_PHYS_PAGES && defined _SC_PAGESIZE
342 /* This works on linux-gnu, solaris2 and cygwin. */
343 double pages = sysconf (_SC_PHYS_PAGES);
344 double pagesize = sysconf (_SC_PAGESIZE);
345 if (0 <= pages && 0 <= pagesize)
346 return pages * pagesize;
347 #else
348 m->mothurOut("[WARNING]: Cannot determine amount of RAM");
349 #endif
350
351 #elif defined (_WIN32)
352 MEMORYSTATUSEX status;
353 status.dwLength = sizeof(status);
354 GlobalMemoryStatusEx(&status);
355 return (size_t)status.ullTotalPhys;
356 #else
357 struct sysinfo si;
358 if (sysinfo(&si))
359 mothurOut("[WARNING]: Cannot determine amount of RAM");
360 return si.totalram * si.mem_unit;
361
362 #endif
363 return 0;
364 }
365 catch(exception& e) {
366 m->errorOut(e, "Utils", "getTotalRAM");
367 exit(1);
368 }
369 }
370 //********************************************************************/
reverseOligo(string oligo)371 string Utils::reverseOligo(string oligo){
372 try {
373 string reverse = "";
374
375 for(int i=oligo.length()-1;i>=0;i--){
376
377 if(oligo[i] == 'A') { reverse += 'T'; }
378 else if(oligo[i] == 'T'){ reverse += 'A'; }
379 else if(oligo[i] == 'U'){ reverse += 'A'; }
380
381 else if(oligo[i] == 'G'){ reverse += 'C'; }
382 else if(oligo[i] == 'C'){ reverse += 'G'; }
383
384 else if(oligo[i] == 'R'){ reverse += 'Y'; }
385 else if(oligo[i] == 'Y'){ reverse += 'R'; }
386
387 else if(oligo[i] == 'M'){ reverse += 'K'; }
388 else if(oligo[i] == 'K'){ reverse += 'M'; }
389
390 else if(oligo[i] == 'W'){ reverse += 'W'; }
391 else if(oligo[i] == 'S'){ reverse += 'S'; }
392
393 else if(oligo[i] == 'B'){ reverse += 'V'; }
394 else if(oligo[i] == 'V'){ reverse += 'B'; }
395
396 else if(oligo[i] == 'D'){ reverse += 'H'; }
397 else if(oligo[i] == 'H'){ reverse += 'D'; }
398
399 else { reverse += 'N'; }
400 }
401
402
403 return reverse;
404 }
405 catch(exception& e) {
406 m->errorOut(e, "Utils", "reverseOligo");
407 exit(1);
408 }
409 }
410
411 /*********************************************************************************************/
fileExists(string name)412 bool Utils::fileExists(string name) {
413 try {
414 bool fExists = false;
415 name = getFullPathName(name);
416
417 #if defined USE_BOOST
418
419 boost::filesystem::path p(name.c_str());
420
421 if (exists(p)) {
422 if (is_regular_file(p)) { fExists = true; } // is path p a regular file?
423 }
424 #else
425
426 #if defined NON_WINDOWS
427 ifstream in; openInputFile(name, in, "");
428
429 //if this file exists
430 if (in) { in.close(); fExists = true; }
431 #else
432
433 DWORD attributes = GetFileAttributes(name.c_str());
434 fExists = (attributes != INVALID_FILE_ATTRIBUTES && !(attributes & FILE_ATTRIBUTE_DIRECTORY));
435 #endif
436
437 #endif
438
439 return fExists;
440 }
441 catch(exception& e) {
442 m->errorOut(e, "Utils", "fileExists");
443 exit(1);
444 }
445 }
446 /***********************************************************************/
getFullPathName(string fileName)447 string Utils::getFullPathName(string fileName){
448 try{
449 string path = hasPath(fileName);
450 string newFileName;
451 int pos;
452 vector<string> dirs;
453 int index = 0;
454
455 if (path == "") { return fileName; } //its a simple name
456 else { //we need to complete the pathname
457 // ex. ../../../filename
458 // cwd = /user/work/desktop
459 //get current working directory
460 string cwd = currentWorkingDirectory;
461
462 if (path.find("~") != string::npos) { //go to home directory
463 newFileName = homePath + fileName.substr(fileName.find("~")+1);
464 return newFileName;
465 }else { //find path
466 string pattern = "."; pattern += PATH_SEPARATOR;
467 if (path.rfind(pattern) == string::npos) { return fileName; } //already complete name
468 else { newFileName = fileName.substr(fileName.rfind(pattern)+2); } //save the complete part of the name
469
470 if (cwd == "") {
471 char *cwdpath = NULL; cwdpath = getcwd(NULL, 0); // or _getcwd
472 if (cwdpath != NULL) { cwd = cwdpath; }
473 else { cwd = ""; }
474 currentWorkingDirectory = cwd;
475 }
476 //rip off first '/'
477 string simpleCWD = cwd;
478 #if defined NON_WINDOWS
479 if (cwd.length() > 0) { simpleCWD = cwd.substr(1); }
480 #endif
481 //break apart the current working directory
482 while (simpleCWD.find_first_of(PATH_SEPARATOR) != string::npos) {
483 string dir = simpleCWD.substr(0,simpleCWD.find_first_of(PATH_SEPARATOR));
484 simpleCWD = simpleCWD.substr(simpleCWD.find_first_of(PATH_SEPARATOR)+1, simpleCWD.length());
485 dirs.push_back(dir);
486 }
487 //get last one // ex. ../../../filename = /user/work/desktop/filename
488 dirs.push_back(simpleCWD); //ex. dirs[0] = user, dirs[1] = work, dirs[2] = desktop
489
490 index = dirs.size()-1;
491 string searchString = "."; searchString += PATH_SEPARATOR;
492 while((pos = path.rfind(searchString)) != string::npos) { //while you don't have a complete path
493 if (pos == 0) { break; //you are at the end
494 }else if (path[(pos-1)] == '.') { //you want your parent directory ../
495 path = path.substr(0, pos-1);
496 index--;
497 if (index == 0) { break; }
498 }else if (path[(pos-1)] == '/') { //you want the current working dir ./
499 path = path.substr(0, pos);
500 }else if (pos == 1) { break; //you are at the end
501 }else { m->mothurOut("[ERROR}: Can not resolve path for " + fileName + "\n"); m->setControl_pressed(true); return fileName; }
502 }
503 }
504
505 for (int i = index; i >= 0; i--) { newFileName = dirs[i] + PATH_SEPARATOR + newFileName; }
506
507 #if defined NON_WINDOWS
508 newFileName = PATH_SEPARATOR + newFileName;
509 #endif
510 return newFileName;
511 }
512 }
513 catch(exception& e) {
514 m->errorOut(e, "Utils", "getFullPathName");
515 exit(1);
516 }
517 }
518 /********************************************************************/
findProgramPath(string programName)519 string Utils::findProgramPath(string programName){
520 try {
521 //look in ./
522 //is this the programs path?
523 string tempIn = ".";
524 tempIn += PATH_SEPARATOR;
525
526 //if this file exists
527 string pPath = "";
528 if (fileExists(tempIn+programName)) { pPath = getFullPathName(tempIn); if (m->getDebug()) { m->mothurOut("[DEBUG]: found it, programPath = " + pPath + "\n"); } return pPath; }
529
530 if (m->getDebug()) { m->mothurOut("[DEBUG]: dir's in path: \n"); }
531
532 //get path related to mothur
533 for (int i = 0; i < paths.size(); i++) {
534
535 if (m->getDebug()) { m->mothurOut("[DEBUG]: " + paths[i] + "\n"); }
536
537 //to lower so we can find it
538 string tempLower = "";
539 for (int j = 0; j < paths[i].length(); j++) { tempLower += tolower(paths[i][j]); }
540
541 //is this mothurs path?
542 if (tempLower.find(programName) != -1) { pPath = paths[i]; break; }
543 }
544
545 if (m->getDebug()) { m->mothurOut("[DEBUG]: programPath = " + pPath + "\n"); }
546
547 //add programName so it looks like what argv would look like
548 if (pPath != "") { pPath += PATH_SEPARATOR; }
549 else {
550 //okay programName is not in the path, so the folder programName is in must be in the path
551 //lets find out which one
552
553 //get path related to the program
554 for (int i = 0; i < paths.size(); i++) {
555
556 if (m->getDebug()) { m->mothurOut("[DEBUG]: looking in " + paths[i] + " for " + programName + " \n"); }
557
558 //is this the programs path?
559 string tempIn = paths[i] + PATH_SEPARATOR;
560
561 //if this file exists
562 if (fileExists(tempIn + programName)) { pPath = getFullPathName(tempIn); if (m->getDebug()) { m->mothurOut("[DEBUG]: found it, programPath = " + pPath + "\n"); } break; }
563 }
564 }
565
566 #if defined NON_WINDOWS
567 #else
568 if (pPath == "") {
569 char buffer[MAX_PATH];
570 GetModuleFileName(NULL, buffer, MAX_PATH) ;
571
572 pPath = buffer;
573 pPath = getPathName(pPath);
574
575 //if this file exists
576 if (fileExists(pPath + programName)) { pPath = getFullPathName(pPath); if (m->getDebug()) { m->mothurOut("[DEBUG]: found it, programPath = " + pPath + "\n"); } }
577 }
578 #endif
579 return pPath;
580 }
581 catch(exception& e) {
582 m->errorOut(e, "Utils", "findProgramPath");
583 exit(1);
584 }
585 }
586 /***********************************************************************/
checkLocations(string & filename,vector<string> locations)587 bool Utils::checkLocations(string& filename, vector<string> locations){
588 try {
589 filename = getFullPathName(filename);
590 string inputDir = locations[0];
591 string outputDir = locations[1];
592 string defaultPath = locations[2];
593 string mothurPath = locations[3];
594 string mothurToolsPath = locations[4];
595
596 bool ableToOpen;
597 ifstream in;
598 ableToOpen = openInputFile(filename, in, "noerror");
599 in.close();
600
601 //if you can't open it, try input location
602 if (!ableToOpen) {
603 if (inputDir != "") { //default path is set
604 string tryPath = inputDir + getSimpleName(filename);
605 m->mothurOut("Unable to open " + filename + ". Trying input directory " + tryPath + ".\n");
606 ifstream in2; ableToOpen = openInputFile(tryPath, in2, "noerror"); in2.close();
607 filename = tryPath;
608 }
609 }
610
611 //if you can't open it, try output location
612 if (!ableToOpen) {
613 if (outputDir != "") { //default path is set
614 string tryPath = outputDir + getSimpleName(filename);
615 m->mothurOut("Unable to open " + filename + ". Trying output directory " + tryPath+ ".\n");
616 ifstream in2; ableToOpen = openInputFile(tryPath, in2, "noerror"); in2.close();
617 filename = tryPath;
618 }
619 }
620
621
622 //if you can't open it, try default location
623 if (!ableToOpen) {
624 if (defaultPath != "") { //default path is set
625 string tryPath = defaultPath + getSimpleName(filename);
626 m->mothurOut("Unable to open " + filename + ". Trying default " + tryPath+ ".\n");
627 ifstream in2; ableToOpen = openInputFile(tryPath, in2, "noerror"); in2.close();
628 filename = tryPath;
629 }
630 }
631
632 //if you can't open it its not in current working directory or inputDir, try mothur excutable location
633 if (!ableToOpen) {
634 string tryPath = mothurPath + getSimpleName(filename);
635 m->mothurOut("Unable to open " + filename + ". Trying mothur's executable location " + tryPath+ ".\n");
636 ifstream in2; ableToOpen = openInputFile(tryPath, in2, "noerror"); in2.close();
637 filename = tryPath;
638 }
639
640 //if you can't open it its not in current working directory or inputDir, try mothur excutable location
641 if (!ableToOpen) {
642 string tryPath = mothurToolsPath + getSimpleName(filename);
643 m->mothurOut("Unable to open " + filename + ". Trying mothur's tools location " + tryPath+ ".\n");
644 ifstream in2; ableToOpen = openInputFile(tryPath, in2, "noerror"); in2.close();
645 filename = tryPath;
646 }
647
648 if (!ableToOpen) { m->mothurOut("Unable to open " + filename + ".\n"); return false; }
649
650 return true;
651 }
652 catch(exception& e) {
653 m->errorOut(e, "Utils", "checkLocations");
654 exit(1);
655 }
656 }
657 /***********************************************************************/
checkLocations(string & filename,vector<string> locations,string silent)658 bool Utils::checkLocations(string& filename, vector<string> locations, string silent){
659 try {
660 filename = getFullPathName(filename);
661 string inputDir = locations[0];
662 string outputDir = locations[1];
663 string defaultPath = locations[2];
664 string mothurPath = locations[3];
665 string mothurToolsPath = locations[4];
666
667 bool ableToOpen;
668 ifstream in;
669 ableToOpen = openInputFile(filename, in, "noerror");
670 in.close();
671
672 //if you can't open it, try input location
673 if (!ableToOpen) {
674 if (inputDir != "") { //default path is set
675 string tryPath = inputDir + getSimpleName(filename);
676 ifstream in2; ableToOpen = openInputFile(tryPath, in2, "noerror"); in2.close();
677 filename = tryPath;
678 }
679 }
680
681 //if you can't open it, try output location
682 if (!ableToOpen) {
683 if (outputDir != "") { //default path is set
684 string tryPath = outputDir + getSimpleName(filename);
685 ifstream in2; ableToOpen = openInputFile(tryPath, in2, "noerror"); in2.close();
686 filename = tryPath;
687 }
688 }
689
690
691 //if you can't open it, try default location
692 if (!ableToOpen) {
693 if (defaultPath != "") { //default path is set
694 string tryPath = defaultPath + getSimpleName(filename);
695 ifstream in2; ableToOpen = openInputFile(tryPath, in2, "noerror"); in2.close();
696 filename = tryPath;
697 }
698 }
699
700 //if you can't open it its not in current working directory or inputDir, try mothur excutable location
701 if (!ableToOpen) {
702 string tryPath = mothurPath + getSimpleName(filename);
703 ifstream in2; ableToOpen = openInputFile(tryPath, in2, "noerror"); in2.close();
704 filename = tryPath;
705 }
706
707 //if you can't open it its not in current working directory or inputDir, try mothur excutable location
708 if (!ableToOpen) {
709 if (mothurToolsPath != "") { //default path is set
710 string tryPath = mothurToolsPath + getSimpleName(filename);
711 ifstream in2; ableToOpen = openInputFile(tryPath, in2, "noerror"); in2.close();
712 filename = tryPath;
713 }
714 }
715
716 if (!ableToOpen) { return false; }
717
718 return true;
719 }
720 catch(exception& e) {
721 m->errorOut(e, "Utils", "checkLocations");
722 exit(1);
723 }
724 }
725 /***********************************************************************/
findBlastLocation(string & toolLocation,string mothurProgramPath,vector<string> locations)726 bool Utils::findBlastLocation(string& toolLocation, string mothurProgramPath, vector<string> locations){
727 try {
728 bool foundTool = false;
729 string programName = "formatdb"; programName += EXECUTABLE_EXT;
730
731 toolLocation = "";
732 string blastBin = "blast"; blastBin += PATH_SEPARATOR; blastBin += "bin"; blastBin += PATH_SEPARATOR;
733
734 for (int i = 0; i < locations.size(); i++) { locations[i] += blastBin; }
735
736 vector<string> versionOutputs;
737 foundTool = findTool(programName, toolLocation, mothurProgramPath, versionOutputs, locations);
738
739 if (foundTool) { toolLocation = hasPath(toolLocation); }
740 else { toolLocation = ""; }
741
742 return foundTool;
743 }
744 catch(exception& e) {
745 m->errorOut(e, "Utils", "findBlastLocation");
746 exit(1);
747 }
748 }
749 /***********************************************************************/
findTool(string & toolName,string & toolLocation,string mothurProgramPath,vector<string> & versionOutputs,vector<string> locations)750 bool Utils::findTool(string& toolName, string& toolLocation, string mothurProgramPath, vector<string>& versionOutputs, vector<string> locations){
751 try {
752 bool foundTool = false;
753 string toolCommand = mothurProgramPath + toolName; //windows def
754
755 //test to make sure tool exists
756 ifstream in;
757 toolCommand = getFullPathName(toolCommand);
758 bool ableToOpen = openInputFile(toolCommand, in, "no error"); in.close();
759 if(!ableToOpen) {
760
761 if (checkLocations(toolCommand, locations)) { toolLocation = toolCommand; foundTool = true; }
762 else {
763
764 m->mothurOut(toolCommand + " file does not exist. Checking path... \n");
765 //check to see if tool is in the path??
766
767 ifstream in2;
768 string uLocation = findProgramPath(toolName);
769 uLocation += toolName;
770 ableToOpen = openInputFile(uLocation, in2, "no error"); in2.close();
771
772 if(!ableToOpen) { m->mothurOut("[ERROR]: " + uLocation + " file does not exist. mothur requires the " + toolName + " executable.\n"); foundTool = false; }
773 else { m->mothurOut("Found " + toolName + " in your path, using " + uLocation + "\n"); toolLocation = uLocation; foundTool = true; }
774 }
775 }else { toolLocation = toolCommand; foundTool = true; }
776
777 toolLocation = getFullPathName(toolLocation);
778
779 if (foundTool) { //check fasterq_dump version
780 string versionTestCommand = toolLocation + " --version > ./commandScreen.output 2>&1";
781 system(versionTestCommand.c_str());
782
783 ifstream in;
784 string versionOutput = "./commandScreen.output";
785 openInputFile(versionOutput, in, "no error");
786
787 string output = getline(in); gobble(in);
788 versionOutputs = splitWhiteSpace(output);
789 in.close();
790
791 mothurRemove(versionOutput);
792 }
793
794 return foundTool;
795 }
796 catch(exception& e) {
797 m->errorOut(e, "Utils", "findTool");
798 exit(1);
799 }
800 }
801 /***********************************************************************/
trimStringEnd(string name,int numToRemove)802 string Utils::trimStringEnd(string name, int numToRemove){
803 try {
804 int length = name.length();
805 string trimmedName = "";
806
807 if (length > numToRemove) { trimmedName = name.substr(0, (length-numToRemove)); }
808
809 return trimmedName;
810 }
811 catch(exception& e) {
812 m->errorOut(e, "Utils", "trimString");
813 exit(1);
814 }
815 }
816 /***********************************************************************/
817
openInputFile(string fileName,ifstream & fileHandle,string mode)818 bool Utils::openInputFile(string fileName, ifstream& fileHandle, string mode){
819 try {
820 //get full path name
821 string completeFileName = getFullPathName(fileName);
822
823 fileHandle.open(completeFileName.c_str());
824 if(!fileHandle) { return false; }
825 else {
826 //check for blank file
827 zapGremlins(fileHandle);
828 gobble(fileHandle);
829 return true;
830 }
831 }
832 catch(exception& e) {
833 m->errorOut(e, "Utils", "openInputFile - no Error");
834 exit(1);
835 }
836 }
837 /***********************************************************************/
838
openInputFile(string fileName,ifstream & fileHandle)839 bool Utils::openInputFile(string fileName, ifstream& fileHandle){
840 try {
841
842 //get full path name
843 string completeFileName = getFullPathName(fileName);
844
845 fileHandle.open(completeFileName.c_str());
846 if(!fileHandle) { m->mothurOut("[ERROR]: Could not open " + completeFileName + "\n"); return false; }
847 else {
848 //check for blank file
849 zapGremlins(fileHandle);
850 gobble(fileHandle);
851 if (fileHandle.eof()) { m->mothurOut("[ERROR]: " + completeFileName + " is blank. Please correct.\n"); }
852 return true;
853 }
854 }
855 catch(exception& e) {
856 m->errorOut(e, "Utils", "openInputFile");
857 exit(1);
858 }
859 }
860 /***********************************************************************/
openInputFileBinary(string fileName,ifstream & fileHandle)861 bool Utils::openInputFileBinary(string fileName, ifstream& fileHandle){
862 try {
863
864 //get full path name
865 string completeFileName = getFullPathName(fileName);
866
867 fileHandle.open(completeFileName.c_str(), ios::binary);
868 if(!fileHandle) {
869 m->mothurOut("[ERROR]: Could not open " + completeFileName+ "\n"); return false; }
870 else {
871 if (fileHandle.eof()) { m->mothurOut("[ERROR]: " + completeFileName + " is blank. Please correct.\n"); }
872 return true;
873 }
874 }
875 catch(exception& e) {
876 m->errorOut(e, "Utils", "openInputFileBinary");
877 exit(1);
878 }
879 }
880 /***********************************************************************/
openInputFileBinary(string fileName,ifstream & fileHandle,string noerror)881 bool Utils::openInputFileBinary(string fileName, ifstream& fileHandle, string noerror){
882 try {
883
884 //get full path name
885 string completeFileName = getFullPathName(fileName);
886
887 fileHandle.open(completeFileName.c_str(), ios::binary);
888 if(!fileHandle) { return false; }
889 else { return true; }
890 }
891 catch(exception& e) {
892 m->errorOut(e, "Utils", "openInputFileBinary - no error");
893 exit(1);
894 }
895 }
896 /***********************************************************************/
897 #ifdef USE_BOOST
openInputFileBinary(string fileName,ifstream & file,boost::iostreams::filtering_istream & in)898 bool Utils::openInputFileBinary(string fileName, ifstream& file, boost::iostreams::filtering_istream& in){
899 try {
900
901 //get full path name
902 string completeFileName = getFullPathName(fileName);
903
904 file.open(completeFileName.c_str(), ios_base::in | ios_base::binary);
905
906 if(!file) { m->mothurOut("[ERROR]: Could not open " + completeFileName + "\n"); return false; }
907 else {
908 //check for blank file
909 in.push(boost::iostreams::gzip_decompressor());
910 in.push(file);
911 if (file.eof()) { m->mothurOut("[ERROR]: " + completeFileName + " is blank. Please correct.\n"); }
912 return true;
913 }
914 }
915 catch(exception& e) {
916 m->errorOut(e, "Utils", "openInputFileGZBinary");
917 exit(1);
918 }
919 }
920 /***********************************************************************/
openInputFileBinary(string fileName,ifstream & file,boost::iostreams::filtering_istream & in,string noerror)921 bool Utils::openInputFileBinary(string fileName, ifstream& file, boost::iostreams::filtering_istream& in, string noerror){
922 try {
923
924 //get full path name
925 string completeFileName = getFullPathName(fileName);
926
927 file.open(completeFileName.c_str(), ios_base::in | ios_base::binary);
928
929 if(!file) { return false; }
930 else { //check for blank file
931 in.push(boost::iostreams::gzip_decompressor());
932 in.push(file);
933 return true;
934 }
935 }
936 catch(exception& e) {
937 m->errorOut(e, "Utils", "openInputFileGZBinary - no error");
938 exit(1);
939 }
940 }
941 #endif
942 /***********************************************************************/
943 //results[0] = allGZ, results[1] = allNotGZ
allGZFiles(vector<string> & files)944 vector<bool> Utils::allGZFiles(vector<string> & files){
945 try {
946 vector<bool> results;
947 bool allGZ = true;
948 bool allNOTGZ = true;
949
950 for (int i = 0; i < files.size(); i++) {
951 if (m->getControl_pressed()) { break; }
952
953 //ignore none and blank filenames
954 if ((files[i] != "") || (files[i] != "NONE")) {
955 if (isGZ(files[i])[1]) { allNOTGZ = false; }
956 else { allGZ = false; }
957 }
958 }
959
960 if (!allGZ && !allNOTGZ) { //mixed bag
961 m->mothurOut("[ERROR]: Cannot mix .gz and non compressed files. Please decompress your files and rerun.\n"); m->setControl_pressed(true);
962 }
963
964 results.push_back(allGZ);
965 results.push_back(allNOTGZ);
966
967 return results;
968 }
969 catch(exception& e) {
970 m->errorOut(e, "Utils", "areGZFiles");
971 exit(1);
972 }
973 }
974 /***********************************************************************/
975 //returns false if no api installed
isHDF5(string filename)976 bool Utils::isHDF5(string filename){
977 try {
978 bool result = false;
979
980 #ifdef USE_HDF5
981 if(!H5::H5File::isHdf5(filename.c_str())){
982 //m->mothurOut("[WARNING]: " + filename + " is not an HDF5 file.\n");
983 return false;
984 }else { return true; }
985 #else
986 return false;
987 #endif
988
989 return result;
990 }
991 catch(exception& e) {
992 m->errorOut(e, "Utils", "isHDF5");
993 exit(1);
994 }
995 }
996 /***********************************************************************/
//Tests whether filename can be read as a gzip file.
//Returns a two element vector, both entries start as false:
//  results[0] becomes true once a file with a .gz / .GZ extension has been opened,
//  results[1] becomes true once one character has been extracted through the gzip filter
//             without a boost gzip_error being caught.
//Without USE_BOOST an [ERROR] is logged, control_pressed is set and both entries stay false.
vector<bool> Utils::isGZ(string filename){
    try {
        vector<bool> results; results.resize(2, false);
#ifdef USE_BOOST
        ifstream fileHandle;
        boost::iostreams::filtering_istream gzin;

        //wrong extension - not treated as gz, nothing is opened
        if ((getExtension(filename) != ".gz") && (getExtension(filename) != ".GZ")) { return results; } // results[0] = false; results[1] = false;

        bool ableToOpen = openInputFileBinary(filename, fileHandle, gzin, ""); //no error
        if (!ableToOpen) { return results; } // results[0] = false; results[1] = false;
        else {  results[0] = true;  }

        //try to pull one character through the decompressor
        char c;
        try
        {
            gzin >> c;
            results[1] = true;
        }
        catch ( boost::iostreams::gzip_error & e )
        {
            //could open the file, but the gzip filter rejected its contents
            gzin.pop();
            fileHandle.close();
            return results;  // results[0] = true; results[1] = false;
        }
        fileHandle.close();
#else
        m->mothurOut("[ERROR]: cannot test for gz format without enabling boost libraries.\n"); m->setControl_pressed(true);
#endif
        return results; //with boost: results[0] = true; results[1] = true;  without boost: both false
    }
    catch(exception& e) {
        m->errorOut(e, "Utils", "isGZ");
        exit(1);
    }
}
1033
1034 /***********************************************************************/
1035
renameFile(string oldName,string newName)1036 int Utils::renameFile(string oldName, string newName){
1037 try {
1038 if(m->getDebug()) { m->mothurOut("[DEBUG]: renaming " + oldName + " to " + newName + "\n"); }
1039
1040 if (oldName == newName) { return 0; }
1041
1042 ifstream inTest;
1043 bool exist = openInputFile(newName, inTest, "");
1044 inTest.close();
1045
1046 #if defined NON_WINDOWS
1047 if (exist) { //you could open it so you want to delete it
1048 if(m->getDebug()) { m->mothurOut("[DEBUG]: removing old copy of " + newName + "\n"); }
1049 mothurRemove(newName);
1050 }
1051
1052 int renameOk = rename(oldName.c_str(), newName.c_str());
1053
1054 if(m->getDebug()) { m->mothurOut("[DEBUG]: rename " + oldName + " " + newName + " returned " + toString(renameOk) + "\n"); }
1055 /*
1056 if(m->getDebug()) { m->mothurOut("[DEBUG]: mv " + oldName + " to " + newName + "\n"); }
1057
1058 string command = "mv " + oldName + " " + newName;
1059
1060 if(m->getDebug()) { m->mothurOut("[DEBUG]: running system command mv " + oldName + " " + newName + "\n"); }
1061
1062 int returnCode = system(command.c_str());
1063
1064 if(m->getDebug()) { m->mothurOut("[DEBUG]: system command mv " + oldName + " " + newName + " returned " + toString(returnCode) + "\n"); }
1065
1066 if (returnCode != 0) {
1067 int renameOk = rename(oldName.c_str(), newName.c_str());
1068
1069 if(m->getDebug()) { m->mothurOut("[DEBUG]: rename " + oldName + " " + newName + " returned " + toString(renameOk) + "\n"); }
1070 }
1071 */
1072 #else
1073 mothurRemove(newName);
1074 int renameOk = rename(oldName.c_str(), newName.c_str());
1075
1076 if(m->getDebug()) { m->mothurOut("[DEBUG]: rename " + oldName + " " + newName + " returned " + toString(renameOk) + "\n"); }
1077 #endif
1078 return 0;
1079
1080 }
1081 catch(exception& e) {
1082 m->errorOut(e, "Utils", "renameFile");
1083 exit(1);
1084 }
1085 }
1086 /***********************************************************************/
1087
copyFile(string oldName,string newName)1088 int Utils::copyFile(string oldName, string newName){
1089 try {
1090 if(m->getDebug()) { m->mothurOut("[DEBUG]: renaming " + oldName + " to " + newName + "\n"); }
1091
1092 if (oldName == newName) { return 0; }
1093
1094 ifstream inTest;
1095 bool exist = openInputFile(newName, inTest, "");
1096 inTest.close();
1097
1098 #if defined NON_WINDOWS
1099 if (exist) { //you could open it so you want to delete it
1100 if(m->getDebug()) { m->mothurOut("[DEBUG]: removing old copy of " + newName + "\n"); }
1101 mothurRemove(newName);
1102 }
1103 appendFiles(oldName, newName);
1104 //if(m->getDebug()) { m->mothurOut("[DEBUG]: cp " + oldName + " to " + newName + "\n"); }
1105
1106 //string command = "cp " + oldName + " " + newName;
1107
1108 //if(m->getDebug()) { m->mothurOut("[DEBUG]: running system command cp " + oldName + " " + newName + "\n"); }
1109
1110 //int returnCode = system(command.c_str());
1111
1112 // if(m->getDebug()) { m->mothurOut("[DEBUG]: system command cp " + oldName + " " + newName + " returned " + toString(returnCode) + "\n"); }
1113 #else
1114 mothurRemove(newName);
1115 appendFiles(oldName, newName);
1116 #endif
1117 return 0;
1118
1119 }
1120 catch(exception& e) {
1121 m->errorOut(e, "Utils", "copyFile");
1122 exit(1);
1123 }
1124 }
1125
1126 /***********************************************************************/
1127
openOutputFile(string fileName,ofstream & fileHandle)1128 bool Utils::openOutputFile(string fileName, ofstream& fileHandle){
1129 try {
1130 string completeFileName = getFullPathName(fileName);
1131 fileHandle.open(completeFileName.c_str(), ios::trunc);
1132
1133 if(!fileHandle) { m->mothurOut("[ERROR]: Could not open " + completeFileName + "\n"); return false; }
1134 else { return true; }
1135 }
1136 catch(exception& e) {
1137 m->errorOut(e, "Utils", "openOutputFile");
1138 exit(1);
1139 }
1140
1141 }
1142 /***********************************************************************/
1143
openOutputFileBinary(string fileName,ofstream & fileHandle)1144 bool Utils::openOutputFileBinary(string fileName, ofstream& fileHandle){
1145 try {
1146 string completeFileName = getFullPathName(fileName);
1147 fileHandle.open(completeFileName.c_str(), ios::trunc | ios::binary);
1148
1149 if(!fileHandle) { m->mothurOut("[ERROR]: Could not open " + completeFileName + "\n"); return false; }
1150 else { return true; }
1151 }
1152 catch(exception& e) {
1153 m->errorOut(e, "Utils", "openOutputFileBinary");
1154 exit(1);
1155 }
1156 }
1157 /**************************************************************************************************/
appendFiles(string temp,string filename)1158 int Utils::appendFiles(string temp, string filename) {
1159 try{
1160 ofstream output;
1161 ifstream input;
1162
1163 //open output file in append mode
1164 openOutputFileBinaryAppend(filename, output);
1165 bool ableToOpen = openInputFileBinary(temp, input, "no error");
1166
1167 int numLines = 0;
1168 if (ableToOpen) { //you opened it
1169 char buffer[4096];
1170 while (!input.eof()) {
1171 input.read(buffer, 4096);
1172 output.write(buffer, input.gcount());
1173 //count number of lines
1174 for (int i = 0; i < input.gcount(); i++) { if (buffer[i] == '\n') {numLines++;} }
1175 }
1176 input.close();
1177 }
1178 output.close();
1179
1180 return numLines;
1181 }
1182 catch(exception& e) {
1183 m->errorOut(e, "Utils", "appendFiles");
1184 exit(1);
1185 }
1186 }
1187 /**************************************************************************************************/
appendFiles(string filename,ofstream & out)1188 void Utils::appendFiles(string filename, ofstream& out) {
1189 try{
1190 ifstream input;
1191 bool ableToOpen = openInputFileBinary(filename, input, "no error");
1192
1193 if (ableToOpen) { //you opened it
1194 char buffer[4096];
1195 while (!input.eof()) {
1196 if (m->getControl_pressed()) { break; }
1197 input.read(buffer, 4096);
1198 out.write(buffer, input.gcount());
1199 }
1200 input.close();
1201 }
1202 }
1203 catch(exception& e) {
1204 m->errorOut(e, "Utils", "appendFiles");
1205 exit(1);
1206 }
1207 }
1208
1209 /**************************************************************************************************/
appendFilesFront(string temp,string filename)1210 int Utils::appendFilesFront(string temp, string filename) {
1211 try{
1212 ofstream output;
1213 ifstream input;
1214
1215 //open output file in append mode
1216 openOutputFileBinaryAppend(temp, output);
1217 bool ableToOpen = openInputFileBinary(filename, input, "no error");
1218
1219 if (ableToOpen) { //you opened it
1220 char buffer[4096];
1221 while (!input.eof()) {
1222 input.read(buffer, 4096);
1223 output.write(buffer, input.gcount());
1224 }
1225 input.close();
1226 }
1227 output.close();
1228
1229 mothurRemove(filename);
1230 renameFile(temp, filename);
1231 mothurRemove(temp);
1232
1233 return 0;
1234 }
1235 catch(exception& e) {
1236 m->errorOut(e, "Utils", "appendFiles");
1237 exit(1);
1238 }
1239 }
1240
1241 /**************************************************************************************************/
appendBinaryFiles(string temp,string filename)1242 bool Utils::appendBinaryFiles(string temp, string filename) {
1243 try{
1244 ofstream output;
1245 ifstream input;
1246
1247 //open output file in append mode
1248 openOutputFileBinaryAppend(filename, output);
1249 bool ableToOpen = openInputFileBinary(temp, input, "no error");
1250
1251 if (ableToOpen) { //you opened it
1252
1253 char buffer[4096];
1254 while (!input.eof()) {
1255 input.read(buffer, 4096);
1256 output.write(buffer, input.gcount());
1257 }
1258 input.close();
1259 }
1260 output.close();
1261
1262 return ableToOpen;
1263 }
1264 catch(exception& e) {
1265 m->errorOut(e, "Utils", "appendBinaryFiles");
1266 exit(1);
1267 }
1268 }
1269 /**************************************************************************************************/
appendSFFFiles(string temp,string filename)1270 bool Utils::appendSFFFiles(string temp, string filename) {
1271 try{
1272 ofstream output;
1273 bool ableToOpen = true;
1274
1275 //open output file in append mode
1276 string fullFileName = getFullPathName(filename);
1277
1278 output.open(fullFileName.c_str(), ios::app | ios::binary);
1279 if(!output) { m->mothurOut("[ERROR]: Could not open " + fullFileName + "\n"); return false; }
1280 else {
1281 //get full path name
1282 string completeFileName = getFullPathName(temp);
1283 ifstream input;
1284 openInputFileBinary(completeFileName, input);
1285
1286 if(!input) { return false; }
1287 else {
1288 char buffer[4096];
1289 while (!input.eof()) {
1290 input.read(buffer, 4096);
1291 output.write(buffer, input.gcount());
1292 }
1293 input.close();
1294 }
1295 output.close();
1296 }
1297
1298 return ableToOpen;
1299 }
1300 catch(exception& e) {
1301 m->errorOut(e, "Utils", "appendSFFFiles");
1302 exit(1);
1303 }
1304 }
1305 /**************************************************************************************************/
appendFilesWithoutHeaders(string temp,string filename)1306 int Utils::appendFilesWithoutHeaders(string temp, string filename) {
1307 try{
1308 ofstream output;
1309 ifstream input;
1310
1311 //open output file in append mode
1312 openOutputFileAppend(filename, output);
1313 bool ableToOpen = openInputFile(temp, input, "no error");
1314
1315 int numLines = 0;
1316 if (ableToOpen) { //you opened it
1317
1318 string headers = getline(input); gobble(input);
1319 char buffer[4096];
1320 while (!input.eof()) {
1321 input.read(buffer, 4096);
1322 output.write(buffer, input.gcount());
1323 //count number of lines
1324 for (int i = 0; i < input.gcount(); i++) { if (buffer[i] == '\n') {numLines++;} }
1325 }
1326 input.close();
1327 }
1328
1329 output.close();
1330
1331 return numLines;
1332 }
1333 catch(exception& e) {
1334 m->errorOut(e, "Utils", "appendFiles");
1335 exit(1);
1336 }
1337 }
1338 /**************************************************************************************************/
//Sorts a distance file and returns the name of the sorted file, which is the
//root name of distFile followed by "sorted.dist".
//NON_WINDOWS: shells out to the system sort command.
//Otherwise: rewrites each line with the distance first into a temp file, runs the
//Windows sort on it, then writes the columns back in their original order and removes the temp files.
//outputDir is not used by the active code (its only use is commented out).
//NOTE(review): the file names are concatenated into the shell command unquoted and the
//result of system() is not checked - names containing spaces presumably break this; confirm.
string Utils::sortFile(string distFile, string outputDir){
    try {

        //if (outputDir == "") {  outputDir += hasPath(distFile);  }
        string outfile = getRootName(distFile) + "sorted.dist";


        //if you can, use the unix sort since its been optimized for years
#if defined NON_WINDOWS
        string command = "sort -n -k +3 " + distFile + " -o " + outfile;
        system(command.c_str());
#else //you are stuck with my best attempt...
        //windows sort does not have a way to specify a column, only a character in the line
        //since we cannot assume that the distance will always be at the the same character location on each line
        //due to variable sequence name lengths, I chose to force the distance into first position, then sort and then put it back.

        //read in file line by file and put distance first
        string tempDistFile = distFile + ".temp";
        ifstream input;
        ofstream output;
        openInputFile(distFile, input);
        openOutputFile(tempDistFile, output);

        //each input line is read as: firstName secondName dist
        string firstName, secondName;
        float dist;
        while (!input.eof()) {
            input >> firstName >> secondName >> dist;
            output << dist << '\t' << firstName << '\t' << secondName << endl;
            gobble(input);
        }
        input.close();
        output.close();


        //sort using windows sort
        string tempOutfile = outfile + ".temp";
        string command = "sort " + tempDistFile + " /O " + tempOutfile;
        system(command.c_str());

        //read in sorted file and put distance at end again
        ifstream input2;
        ofstream output2;
        openInputFile(tempOutfile, input2);
        openOutputFile(outfile, output2);

        while (!input2.eof()) {
            input2 >> dist >> firstName >> secondName;
            output2 << firstName << '\t' << secondName << '\t' << dist << endl;
            gobble(input2);
        }
        input2.close();
        output2.close();

        //remove temp files
        mothurRemove(tempDistFile);
        mothurRemove(tempOutfile);
#endif

        return outfile;
    }
    catch(exception& e) {
        m->errorOut(e, "Utils", "sortFile");
        exit(1);
    }
}
1404 /***********************************************************************/
openOutputFileAppend(string fileName,ofstream & fileHandle)1405 bool Utils::openOutputFileAppend(string fileName, ofstream& fileHandle){
1406 try {
1407 fileName = getFullPathName(fileName);
1408
1409 fileHandle.open(fileName.c_str(), ios::app);
1410 if(!fileHandle) { m->mothurOut("[ERROR]: Could not open " + fileName + "\n"); return false; }
1411 return true;
1412 }
1413 catch(exception& e) {
1414 m->errorOut(e, "Utils", "openOutputFileAppend");
1415 exit(1);
1416 }
1417 }
1418 /***********************************************************************/
openOutputFileBinaryAppend(string fileName,ofstream & fileHandle)1419 bool Utils::openOutputFileBinaryAppend(string fileName, ofstream& fileHandle){
1420 try {
1421 fileName = getFullPathName(fileName);
1422
1423 fileHandle.open(fileName.c_str(), ios::app | ios::binary);
1424 if(!fileHandle) { m->mothurOut("[ERROR]: Could not open " + fileName + "\n"); return false; }
1425
1426 return true;
1427 }
1428 catch(exception& e) {
1429 m->errorOut(e, "Utils", "openOutputFileAppend");
1430 exit(1);
1431 }
1432 }
1433
1434 /***********************************************************************/
gobble(istream & f)1435 void Utils::gobble(istream& f){
1436 try {
1437
1438 char d;
1439 while(isspace(d=f.get())) { ;}
1440 if(!f.eof()) { f.putback(d); }
1441 }
1442 catch(exception& e) {
1443 m->errorOut(e, "Utils", "gobble");
1444 exit(1);
1445 }
1446 }
1447 /***********************************************************************/
gobble(istringstream & f)1448 void Utils::gobble(istringstream& f){
1449 try {
1450 char d;
1451 while(isspace(d=f.get())) {;}
1452 if(!f.eof()) { f.putback(d); }
1453 }
1454 catch(exception& e) {
1455 m->errorOut(e, "Utils", "gobble");
1456 exit(1);
1457 }
1458 }
1459 /***********************************************************************/
zapGremlins(istream & f)1460 void Utils::zapGremlins(istream& f){
1461 try {
1462
1463 char d;
1464 while('\0'==(d=f.get())) { ;}
1465 if(!f.eof()) { f.putback(d); }
1466 }
1467 catch(exception& e) {
1468 m->errorOut(e, "Utils", "zapGremlins");
1469 exit(1);
1470 }
1471 }
1472 /***********************************************************************/
zapGremlins(istringstream & f)1473 void Utils::zapGremlins(istringstream& f){
1474 try {
1475 char d;
1476 while('\0'==(d=f.get())) { ;}
1477 if(!f.eof()) { f.putback(d); }
1478 }
1479 catch(exception& e) {
1480 m->errorOut(e, "Utils", "zapGremlins");
1481 exit(1);
1482 }
1483 }
1484
1485 /***********************************************************************/
getline(istringstream & fileHandle)1486 string Utils::getline(istringstream& fileHandle) {
1487 try {
1488 string line = "";
1489 while (!fileHandle.eof()) {
1490 //get next character
1491 char c = fileHandle.get();
1492
1493 //are you at the end of the line
1494 if ((c == '\n') || (c == '\r') || (c == '\f')){ break; }
1495 else { line += c; }
1496 }
1497
1498 return line;
1499 }
1500 catch(exception& e) {
1501 m->errorOut(e, "Utils", "getline");
1502 exit(1);
1503 }
1504 }
1505 /***********************************************************************/
getline(ifstream & fileHandle,vector<string> & headers)1506 void Utils::getline(ifstream& fileHandle, vector<string>& headers) {
1507 try {
1508 string line = getline(fileHandle);
1509 headers = splitWhiteSpace(line);
1510 }
1511 catch(exception& e) {
1512 m->errorOut(e, "Utils", "getline");
1513 exit(1);
1514 }
1515 }
1516 /***********************************************************************/
getline(ifstream & fileHandle)1517 string Utils::getline(ifstream& fileHandle) {
1518 try {
1519 string line = "";
1520 while (fileHandle) {
1521 //get next character
1522 char c = fileHandle.get();
1523
1524 //are you at the end of the line
1525 if ((c == '\n') || (c == '\r') || (c == '\f') || (c == EOF)){ break; }
1526 else { line += c; }
1527 }
1528
1529 return line;
1530 }
1531 catch(exception& e) {
1532 m->errorOut(e, "Utils", "getline");
1533 exit(1);
1534 }
1535 }
1536 #ifdef USE_BOOST
1537 /***********************************************************************/
getline(boost::iostreams::filtering_istream & fileHandle)1538 string Utils::getline(boost::iostreams::filtering_istream& fileHandle) {
1539 try {
1540 string line = "";
1541 while (fileHandle) {
1542 //get next character
1543 char c = fileHandle.get();
1544
1545 //are you at the end of the line
1546 if ((c == '\n') || (c == '\r') || (c == '\f') || (c == EOF)){ break; }
1547 else { line += c; }
1548 }
1549
1550 return line;
1551 }
1552 catch(exception& e) {
1553 m->errorOut(e, "Utils", "getline");
1554 exit(1);
1555 }
1556 }
1557 #endif
1558 /**********************************************************************/
getPathName(string longName)1559 string Utils::getPathName(string longName){
1560 try {
1561 string rootPathName = longName;
1562
1563 if(longName.find_last_of("/\\") != longName.npos){
1564 int pos = longName.find_last_of("/\\")+1;
1565 rootPathName = longName.substr(0, pos);
1566 }
1567
1568 return rootPathName;
1569 }
1570 catch(exception& e) {
1571 m->errorOut(e, "Utils", "getPathName");
1572 exit(1);
1573 }
1574 }
1575 /***********************************************************************/
getRootName(string longName)1576 string Utils::getRootName(string longName){
1577 try {
1578
1579 string rootName = longName;
1580
1581 if(rootName.find_last_of(".") != rootName.npos){
1582 int pos = rootName.find_last_of('.')+1;
1583 rootName = rootName.substr(0, pos);
1584 }
1585
1586 return rootName;
1587 }
1588 catch(exception& e) {
1589 m->errorOut(e, "Utils", "getRootName");
1590 exit(1);
1591 }
1592 }
1593 /***********************************************************************/
1594
getSimpleName(string longName)1595 string Utils::getSimpleName(string longName){
1596 try {
1597 string simpleName = longName;
1598
1599 size_t found; found=longName.find_last_of("/\\");
1600
1601 if(found != longName.npos){ simpleName = longName.substr(found+1); }
1602
1603 return simpleName;
1604 }
1605 catch(exception& e) {
1606 m->errorOut(e, "Utils", "getSimpleName");
1607 exit(1);
1608 }
1609 }
1610 //**********************************************************************************************************************
getStringFromVector(vector<string> & list,string delim)1611 string Utils::getStringFromVector(vector<string>& list, string delim){
1612 try {
1613 string result = "";
1614
1615 if (list.size() == 0) { return result; }
1616
1617 result = list[0];
1618
1619 for (int i = 1; i < list.size(); i++) {
1620 if (m->getControl_pressed()) { break; }
1621 result += delim + list[i];
1622 }
1623
1624 return result;
1625 }
1626 catch(exception& e) {
1627 m->errorOut(e, "Utils", "getStringFromVector");
1628 exit(1);
1629 }
1630 }
1631 //**********************************************************************************************************************
getStringFromVector(vector<int> & list,string delim)1632 string Utils::getStringFromVector(vector<int>& list, string delim){
1633 try {
1634 string result = "";
1635
1636 if (list.size() == 0) { return result; }
1637
1638 result = toString(list[0]);
1639
1640 for (int i = 1; i < list.size(); i++) {
1641 if (m->getControl_pressed()) { break; }
1642 string temp = toString(list[i]);
1643 result += delim + temp;
1644 }
1645
1646 return result;
1647 }
1648 catch(exception& e) {
1649 m->errorOut(e, "Utils", "getStringFromVector");
1650 exit(1);
1651 }
1652 }
1653 //**********************************************************************************************************************
getSetFromList(ListVector * & list,vector<vector<string>> & otus)1654 set<string> Utils::getSetFromList(ListVector*& list, vector< vector<string> >& otus){
1655 try {
1656 set<string> results; otus.clear();
1657
1658 if (list->getNumSeqs() == 0) { return results; }
1659
1660 for (int i = 0; i < list->getNumBins(); i++) {
1661 if (m->getControl_pressed()) { break; }
1662
1663 string thisBin = list->get(i);
1664 vector<string> binNames; splitAtComma(thisBin, binNames);
1665
1666 otus.push_back(binNames);
1667
1668 for (int j = 0; j < binNames.size(); j++) { results.insert(binNames[j]); }
1669 }
1670
1671 return results;
1672 }
1673 catch(exception& e) {
1674 m->errorOut(e, "Utils", "getSetFromList");
1675 exit(1);
1676 }
1677 }
1678 //**********************************************************************************************************************
getStringFromVector(vector<double> & list,string delim)1679 string Utils::getStringFromVector(vector<double>& list, string delim){
1680 try {
1681 string result = "";
1682
1683 if (list.size() == 0) { return result; }
1684
1685 result = toString(list[0]);
1686
1687 for (int i = 1; i < list.size(); i++) {
1688 if (m->getControl_pressed()) { break; }
1689 string temp = toString(list[i]);
1690 result += delim + temp;
1691 }
1692
1693 return result;
1694 }
1695 catch(exception& e) {
1696 m->errorOut(e, "Utils", "getStringFromVector");
1697 exit(1);
1698 }
1699 }
1700 //**********************************************************************************************************************
getStringFromSet(set<int> & list,string delim)1701 string Utils::getStringFromSet(set<int>& list, string delim){
1702 try {
1703 string result = "";
1704
1705 if (list.size() == 0) { return result; }
1706
1707 vector<int> vlist;
1708 for (set<int>::iterator it = list.begin(); it != list.end(); it++) {
1709 if (m->getControl_pressed()) { break; }
1710 int value = *it;
1711 vlist.push_back(value);
1712 }
1713 result = getStringFromVector(vlist, delim);
1714
1715 return result;
1716 }
1717 catch(exception& e) {
1718 m->errorOut(e, "Utils", "getStringFromVector");
1719 exit(1);
1720 }
1721 }
1722 //**********************************************************************************************************************
getStringFromSet(set<string> & list,string delim)1723 string Utils::getStringFromSet(set<string>& list, string delim){
1724 try {
1725 string result = "";
1726
1727 if (list.size() == 0) { return result; }
1728
1729 vector<string> vlist;
1730 for (set<string>::iterator it = list.begin(); it != list.end(); it++) {
1731 if (m->getControl_pressed()) { break; }
1732 vlist.push_back(*it);
1733 }
1734 result = getStringFromVector(vlist, delim);
1735
1736 return result;
1737 }
1738 catch(exception& e) {
1739 m->errorOut(e, "Utils", "getStringFromVector");
1740 exit(1);
1741 }
1742 }
1743 //**********************************************************************************************************************
//Builds and prints the help text made of three numbered sections: common questions,
//common issues and how-tos. Each entry is printed as "<n>. <title>" followed by its answer.
//An empty section gets a pointer to the forum instead.
//NON_WINDOWS: the text is written to cout using the BOLDGREEN / BOLDBLUE / BOLDMAGENTA / RESET
//macros (defined elsewhere, presumably terminal colour codes) and sent to the log separately;
//otherwise everything goes through m->mothurOut.
//Returns the plain section text; the closing "For further assistance" line is printed/logged
//but is not part of the returned string.
//NOTE: assumes questions.size() == qanswers.size(), issues.size() == ianswers.size(), howtos.size() == hanswers.size()
string Utils::getFormattedHelp(vector<string> questions, vector<string> qanswers, vector<string> issues, vector<string> ianswers, vector<string> howtos,vector<string> hanswers) {
    try {

        //section headers, in the order they are printed
        string commonQuestions = ""; vector<string> headers;
        string header = "\nCommon Questions: \n"; headers.push_back(header);
        header = "\nCommon Issues: \n"; headers.push_back(header);
        header = "\nHow To: \n"; headers.push_back(header);

        //section 1: common questions
        commonQuestions += headers[0]+"\n";
#if defined NON_WINDOWS
        cout << BOLDGREEN << headers[0]; cout << RESET << endl;
#endif

        for (int i = 0; i < questions.size(); i++) {
            commonQuestions += toString(i+1) + ". " + questions[i]+"\n"+qanswers[i]+"\n";
#if defined NON_WINDOWS
            cout << BOLDBLUE << toString(i+1)+". "+questions[i]; cout << RESET << endl << qanswers[i] << endl;
#endif
        }

        if (questions.size() == 0) {
            commonQuestions += "Can't find your question? Please feel free to ask questions on our forum, https://forum.mothur.org.\n\n";
#if defined NON_WINDOWS
            cout << RESET "Can't find your question? Please feel free to ask questions on our forum, https://forum.mothur.org.\n\n";
#endif

        }

        //section 2: common issues
        commonQuestions += headers[1]+"\n";
#if defined NON_WINDOWS
        cout << BOLDGREEN << headers[1]; cout << RESET << endl;
#endif

        for (int i = 0; i < issues.size(); i++) {
            commonQuestions += toString(i+1)+". "+issues[i]+"\n"+ianswers[i]+"\n";
#if defined NON_WINDOWS
            cout << BOLDBLUE << toString(i+1)+". "+issues[i]; cout << RESET << endl << ianswers[i] << endl;
#endif
        }

        if (issues.size() == 0) {
            commonQuestions += "Can't find your issue? Please feel free to ask questions on our forum, https://forum.mothur.org or send bug reports to mothur.bugs@gmail.com.\n\n";
#if defined NON_WINDOWS
            cout << RESET "Can't find your issue? Please feel free to ask questions on our forum, https://forum.mothur.org or send bug reports to mothur.bugs@gmail.com.\n\n";
#endif

        }


        //section 3: how-tos
        commonQuestions += headers[2]+"\n";
#if defined NON_WINDOWS
        cout << BOLDGREEN << headers[2]; cout << RESET << endl;
#endif

        for (int i = 0; i < howtos.size(); i++) {
            commonQuestions += toString(i+1) + ". " + howtos[i]+"\n"+hanswers[i]+"\n";
#if defined NON_WINDOWS
            cout << BOLDBLUE << toString(i+1)+". "+howtos[i]; cout << RESET << endl << hanswers[i] << endl;
#endif
        }

        if (howtos.size() == 0) {
            commonQuestions += "Not sure how to do what you want? Please feel free to ask questions on our forum, https://forum.mothur.org.\n\n";
#if defined NON_WINDOWS
            cout << RESET "Not sure how to do what you want? Please feel free to ask questions on our forum, https://forum.mothur.org.\n\n";
#endif

        }

        //the sections were already written to cout above on NON_WINDOWS, so only log them there
#if defined NON_WINDOWS
        m->mothurOutJustToLog(commonQuestions);

        cout << BOLDMAGENTA << "\nFor further assistance please refer to the Mothur manual on our wiki at http://www.mothur.org/wiki.\n"; cout << RESET << endl;
        m->mothurOutJustToLog("\nFor further assistance please refer to the Mothur manual on our wiki at http://www.mothur.org/wiki.\n");
#else
        m->mothurOut(commonQuestions + "\nFor further assistance please refer to the Mothur manual on our wiki at http://www.mothur.org/wiki.\n");
#endif

        return commonQuestions;
    }
    catch(exception& e) {
        m->errorOut(e, "Utils", "getFormattedHelp");
        exit(1);
    }
}
1830 //**********************************************************************************************************************
removeNs(string seq)1831 string Utils::removeNs(string seq){
1832 try {
1833 string newSeq = "";
1834 for (int i = 0; i < seq.length(); i++) { if (seq[i] != 'N') { newSeq += seq[i]; } }
1835 return newSeq;
1836 }
1837 catch(exception& e) {
1838 m->errorOut(e, "Utils", "removeNs");
1839 exit(1);
1840 }
1841 }
1842 /***********************************************************************/
getOTUNames(vector<string> & currentLabels,int numBins,string tagHeader)1843 int Utils::getOTUNames(vector<string>& currentLabels, int numBins, string tagHeader){
1844 try {
1845
1846 if (currentLabels.size() == numBins) { return 0; }
1847
1848 int maxLabelNumber = 0;
1849 if (currentLabels.size() < numBins) {
1850 string snumBins = toString(numBins);
1851
1852 for (int i = 0; i < numBins; i++) {
1853 string binLabel = tagHeader;
1854 if (i < currentLabels.size()) { //label exists
1855 if (getLabelTag(currentLabels[i]) == tagHeader) { //adjust 0's??
1856 string sbinNumber = getSimpleLabel(currentLabels[i]);
1857 int tempBinNumber; mothurConvert(sbinNumber, tempBinNumber);
1858 if (tempBinNumber > maxLabelNumber) { maxLabelNumber = tempBinNumber; }
1859 if (sbinNumber.length() < snumBins.length()) {
1860 int diff = snumBins.length() - sbinNumber.length();
1861 for (int h = 0; h < diff; h++) { binLabel += "0"; }
1862 }
1863 binLabel += sbinNumber;
1864 currentLabels[i] = binLabel;
1865 }
1866 }else{ //create new label
1867 string sbinNumber = toString(maxLabelNumber+1); maxLabelNumber++;
1868 if (sbinNumber.length() < snumBins.length()) {
1869 int diff = snumBins.length() - sbinNumber.length();
1870 for (int h = 0; h < diff; h++) { binLabel += "0"; }
1871 }
1872 binLabel += sbinNumber;
1873 currentLabels.push_back(binLabel);
1874 }
1875 }
1876 }
1877 return currentLabels.size();
1878
1879 }
1880 catch(exception& e) {
1881 m->errorOut(e, "Utils", "getOTUNames");
1882 exit(1);
1883 }
1884 }
1885 /**************************************************************************************/
getCombos(vector<string> & groupComb,vector<string> userGroups,int & numComp)1886 void Utils::getCombos(vector<string>& groupComb, vector<string> userGroups, int& numComp) { //groupcomb, Groups, numcomb
1887 try {
1888 sort(userGroups.begin(), userGroups.end());
1889
1890 //calculate number of comparisons i.e. with groups A,B,C = AB, AC, BC = 3;
1891 numComp = 0;
1892 for (int i=0; i< userGroups.size(); i++) {
1893 numComp += i;
1894 for (int l = 0; l < i; l++) { //set group comparison labels
1895 if (userGroups[i] > userGroups[l]) { groupComb.push_back(userGroups[l] + "-" + userGroups[i]); }
1896 else { groupComb.push_back(userGroups[i] + "-" + userGroups[l]); }
1897 }
1898 }
1899 }
1900 catch(exception& e) {
1901 m->errorOut(e, "Utils", "getCombos");
1902 exit(1);
1903 }
1904 }
1905 /***********************************************************************/
dirCheckWritable(string & dirName)1906 bool Utils::dirCheckWritable(string& dirName){
1907 try {
1908
1909 if (dirName == "") { return false; }
1910
1911 //add / to name if needed
1912 string lastChar = dirName.substr(dirName.length()-1);
1913 if (lastChar != PATH_SEPARATOR) { dirName += PATH_SEPARATOR; }
1914
1915 //test to make sure directory exists
1916 dirName = getFullPathName(dirName);
1917 string outTemp = dirName + "temp"+ toString(time(NULL));
1918 ofstream out;
1919 out.open(outTemp.c_str(), ios::trunc);
1920 if(!out) { m->mothurOut(dirName + " directory does not exist or is not writable.\n"); }
1921 else{ out.close(); mothurRemove(outTemp); return true; }
1922
1923 return false;
1924 }
1925 catch(exception& e) {
1926 m->errorOut(e, "Utils", "dirCheckWritable");
1927 exit(1);
1928 }
1929 }
1930 /***********************************************************************/
dirCheckExists(string & dirName)1931 bool Utils::dirCheckExists(string& dirName){
1932 return (dirCheckExists(dirName, true));
1933 }
1934 /***********************************************************************/
dirCheckExists(string & dirName,bool reportError)1935 bool Utils::dirCheckExists(string& dirName, bool reportError){
1936 try {
1937
1938 if (dirName == "") { return false; }
1939
1940 //add / to name if needed
1941 string lastChar = dirName.substr(dirName.length()-1);
1942 if (lastChar != PATH_SEPARATOR) { dirName += PATH_SEPARATOR; }
1943
1944 //test to make sure directory exists
1945 dirName = getFullPathName(dirName);
1946
1947 #if defined USE_BOOST
1948
1949 boost::filesystem::path p(dirName.c_str());
1950
1951 if (exists(p)) { return true; }
1952 else { if (reportError) { m->mothurOut("[ERROR]: cannot access " + dirName + "\n"); } }
1953
1954 #else
1955 #if defined NON_WINDOWS
1956
1957 struct stat info;
1958
1959 if(stat(dirName.c_str(), &info ) != 0 ) {
1960 if (reportError) { m->mothurOut("[ERROR]: cannot access " + dirName + "\n"); }
1961 }else if( info.st_mode & S_IFDIR ) { // S_ISDIR() doesn't exist on my windows
1962 return true;
1963 }else {
1964 if (reportError) { m->mothurOut("[ERROR]: cannot access " + dirName + "\n"); }
1965 }
1966
1967 #else
1968 DWORD dwAttrib = GetFileAttributes(dirName.c_str());
1969
1970 if (dwAttrib != INVALID_FILE_ATTRIBUTES &&
1971 (dwAttrib & FILE_ATTRIBUTE_DIRECTORY)) { return true; }
1972 else { if (reportError) { m->mothurOut("[ERROR]: cannot access " + dirName + "\n"); } }
1973
1974 #endif
1975 #endif
1976 return false;
1977 }
1978 catch(exception& e) {
1979 m->errorOut(e, "Utils", "dirCheckExists");
1980 exit(1);
1981 }
1982 }
1983 /***********************************************************************/
1984 //returns true if it exists or if we can make it
mkDir(string & dirName)1985 bool Utils::mkDir(string& dirName){
1986 try {
1987 bool dirExist = dirCheckExists(dirName, false);
1988 if (dirExist) { return true; }
1989
1990 #ifdef USE_BOOST
1991
1992 boost::filesystem::path dir(dirName.c_str());
1993 if(boost::filesystem::create_directories(dir)) {}
1994 else { return false; }
1995
1996 #else
1997 #if defined NON_WINDOWS
1998
1999 if ((mkdir(dirName.c_str(), S_IRWXU | S_IRWXG | S_IRWXO )) == 0) {}
2000 else { return false; }
2001
2002 #else
2003
2004 if (CreateDirectory(dirName.c_str(), NULL) ||
2005 ERROR_ALREADY_EXISTS == GetLastError()) { }
2006 else { return false; }
2007
2008 #endif
2009 #endif
2010
2011 if (dirCheckWritable(dirName)) { return true; }
2012
2013 return false;
2014 }
2015 catch(exception& e) {
2016 m->errorOut(e, "Utils", "mkDir");
2017 exit(1);
2018 }
2019 }
2020 //***********************************************************************
parseClasses(string classes)2021 map<string, vector<string> > Utils::parseClasses(string classes){
2022 try {
2023 map<string, vector<string> > parts;
2024
2025 //treatment<Early|Late>-age<young|old>
2026 vector<string> pieces; splitAtDash(classes, pieces); // -> treatment<Early|Late>, age<young|old>
2027
2028 for (int i = 0; i < pieces.size(); i++) {
2029 string category = ""; string value = "";
2030 bool foundOpen = false;
2031 for (int j = 0; j < pieces[i].length(); j++) {
2032 if (m->getControl_pressed()) { return parts; }
2033
2034 if (pieces[i][j] == '<') { foundOpen = true; }
2035 else if (pieces[i][j] == '>') { j += pieces[i].length(); }
2036 else {
2037 if (!foundOpen) { category += pieces[i][j]; }
2038 else { value += pieces[i][j]; }
2039 }
2040 }
2041 vector<string> values; splitAtChar(value, values, '|');
2042 parts[category] = values;
2043 }
2044
2045 return parts;
2046 }
2047 catch(exception& e) {
2048 m->errorOut(e, "Utils", "parseClasses");
2049 exit(1);
2050 }
2051 }
2052 /**************************************************************************************************/
2053 //returns {Bacteria, Bacteroidetes, ..} and scores is filled with {100, 98, ...} or {null, null, null}
parseTax(string tax,vector<string> & scores)2054 vector<string> Utils::parseTax(string tax, vector<string>& scores) {
2055 try {
2056
2057 string taxon;
2058 vector<string> taxs;
2059
2060 while (tax.find_first_of(';') != -1) {
2061
2062 if (m->getControl_pressed()) { return taxs; }
2063
2064 //get taxon
2065 taxon = tax.substr(0,tax.find_first_of(';'));
2066
2067 int pos = taxon.find_last_of('(');
2068 if (pos != -1) {
2069 //is it a number?
2070 int pos2 = taxon.find_last_of(')');
2071 if (pos2 != -1) {
2072 string confidenceScore = taxon.substr(pos+1, (pos2-(pos+1)));
2073 if (isNumeric1(confidenceScore)) {
2074 taxon = taxon.substr(0, pos); //rip off confidence
2075 scores.push_back(confidenceScore);
2076 }else{ scores.push_back("null"); }
2077 }
2078 }else{ scores.push_back("null"); }
2079
2080 //strip "" if they are there
2081 pos = taxon.find("\"");
2082 if (pos != string::npos) {
2083 string newTax = "";
2084 for (int k = 0; k < taxon.length(); k++) {
2085 if (taxon[k] != '\"') { newTax += taxon[k]; }
2086 }
2087 taxon = newTax;
2088 }
2089
2090 //look for bootstrap value
2091 taxs.push_back(taxon);
2092 tax = tax.substr(tax.find_first_of(';')+1, tax.length());
2093 }
2094
2095 return taxs;
2096 }
2097 catch(exception& e) {
2098 m->errorOut(e, "Utils", "parseTax");
2099 exit(1);
2100 }
2101 }
2102
2103 /***********************************************************************/
hasPath(string longName)2104 string Utils::hasPath(string longName){
2105 try {
2106 string path = "";
2107 size_t found;
2108 found=longName.find_last_of("~/\\");
2109
2110 if(found != longName.npos){ path = longName.substr(0, found+1); }
2111
2112 return path;
2113 }
2114 catch(exception& e) {
2115 m->errorOut(e, "Utils", "hasPath");
2116 exit(1);
2117 }
2118 }
2119 /***********************************************************************/
getCurrentDate(string & thisYear,string & thisMonth,string & thisDay)2120 void Utils::getCurrentDate(string& thisYear, string& thisMonth, string& thisDay){
2121 try {
2122 time_t rawtime;
2123 struct tm * timeinfo;
2124
2125 time (&rawtime);
2126 timeinfo = localtime(&rawtime);
2127
2128 char buffer[80];
2129 strftime(buffer,sizeof(buffer),"%Y",timeinfo);
2130 string year(buffer); thisYear = year;
2131
2132 strftime(buffer,sizeof(buffer),"%m",timeinfo);
2133 string Month(buffer); thisMonth = Month;
2134
2135 strftime(buffer,sizeof(buffer),"%d",timeinfo);
2136 string Day(buffer); thisDay = Day;
2137 }
2138 catch(exception& e) {
2139 m->errorOut(e, "Utils", "getCurrentDate");
2140 exit(1);
2141 }
2142 }
2143 /***********************************************************************/
isASCII(string input)2144 bool Utils::isASCII(string input){
2145 try {
2146
2147 for (int i = 0; i < input.length(); i++) {
2148 if (isascii(input[i]) == 0) { return false; } //non ascii
2149 }
2150 return true;
2151 }
2152 catch(exception& e) {
2153 m->errorOut(e, "Utils", "isASCII");
2154 exit(1);
2155 }
2156 }
2157 /***********************************************************************/
getExtension(string longName)2158 string Utils::getExtension(string longName){
2159 try {
2160 string extension = "";
2161
2162 if(longName.find_last_of('.') != longName.npos){
2163 int pos = longName.find_last_of('.');
2164 extension = longName.substr(pos, longName.length());
2165 }
2166
2167 return extension;
2168 }
2169 catch(exception& e) {
2170 m->errorOut(e, "Utils", "getExtension");
2171 exit(1);
2172 }
2173 }
2174 /***********************************************************************/
mothurInitialPrep(string & defaultPath,string & tools,string & mothurVersion,string & releaseDate,string & OS)2175 bool Utils::mothurInitialPrep(string& defaultPath, string& tools, string& mothurVersion, string& releaseDate, string& OS){
2176 try {
2177
2178 string lastChar = "";
2179 #ifdef MOTHUR_FILES
2180 defaultPath = MOTHUR_FILES;
2181 defaultPath = removeQuotes(defaultPath);
2182 //add / to name if needed
2183 lastChar = defaultPath.substr(defaultPath.length()-1);
2184 if (lastChar != PATH_SEPARATOR) { defaultPath += PATH_SEPARATOR; }
2185
2186 defaultPath = getFullPathName(defaultPath);
2187 #else
2188 defaultPath = "";
2189 #endif
2190
2191 #ifdef MOTHUR_TOOLS
2192 tools = MOTHUR_TOOLS;
2193 tools = removeQuotes(tools);
2194 //add / to name if needed
2195 lastChar = tools.substr(tools.length()-1);
2196 if (lastChar != PATH_SEPARATOR) { tools += PATH_SEPARATOR; }
2197
2198 tools = getFullPathName(tools);
2199 #else
2200 tools = "";
2201 #endif
2202
2203 #ifdef LOGFILE_NAME
2204 string logfilename = LOGFILE_NAME;
2205 logfilename = getFullPathName(logfilename);
2206
2207 m->appendLogBuffer("Using Static Logfile " + logfilename + "\n");
2208
2209 m->setLogFileName(logfilename, false);
2210 m->mothurOut("\n");
2211 #endif
2212
2213 releaseDate = "";
2214 #ifdef RELEASE_DATE
2215 releaseDate = RELEASE_DATE;
2216 #else
2217 string year, month, day;
2218 getCurrentDate(year, month, day);
2219 releaseDate = month + "/" + day + "/" + year;
2220 #endif
2221
2222 mothurVersion = VERSION;
2223
2224
2225 //version
2226 #if defined NON_WINDOWS
2227 #if defined (__APPLE__) || (__MACH__)
2228 m->appendLogBuffer("Mac version\n\n");
2229 #else
2230 m->appendLogBuffer("Linux version\n\n");
2231 #endif
2232 #else
2233 m->appendLogBuffer("Windows version\n\n");
2234 #endif
2235
2236 string packagesUsed = "";
2237 #ifdef USE_READLINE
2238 packagesUsed += "ReadLine,";
2239 #endif
2240
2241 #ifdef USE_BOOST
2242 packagesUsed += "Boost,";
2243 #endif
2244
2245 #ifdef USE_HDF5
2246 packagesUsed += "HDF5,";
2247 #endif
2248
2249 #ifdef USE_GSL
2250 packagesUsed += "GSL,";
2251 #endif
2252
2253 if (packagesUsed != "") {
2254 //remove last comma
2255 packagesUsed = packagesUsed.substr(0,packagesUsed.length()-1);
2256 m->appendLogBuffer("Using " + packagesUsed + "\n");
2257 }
2258
2259 #ifdef MOTHUR_FILES
2260 m->appendLogBuffer("\nUsing default search path for mothur input files: " + defaultPath + "\n\n");
2261 #endif
2262
2263 #ifdef MOTHUR_TOOLS
2264 m->appendLogBuffer("\nUsing mothur tools location: " + tools + "\n\n");
2265 #endif
2266
2267 //header
2268 m->appendLogBuffer("mothur v." + mothurVersion + "\n");
2269 m->appendLogBuffer("Last updated: " + releaseDate + "\n");
2270 m->appendLogBuffer("by\n");
2271 m->appendLogBuffer("Patrick D. Schloss\n\n");
2272 m->appendLogBuffer("Department of Microbiology & Immunology\n\n");
2273 m->appendLogBuffer("University of Michigan\n");
2274 m->appendLogBuffer("http://www.mothur.org\n\n");
2275 m->appendLogBuffer("When using, please cite:\n");
2276 m->appendLogBuffer("Schloss, P.D., et al., Introducing mothur: Open-source, platform-independent, community-supported software for describing and comparing microbial communities. Appl Environ Microbiol, 2009. 75(23):7537-41.\n\n");
2277 m->appendLogBuffer("Distributed under the GNU General Public License\n\n");
2278 m->appendLogBuffer("Type 'help()' for information on the commands that are available\n\n");
2279 m->appendLogBuffer("For questions and analysis support, please visit our forum at https://forum.mothur.org\n\n");
2280 m->appendLogBuffer("Type 'quit()' to exit program\n\n");
2281
2282 m->setRandomSeed(19760620);
2283 m->appendLogBuffer("[NOTE]: Setting random seed to 19760620.\n\n");
2284
2285 OS = "";
2286 //version
2287 #if defined NON_WINDOWS
2288 #if defined (__APPLE__) || (__MACH__)
2289 OS = "Mac ";
2290 #else
2291 OS = "Linux ";
2292 #endif
2293 #else
2294 OS = "Windows ";
2295 #endif
2296
2297 return true;
2298 }
2299 catch(exception& e) {
2300 m->errorOut(e, "Utils", "mothurInitialPrep");
2301 exit(1);
2302 }
2303 }
2304 /***********************************************************************/
2305 /***********************************************************************/
isBlank(string fileName)2306 bool Utils::isBlank(string fileName){
2307 try {
2308
2309 fileName = getFullPathName(fileName);
2310
2311 ifstream fileHandle;
2312 fileHandle.open(fileName.c_str());
2313 if(!fileHandle) { m->mothurOut("[ERROR]: Could not open " + fileName + "\n"); }
2314 else { //check for blank file
2315 zapGremlins(fileHandle);
2316 gobble(fileHandle);
2317 if (fileHandle.eof()) { fileHandle.close(); return true; }
2318 fileHandle.close();
2319 }
2320 return false;
2321 }
2322 catch(exception& e) {
2323 m->errorOut(e, "Utils", "isBlank");
2324 exit(1);
2325 }
2326 }
2327 /***********************************************************************/
stringBlank(string input)2328 bool Utils::stringBlank(string input){
2329 try {
2330 for (int i = 0; i < input.length(); i++) { if (!isspace(input[i])) { return false; } }
2331 return true;
2332 }
2333 catch(exception& e) {
2334 m->errorOut(e, "Utils", "isBlank");
2335 exit(1);
2336 }
2337 }
2338 /**************************************************************************************************/
setFilePosFasta(string filename,long long & num,char delim)2339 vector<double> Utils::setFilePosFasta(string filename, long long& num, char delim) {
2340 try {
2341 vector<double> positions;
2342 ifstream inFASTA;
2343 string completeFileName = getFullPathName(filename);
2344 //inFASTA.open(completeFileName.c_str(), ios::binary);
2345 openInputFileBinary(completeFileName, inFASTA);
2346 int nameLine = 2;
2347 if (delim == '@') { nameLine = 4; }
2348 else if (delim == '>') { nameLine = 2; }
2349 else { m->mothurOut("[ERROR]: unknown file deliminator, quitting.\n"); m->setControl_pressed(true); }
2350
2351 double count = 0;
2352 long long numLines = 0;
2353 while(!inFASTA.eof()){
2354 char c = inFASTA.get(); count++;
2355 string input = ""; input += c;
2356 while ((c != '\n') && (c != '\r') && (c != '\f') && (c != EOF)) {
2357 c = inFASTA.get(); count++;
2358 input += c;
2359 }
2360 numLines++;
2361 //gobble
2362 while(isspace(c=inFASTA.get())) { input += c; count++;}
2363 if(!inFASTA.eof()) { inFASTA.putback(c); count--; }
2364
2365 if (input.length() != 0) {
2366 if((input[0] == delim) && (((numLines-1)%nameLine) == 0)){ //this is a name line
2367 positions.push_back(count+numLines-input.length());
2368 }else if (int(c) == -1) { break; }
2369 else { input = ""; }
2370 }
2371 }
2372 inFASTA.close();
2373
2374 num = positions.size();
2375
2376 FILE * pFile;
2377 double size;
2378
2379 //get num bytes in file
2380 pFile = fopen (completeFileName.c_str(),"rb");
2381 if (pFile==NULL) perror ("Error opening file");
2382 else{
2383 fseek (pFile, 0, SEEK_END);
2384 size=ftell (pFile);
2385 fclose (pFile);
2386 }
2387
2388 positions.push_back(size);
2389 positions[0] = 0;
2390
2391 return positions;
2392 }
2393 catch(exception& e) {
2394 m->errorOut(e, "Utils", "setFilePosFasta");
2395 exit(1);
2396 }
2397 }
2398 /**************************************************************************************************/
setFilePosFasta(string filename,long long & num)2399 vector<double> Utils::setFilePosFasta(string filename, long long& num) {
2400 try {
2401 vector<double> positions;
2402 ifstream inFASTA;
2403 //openInputFileBinary(filename, inFASTA);
2404 string completeFileName = getFullPathName(filename);
2405 //inFASTA.open(completeFileName.c_str(), ios::binary);
2406 openInputFileBinary(completeFileName, inFASTA);
2407
2408 string input;
2409 double count = 0;
2410 while(!inFASTA.eof()){
2411 char c = inFASTA.get(); count++;
2412 if (c == '>') { positions.push_back(count-1); }
2413 }
2414 inFASTA.close();
2415
2416 num = positions.size();
2417
2418 FILE * pFile;
2419 double size;
2420
2421 //get num bytes in file
2422 pFile = fopen (completeFileName.c_str(),"rb");
2423 if (pFile==NULL) perror ("Error opening file");
2424 else{
2425 fseek (pFile, 0, SEEK_END);
2426 size=ftell (pFile);
2427 fclose (pFile);
2428 }
2429
2430 positions.push_back(size);
2431 positions[0] = 0;
2432
2433 return positions;
2434 }
2435 catch(exception& e) {
2436 m->errorOut(e, "Utils", "setFilePosFasta");
2437 exit(1);
2438 }
2439 }
2440 //**********************************************************************************************************************
readConsTax(string inputfile,PhyloTree & tree)2441 vector<Taxonomy> Utils::readConsTax(string inputfile, PhyloTree& tree){
2442 try {
2443 //read headers
2444 ifstream in; openInputFile(inputfile, in); getline(in);
2445
2446 vector<Taxonomy> taxes;
2447 while (!in.eof()) {
2448
2449 if (m->getControl_pressed()) { break; }
2450
2451 Taxonomy thisTax(in);
2452 taxes.push_back(thisTax);
2453
2454 tree.addSeqToTree(thisTax.getName(), thisTax.getTaxons());
2455 }
2456 in.close();
2457
2458 return taxes;
2459 }
2460 catch(exception& e) {
2461 m->errorOut(e, "Utils", "readConsTax");
2462 exit(1);
2463 }
2464 }
2465 //**********************************************************************************************************************
readConsTax(string inputfile)2466 vector<consTax> Utils::readConsTax(string inputfile){
2467 try {
2468
2469 vector<consTax> taxes;
2470
2471 ifstream in;
2472 openInputFile(inputfile, in);
2473
2474 //read headers
2475 getline(in);
2476
2477 while (!in.eof()) {
2478
2479 if (m->getControl_pressed()) { break; }
2480
2481 string otu = ""; string tax = "unknown";
2482 int size = 0;
2483
2484 in >> otu; gobble(in);
2485 in >> size; gobble(in);
2486 tax = getline(in); gobble(in);
2487
2488 consTax temp(otu, tax, size);
2489 taxes.push_back(temp);
2490 }
2491 in.close();
2492
2493 return taxes;
2494 }
2495 catch(exception& e) {
2496 m->errorOut(e, "Utils", "readConsTax");
2497 exit(1);
2498 }
2499 }
2500 //**********************************************************************************************************************
readConsTax(string inputfile,vector<Taxonomy> & conTax)2501 void Utils::readConsTax(string inputfile, vector<Taxonomy>& conTax){
2502 try {
2503 conTax.clear();
2504
2505 ifstream in; openInputFile(inputfile, in);
2506 getline(in); //read headers
2507
2508 while (!in.eof()) {
2509
2510 if (m->getControl_pressed()) { break; }
2511
2512 string otu = ""; string tax = "unknown";
2513 int size = 0;
2514
2515 in >> otu; gobble(in);
2516 in >> size; gobble(in);
2517 tax = getline(in); gobble(in);
2518
2519 Taxonomy temp(otu, tax, size);
2520 conTax.push_back(temp);
2521 }
2522 in.close();
2523
2524 return;
2525 }
2526 catch(exception& e) {
2527 m->errorOut(e, "Utils", "readConsTax");
2528 exit(1);
2529 }
2530 }
2531 //**********************************************************************************************************************
readConsTax(string inputfile,map<int,consTax2> & taxes)2532 int Utils::readConsTax(string inputfile, map<int, consTax2>& taxes){
2533 try {
2534 ifstream in;
2535 openInputFile(inputfile, in);
2536
2537 //read headers
2538 getline(in);
2539
2540 while (!in.eof()) {
2541
2542 if (m->getControl_pressed()) { break; }
2543
2544 string otu = ""; string tax = "unknown";
2545 int size = 0;
2546
2547 in >> otu; gobble(in);
2548 in >> size; gobble(in);
2549 tax = getline(in); gobble(in);
2550
2551 consTax2 temp(otu, tax, size);
2552 string simpleBin = getSimpleLabel(otu);
2553 int bin;
2554 convert(simpleBin, bin);
2555 taxes[bin] = temp;
2556 }
2557 in.close();
2558
2559 return 0;
2560 }
2561 catch(exception& e) {
2562 m->errorOut(e, "Utils", "readConsTax");
2563 exit(1);
2564 }
2565 }
2566 /**************************************************************************************************/
setFilePosEachLine(string filename,long long & num)2567 vector<double> Utils::setFilePosEachLine(string filename, long long& num) {
2568 try {
2569 filename = getFullPathName(filename);
2570
2571 vector<double> positions;
2572 ifstream in;
2573 //openInputFile(filename, in);
2574 openInputFileBinary(filename, in);
2575
2576 string input;
2577 unsigned long long count = 0;
2578 positions.push_back(0);
2579
2580 while(!in.eof()){
2581 //getline counting reads
2582 char d = in.get(); count++;
2583 while ((d != '\n') && (d != '\r') && (d != '\f') && (d != in.eof())) {
2584 //get next character
2585 d = in.get();
2586 count++;
2587 }
2588
2589 if (!in.eof()) {
2590 d=in.get(); count++;
2591 while(isspace(d) && (d != in.eof())) { d=in.get(); count++;}
2592 }
2593 positions.push_back(count-1);
2594
2595 }
2596 in.close();
2597
2598 num = positions.size()-1;
2599
2600 FILE * pFile;
2601 double size = 0;
2602
2603 //get num bytes in file
2604 pFile = fopen (filename.c_str(),"rb");
2605 if (pFile==NULL) perror ("Error opening file");
2606 else{
2607 fseek (pFile, 0, SEEK_END);
2608 size=ftell (pFile);
2609 fclose (pFile);
2610 }
2611
2612 positions[(positions.size()-1)] = size;
2613
2614 return positions;
2615 }
2616 catch(exception& e) {
2617 m->errorOut(e, "Utils", "setFilePosEachLine");
2618 exit(1);
2619 }
2620 }
2621 /**************************************************************************************************/
setFilePosEachLine(string filename,unsigned long long & num)2622 vector<double> Utils::setFilePosEachLine(string filename, unsigned long long& num) {
2623 try {
2624 filename = getFullPathName(filename);
2625
2626 vector<double> positions;
2627 ifstream in;
2628 //openInputFile(filename, in);
2629 openInputFileBinary(filename, in);
2630
2631 string input;
2632 unsigned long long count = 0;
2633 positions.push_back(0);
2634
2635 while(!in.eof()){
2636 //getline counting reads
2637 char d = in.get(); count++;
2638 while ((d != '\n') && (d != '\r') && (d != '\f') && (d != in.eof())) {
2639 //get next character
2640 d = in.get();
2641 count++;
2642 }
2643
2644 if (!in.eof()) {
2645 d=in.get(); count++;
2646 while(isspace(d) && (d != in.eof())) { d=in.get(); count++;}
2647 }
2648 positions.push_back(count-1);
2649 }
2650 in.close();
2651
2652 num = positions.size()-1;
2653
2654 FILE * pFile;
2655 double size = 0;
2656
2657 //get num bytes in file
2658 pFile = fopen (filename.c_str(),"rb");
2659 if (pFile==NULL) perror ("Error opening file");
2660 else{
2661 fseek (pFile, 0, SEEK_END);
2662 size=ftell (pFile);
2663 fclose (pFile);
2664 }
2665
2666 positions[(positions.size()-1)] = size;
2667
2668 return positions;
2669 }
2670 catch(exception& e) {
2671 m->errorOut(e, "Utils", "setFilePosEachLine");
2672 exit(1);
2673 }
2674 }
2675
2676 /**************************************************************************************************/
2677
divideFile(string filename,int & proc)2678 vector<double> Utils::divideFile(string filename, int& proc) {
2679 try{
2680 vector<double> filePos;
2681 filePos.push_back(0);
2682
2683 FILE * pFile;
2684 double size = 0;
2685
2686 filename = getFullPathName(filename);
2687
2688 //get num bytes in file
2689 pFile = fopen (filename.c_str(),"rb");
2690 if (pFile==NULL) perror ("Error opening file");
2691 else{
2692 fseek (pFile, 0, SEEK_END);
2693 size=ftell (pFile);
2694 fclose (pFile);
2695 }
2696
2697 if (proc == 1) { filePos.push_back(size); return filePos; }
2698
2699 #if defined NON_WINDOWS
2700
2701 //estimate file breaks
2702 double chunkSize = 0;
2703 chunkSize = size / proc;
2704
2705 //file to small to divide by processors
2706 if (chunkSize == 0) { proc = 1; filePos.push_back(size); return filePos; }
2707
2708 if (proc > 1) {
2709 //for each process seekg to closest file break and search for next '>' char. make that the filebreak
2710 for (int i = 0; i < proc; i++) {
2711 double spot = (i+1) * chunkSize;
2712
2713 ifstream in;
2714 openInputFile(filename, in);
2715 in.seekg(spot);
2716
2717 //look for next '>'
2718 double newSpot = spot;
2719 while (!in.eof()) {
2720 char c = in.get();
2721
2722 if (c == '>') { in.putback(c); newSpot = in.tellg(); break; }
2723 else if (int(c) == -1) { break; }
2724
2725 }
2726
2727 //there was not another sequence before the end of the file
2728 double sanityPos = in.tellg();
2729
2730 if (isEqual(sanityPos, -1)) { break; }
2731 else { filePos.push_back(newSpot); }
2732
2733 in.close();
2734 }
2735 }
2736 //save end pos
2737 filePos.push_back(size);
2738
2739 //sanity check filePos
2740 for (int i = 0; i < (filePos.size()-1); i++) {
2741 if (filePos[(i+1)] <= filePos[i]) { filePos.erase(filePos.begin()+(i+1)); i--; }
2742 }
2743
2744 proc = (filePos.size() - 1);
2745 #else
2746 m->mothurOut("[ERROR]: Windows version should not be calling the divideFile function.\n");
2747 proc=1;
2748 filePos.push_back(size);
2749 #endif
2750 return filePos;
2751 }
2752 catch(exception& e) {
2753 m->errorOut(e, "Utils", "divideFile");
2754 exit(1);
2755 }
2756 }
2757 /**************************************************************************************************/
2758
divideFile(string filename,int & proc,char delimChar)2759 vector<double> Utils::divideFile(string filename, int& proc, char delimChar) {
2760 try{
2761 vector<double> filePos;
2762 filePos.push_back(0);
2763
2764 FILE * pFile;
2765 double size = 0;
2766
2767 filename = getFullPathName(filename);
2768
2769 //get num bytes in file
2770 pFile = fopen (filename.c_str(),"rb");
2771 if (pFile==NULL) perror ("Error opening file");
2772 else{
2773 fseek (pFile, 0, SEEK_END);
2774 size=ftell (pFile);
2775 fclose (pFile);
2776 }
2777
2778 char secondaryDelim = '>';
2779 if (delimChar == '@') { secondaryDelim = '+'; }
2780
2781 if (proc == 1) { filePos.push_back(size); return filePos; }
2782
2783 #if defined NON_WINDOWS
2784
2785 //estimate file breaks
2786 double chunkSize = 0;
2787 chunkSize = size / proc;
2788
2789 //file to small to divide by processors
2790 if (chunkSize == 0) { proc = 1; filePos.push_back(size); return filePos; }
2791
2792 //for each process seekg to closest file break and search for next delimChar char. make that the filebreak
2793 for (int i = 0; i < proc; i++) {
2794 double spot = (i+1) * chunkSize;
2795
2796 ifstream in;
2797 openInputFile(filename, in);
2798 in.seekg(spot);
2799
2800 getline(in); //get to end of line in case you jump into middle of line where the delim char happens to fall.
2801
2802 //look for next delimChar
2803 double newSpot = spot;
2804 while (!in.eof()) {
2805 char c = in.get();
2806 string input = ""; input += c;
2807 while ((c != '\n') && (c != '\r') && (c != '\f') && (c != EOF)) {
2808 c = in.get();
2809 input += c;
2810 }
2811
2812 if (input.length() != 0) {
2813 if(input[0] == delimChar){ //this is a potential name line
2814 newSpot = in.tellg();
2815 newSpot -=input.length();
2816 //get two lines and look for secondary delim
2817 //inf a fasta file this would be a new sequence, in fastq it will be the + line, if this was a nameline.
2818 getline(in); gobble(in);
2819 if (!in.eof()) {
2820 string secondInput = getline(in); gobble(in);
2821 if (secondInput[0] == secondaryDelim) { break; } //yes, it was a nameline so stop
2822 else { input = ""; gobble(in); } //nope it was a delim at the beginning of a non nameline, keep looking.
2823 }
2824 }else if (int(c) == -1) { break; }
2825 else { input = ""; gobble(in); }
2826 }
2827 }
2828
2829 //there was not another sequence before the end of the file
2830 double sanityPos = in.tellg();
2831
2832 if (isEqual(sanityPos, -1)) { break; }
2833 else { filePos.push_back(newSpot); }
2834
2835 in.close();
2836 }
2837
2838 //save end pos
2839 filePos.push_back(size);
2840
2841 //sanity check filePos
2842 for (int i = 0; i < (filePos.size()-1); i++) {
2843 if (filePos[(i+1)] <= filePos[i]) { filePos.erase(filePos.begin()+(i+1)); i--; }
2844 }
2845
2846 proc = (filePos.size() - 1);
2847 #else
2848 m->mothurOut("[ERROR]: Windows version should not be calling the divideFile function.\n");
2849 proc=1;
2850 filePos.push_back(size);
2851 #endif
2852 return filePos;
2853 }
2854 catch(exception& e) {
2855 m->errorOut(e, "Utils", "divideFile");
2856 exit(1);
2857 }
2858 }
2859
2860 /**************************************************************************************************/
2861
divideFilePerLine(string filename,int & proc)2862 vector<double> Utils::divideFilePerLine(string filename, int& proc) {
2863 try{
2864 vector<double> filePos;
2865 filePos.push_back(0);
2866
2867 FILE * pFile;
2868 double size = 0;
2869
2870 filename = getFullPathName(filename);
2871
2872 //get num bytes in file
2873 pFile = fopen (filename.c_str(),"rb");
2874 if (pFile==NULL) perror ("Error opening file");
2875 else{
2876 fseek (pFile, 0, SEEK_END);
2877 size=ftell (pFile);
2878 fclose (pFile);
2879 }
2880
2881 #if defined NON_WINDOWS
2882 //estimate file breaks
2883 double chunkSize = 0;
2884 chunkSize = size / proc;
2885
2886 //file to small to divide by processors
2887 if (chunkSize == 0) { proc = 1; filePos.push_back(size); return filePos; }
2888
2889 //for each process seekg to closest file break and search for next '>' char. make that the filebreak
2890 for (int i = 0; i < proc; i++) {
2891 double spot = (i+1) * chunkSize;
2892
2893 ifstream in;
2894 openInputFile(filename, in);
2895 in.seekg(spot);
2896
2897 //look for next line break
2898 double newSpot = spot;
2899 while (!in.eof()) {
2900 char c = in.get();
2901
2902 if ((c == '\n') || (c == '\r') || (c == '\f')) { gobble(in); newSpot = in.tellg(); break; }
2903 else if (int(c) == -1) { break; }
2904 }
2905
2906 //there was not another line before the end of the file
2907 double sanityPos = in.tellg();
2908
2909 if (sanityPos == -1) { break; }
2910 else { filePos.push_back(newSpot); }
2911
2912 in.close();
2913 }
2914
2915 //save end pos
2916 filePos.push_back(size);
2917
2918 //sanity check filePos
2919 for (int i = 0; i < (filePos.size()-1); i++) {
2920 if (filePos[(i+1)] <= filePos[i]) { filePos.erase(filePos.begin()+(i+1)); i--; }
2921 }
2922
2923 proc = (filePos.size() - 1);
2924 #else
2925 m->mothurOut("[ERROR]: Windows version should not be calling the divideFile function.\n");
2926 proc=1;
2927 filePos.push_back(size);
2928 #endif
2929 return filePos;
2930 }
2931 catch(exception& e) {
2932 m->errorOut(e, "Utils", "divideFile");
2933 exit(1);
2934 }
2935 }
2936 /**************************************************************************************************/
divideFile(string filename,int & proc,vector<string> & files)2937 int Utils::divideFile(string filename, int& proc, vector<string>& files) {
2938 try{
2939
2940 vector<double> filePos = divideFile(filename, proc);
2941
2942 for (int i = 0; i < (filePos.size()-1); i++) {
2943
2944 //read file chunk
2945 ifstream in;
2946 openInputFile(filename, in);
2947 in.seekg(filePos[i]);
2948 unsigned long long size = filePos[(i+1)] - filePos[i];
2949 char* chunk = new char[size];
2950 in.read(chunk, size);
2951 in.close();
2952
2953 //open new file
2954 string fileChunkName = filename + "." + toString(i) + ".tmp";
2955 ofstream out;
2956 openOutputFile(fileChunkName, out);
2957
2958 out << chunk << endl;
2959 out.close();
2960 delete[] chunk;
2961
2962 //save name
2963 files.push_back(fileChunkName);
2964 }
2965
2966 return 0;
2967 }
2968 catch(exception& e) {
2969 m->errorOut(e, "Utils", "divideFile");
2970 exit(1);
2971 }
2972 }
2973 /***********************************************************************/
2974
isTrue(string f)2975 bool Utils::isTrue(string f){
2976 try {
2977
2978 for (int i = 0; i < f.length(); i++) { f[i] = toupper(f[i]); }
2979
2980 if ((f == "TRUE") || (f == "T")) { return true; }
2981 else { return false; }
2982 }
2983 catch(exception& e) {
2984 m->errorOut(e, "Utils", "isTrue");
2985 exit(1);
2986 }
2987 }
2988
2989 /***********************************************************************/
2990
roundDist(float dist,int precision)2991 float Utils::roundDist(float dist, int precision){
2992 try {
2993 return int(dist * precision + 0.5)/float(precision);
2994 }
2995 catch(exception& e) {
2996 m->errorOut(e, "Utils", "roundDist");
2997 exit(1);
2998 }
2999 }
3000 /***********************************************************************/
3001
ceilDist(float dist,int precision)3002 float Utils::ceilDist(float dist, int precision){
3003 try {
3004 return int(ceil(dist * precision))/float(precision);
3005 }
3006 catch(exception& e) {
3007 m->errorOut(e, "Utils", "ceilDist");
3008 exit(1);
3009 }
3010 }
3011 /***********************************************************************/
3012
splitWhiteSpace(string & rest,char buffer[],int size)3013 vector<string> Utils::splitWhiteSpace(string& rest, char buffer[], int size){
3014 try {
3015 vector<string> pieces;
3016
3017 for (int i = 0; i < size; i++) {
3018 if (!isspace(buffer[i])) { rest += buffer[i]; }
3019 else {
3020 if (rest != "") { pieces.push_back(rest); rest = ""; }
3021 while (i < size) { //gobble white space
3022 if (isspace(buffer[i])) { i++; }
3023 else { rest = buffer[i]; break; }
3024 }
3025 }
3026 }
3027
3028 return pieces;
3029 }
3030 catch(exception& e) {
3031 m->errorOut(e, "Utils", "splitWhiteSpace");
3032 exit(1);
3033 }
3034 }
3035 /***********************************************************************/
trimWhiteSpace(string input)3036 string Utils::trimWhiteSpace(string input){
3037 try {
3038
3039 int start, end; start = 0; end = input.length();
3040
3041 //no spaces
3042 if (input.find_first_of(' ') == string::npos) { return input; }
3043
3044 for (int i = 0; i < input.length(); i++) {
3045 if (input[i] != ' ') { start = i; break; }
3046 }
3047
3048 end = start;
3049 for (int i = input.length()-1; i > start; i--) {
3050 if (input[i] != ' ') { end = i; break; }
3051 }
3052
3053 string trimmed = input.substr(start, end-start+1);
3054
3055 return trimmed;
3056 }
3057 catch(exception& e) {
3058 m->errorOut(e, "Utils", "trimWhiteSpace");
3059 exit(1);
3060 }
3061 }
3062 /***********************************************************************/
splitWhiteSpace(string input)3063 vector<string> Utils::splitWhiteSpace(string input){
3064 try {
3065 vector<string> pieces;
3066 string rest = "";
3067
3068 for (int i = 0; i < input.length(); i++) {
3069 if (!isspace(input[i])) { rest += input[i]; }
3070 else {
3071 if (rest != "") { pieces.push_back(rest); rest = ""; }
3072 while (i < input.length()) { //gobble white space
3073 if (isspace(input[i])) { i++; }
3074 else { rest = input[i]; break; }
3075 }
3076 }
3077 }
3078
3079 if (rest != "") { pieces.push_back(rest); }
3080
3081 return pieces;
3082 }
3083 catch(exception& e) {
3084 m->errorOut(e, "Utils", "splitWhiteSpace");
3085 exit(1);
3086 }
3087 }
3088 /***********************************************************************/
splitWhiteSpace(string input,vector<float> & pieces,int index)3089 int Utils::splitWhiteSpace(string input, vector<float>& pieces, int index){
3090 try {
3091 pieces.clear();
3092 string rest = "";
3093 int count = 0;
3094
3095 for (int i = 0; i < input.length(); i++) {
3096 if (!isspace(input[i])) { rest += input[i]; }
3097 else {
3098 if (rest != "") { float tdist; mothurConvert(rest, tdist); pieces.push_back(tdist); count++; rest = ""; }
3099 while (i < input.length()) { //gobble white space
3100 if (isspace(input[i])) { i++; }
3101 else { rest = input[i]; break; }
3102 }
3103 if (count > index) { return 0; }
3104 }
3105 }
3106
3107 if (rest != "") { float tdist; mothurConvert(rest, tdist); count++; pieces.push_back(tdist); }
3108
3109 return 0;
3110 }
3111 catch(exception& e) {
3112 m->errorOut(e, "Utils", "splitWhiteSpace");
3113 exit(1);
3114 }
3115 }
3116 /***********************************************************************/
splitWhiteSpaceWithQuotes(string input)3117 vector<string> Utils::splitWhiteSpaceWithQuotes(string input){
3118 try {
3119 vector<string> pieces;
3120 string rest = "";
3121
3122 int pos = input.find('\'');
3123 int pos2 = input.find('\"');
3124
3125 if ((pos == string::npos) && (pos2 == string::npos)) { return splitWhiteSpace(input); } //no quotes to worry about
3126 else {
3127 for (int i = 0; i < input.length(); i++) {
3128
3129 if ((input[i] == '\'') || (input[i] == '\"') || (rest == "\'") || (rest == "\"")) { //grab everything til end or next ' or "
3130 rest += input[i];
3131 for (int j = i+1; j < input.length(); j++) {
3132 if ((input[j] == '\'') || (input[j] == '\"')) { //then quit
3133 rest += input[j];
3134 i = j;
3135 j+=input.length();
3136 }else { rest += input[j]; }
3137 }
3138 }else if (!isspace(input[i])) { rest += input[i]; }
3139 else {
3140 if (rest != "") { pieces.push_back(rest); rest = ""; }
3141 while (i < input.length()) { //gobble white space
3142 if (isspace(input[i])) { i++; }
3143 else { rest = input[i]; break; }
3144 }
3145 }
3146 }
3147
3148 if (rest != "") { pieces.push_back(rest); }
3149 }
3150 return pieces;
3151 }
3152 catch(exception& e) {
3153 m->errorOut(e, "Utils", "splitWhiteSpace");
3154 exit(1);
3155 }
3156 }
3157 //**********************************************************************************************************************
readTax(string taxfile,map<string,string> & taxMap,bool removeConfidence)3158 int Utils::readTax(string taxfile, map<string, string>& taxMap, bool removeConfidence) {
3159 try {
3160 //open input file
3161 ifstream in;
3162 openInputFile(taxfile, in);
3163
3164 bool error = false;
3165 string name, taxonomy;
3166
3167 while (!in.eof()) {
3168 if (m->getControl_pressed()) { break; }
3169
3170 in >> name; gobble(in);
3171 taxonomy = getline(in); gobble(in);
3172
3173 checkName(name);
3174
3175 //are there confidence scores, if so remove them
3176 if (removeConfidence) { if (taxonomy.find_first_of('(') != -1) { removeConfidences(taxonomy); } }
3177 map<string, string>::iterator itTax = taxMap.find(name);
3178
3179 if(itTax == taxMap.end()) {
3180 bool ignore = false;
3181 if (taxonomy != "") { if (taxonomy[taxonomy.length()-1] != ';') { m->mothurOut("[ERROR]: " + name + " is missing the final ';', ignoring.\n"); ignore=true; }
3182 }
3183 if (!ignore) { taxMap[name] = taxonomy; }
3184 }else { m->mothurOut("[ERROR]: " + name + " is already in your taxonomy file, names must be unique.\n"); error = true; }
3185 }
3186 in.close();
3187
3188 if (error) { m->setControl_pressed(true); }
3189
3190 return taxMap.size();
3191
3192 }
3193 catch(exception& e) {
3194 m->errorOut(e, "Utils", "readTax");
3195 exit(1);
3196 }
3197 }
3198 /**********************************************************************************************************************/
3199 //nameMap is filled with redundant names mapped to unique name
readNames(string namefile,map<string,string> & nameMap,bool redund)3200 int Utils::readNames(string namefile, map<string, string>& nameMap, bool redund) {
3201 try {
3202 //open input file
3203 ifstream in;
3204 openInputFile(namefile, in);
3205
3206 string rest = "";
3207 char buffer[4096];
3208 bool pairDone = false;
3209 bool columnOne = true;
3210 string firstCol, secondCol;
3211
3212 while (!in.eof()) {
3213 if (m->getControl_pressed()) { break; }
3214
3215 in.read(buffer, 4096);
3216 vector<string> pieces = splitWhiteSpace(rest, buffer, in.gcount());
3217
3218 for (int i = 0; i < pieces.size(); i++) {
3219 if (columnOne) { firstCol = pieces[i]; columnOne=false; }
3220 else { secondCol = pieces[i]; pairDone = true; columnOne=true; }
3221
3222 if (pairDone) {
3223 checkName(firstCol);
3224 checkName(secondCol);
3225
3226 //parse names into vector
3227 vector<string> theseNames;
3228 splitAtComma(secondCol, theseNames);
3229 for (int i = 0; i < theseNames.size(); i++) { nameMap[theseNames[i]] = firstCol; }
3230 pairDone = false;
3231 }
3232 }
3233 }
3234 in.close();
3235
3236 if (rest != "") {
3237 vector<string> pieces = splitWhiteSpace(rest);
3238
3239 for (int i = 0; i < pieces.size(); i++) {
3240 if (columnOne) { firstCol = pieces[i]; columnOne=false; }
3241 else { secondCol = pieces[i]; pairDone = true; columnOne=true; }
3242
3243 if (pairDone) {
3244 checkName(firstCol);
3245 checkName(secondCol);
3246
3247 //parse names into vector
3248 vector<string> theseNames;
3249 splitAtComma(secondCol, theseNames);
3250 for (int i = 0; i < theseNames.size(); i++) { nameMap[theseNames[i]] = firstCol; }
3251 pairDone = false;
3252 }
3253 }
3254 }
3255
3256 return nameMap.size();
3257
3258 }
3259 catch(exception& e) {
3260 m->errorOut(e, "Utils", "readNames");
3261 exit(1);
3262 }
3263 }
3264 /**********************************************************************************************************************/
readNames(string namefile,map<string,string> & nameMap,int flip)3265 int Utils::readNames(string namefile, map<string, string>& nameMap, int flip) {
3266 try {
3267 //open input file
3268 ifstream in;
3269 openInputFile(namefile, in);
3270
3271 string rest = "";
3272 char buffer[4096];
3273 bool pairDone = false;
3274 bool columnOne = true;
3275 string firstCol, secondCol;
3276
3277 while (!in.eof()) {
3278 if (m->getControl_pressed()) { break; }
3279
3280 in.read(buffer, 4096);
3281 vector<string> pieces = splitWhiteSpace(rest, buffer, in.gcount());
3282
3283 for (int i = 0; i < pieces.size(); i++) {
3284 if (columnOne) { firstCol = pieces[i]; columnOne=false; }
3285 else { secondCol = pieces[i]; pairDone = true; columnOne=true; }
3286
3287 if (pairDone) {
3288 checkName(firstCol);
3289 checkName(secondCol);
3290 nameMap[secondCol] = firstCol;
3291 pairDone = false;
3292 }
3293 }
3294 }
3295 in.close();
3296
3297 if (rest != "") {
3298 vector<string> pieces = splitWhiteSpace(rest);
3299
3300 for (int i = 0; i < pieces.size(); i++) {
3301 if (columnOne) { firstCol = pieces[i]; columnOne=false; }
3302 else { secondCol = pieces[i]; pairDone = true; columnOne=true; }
3303
3304 if (pairDone) {
3305 checkName(firstCol);
3306 checkName(secondCol);
3307 nameMap[secondCol] = firstCol;
3308 pairDone = false;
3309 }
3310 }
3311 }
3312
3313 return nameMap.size();
3314
3315 }
3316 catch(exception& e) {
3317 m->errorOut(e, "Utils", "readNames");
3318 exit(1);
3319 }
3320 }
3321 /**********************************************************************************************************************/
readNames(string namefile,map<string,string> & nameMap,map<string,int> & nameCount)3322 int Utils::readNames(string namefile, map<string, string>& nameMap, map<string, int>& nameCount) {
3323 try {
3324 nameMap.clear(); nameCount.clear();
3325 //open input file
3326 ifstream in;
3327 openInputFile(namefile, in);
3328
3329 string rest = "";
3330 char buffer[4096];
3331 bool pairDone = false;
3332 bool columnOne = true;
3333 string firstCol, secondCol;
3334
3335 while (!in.eof()) {
3336 if (m->getControl_pressed()) { break; }
3337
3338 in.read(buffer, 4096);
3339 vector<string> pieces = splitWhiteSpace(rest, buffer, in.gcount());
3340
3341 for (int i = 0; i < pieces.size(); i++) {
3342 if (columnOne) { firstCol = pieces[i]; columnOne=false; }
3343 else { secondCol = pieces[i]; pairDone = true; columnOne=true; }
3344
3345 if (pairDone) {
3346 checkName(firstCol);
3347 checkName(secondCol);
3348 //parse names into vector
3349 vector<string> theseNames;
3350 splitAtComma(secondCol, theseNames);
3351 for (int i = 0; i < theseNames.size(); i++) { nameMap[theseNames[i]] = firstCol; }
3352 nameCount[firstCol] = theseNames.size();
3353 pairDone = false;
3354 }
3355 }
3356 }
3357 in.close();
3358
3359 if (rest != "") {
3360 vector<string> pieces = splitWhiteSpace(rest);
3361
3362 for (int i = 0; i < pieces.size(); i++) {
3363 if (columnOne) { firstCol = pieces[i]; columnOne=false; }
3364 else { secondCol = pieces[i]; pairDone = true; columnOne=true; }
3365
3366 if (pairDone) {
3367 checkName(firstCol);
3368 checkName(secondCol);
3369 //parse names into vector
3370 vector<string> theseNames;
3371 splitAtComma(secondCol, theseNames);
3372 for (int i = 0; i < theseNames.size(); i++) { nameMap[theseNames[i]] = firstCol; }
3373 nameCount[firstCol] = theseNames.size();
3374 pairDone = false;
3375 }
3376 }
3377
3378 }
3379 return nameMap.size();
3380
3381 }
3382 catch(exception& e) {
3383 m->errorOut(e, "Utils", "readNames");
3384 exit(1);
3385 }
3386 }
3387 /**********************************************************************************************************************/
readNames(string namefile,map<string,string> & nameMap)3388 int Utils::readNames(string namefile, map<string, string>& nameMap) {
3389 try {
3390 //open input file
3391 ifstream in;
3392 openInputFile(namefile, in);
3393
3394 string rest = "";
3395 char buffer[4096];
3396 bool pairDone = false;
3397 bool columnOne = true;
3398 string firstCol, secondCol;
3399
3400 while (!in.eof()) {
3401 if (m->getControl_pressed()) { break; }
3402
3403 in.read(buffer, 4096);
3404 vector<string> pieces = splitWhiteSpace(rest, buffer, in.gcount());
3405
3406 for (int i = 0; i < pieces.size(); i++) {
3407 if (columnOne) { firstCol = pieces[i]; columnOne=false; }
3408 else { secondCol = pieces[i]; pairDone = true; columnOne=true; }
3409
3410 if (pairDone) {
3411 checkName(firstCol);
3412 checkName(secondCol);
3413 nameMap[firstCol] = secondCol; pairDone = false; }
3414 }
3415 }
3416 in.close();
3417
3418 if (rest != "") {
3419 vector<string> pieces = splitWhiteSpace(rest);
3420
3421 for (int i = 0; i < pieces.size(); i++) {
3422 if (columnOne) { firstCol = pieces[i]; columnOne=false; }
3423 else { secondCol = pieces[i]; pairDone = true; columnOne=true; }
3424
3425 if (pairDone) {
3426 checkName(firstCol);
3427 checkName(secondCol);
3428 nameMap[firstCol] = secondCol; pairDone = false; }
3429 }
3430 }
3431
3432 return nameMap.size();
3433
3434 }
3435 catch(exception& e) {
3436 m->errorOut(e, "Utils", "readNames");
3437 exit(1);
3438 }
3439 }
3440 /**********************************************************************************************************************/
readNames(string namefile,map<string,string> & nameMap,set<string> & namesToInclude)3441 int Utils::readNames(string namefile, map<string, string>& nameMap, set<string>& namesToInclude) {
3442 try {
3443 //open input file
3444 ifstream in;
3445 openInputFile(namefile, in);
3446
3447 string firstCol, secondCol;
3448
3449 while (!in.eof()) {
3450 if (m->getControl_pressed()) { break; }
3451
3452 in >> firstCol; gobble(in);
3453 in >> secondCol; gobble(in);
3454
3455 checkName(firstCol);
3456 checkName(secondCol);
3457
3458 vector<string> secondNames; splitAtComma(secondCol, secondNames);
3459
3460 secondCol = ""; firstCol = "";
3461
3462 for (int i = 0; i < secondNames.size(); i++) {
3463 if (namesToInclude.count(secondNames[i]) != 0) { //we want to include you
3464 secondCol += secondNames[i] + ",";
3465 if (firstCol == "") { firstCol = secondNames[i]; }
3466 }
3467 }
3468
3469 if (secondCol != "") {
3470 //remove last comma
3471 secondCol = secondCol.substr(0,secondCol.length()-1);
3472
3473 nameMap[firstCol] = secondCol;
3474 }
3475
3476 }
3477 in.close();
3478
3479
3480 return nameMap.size();
3481
3482 }
3483 catch(exception& e) {
3484 m->errorOut(e, "Utils", "readNames");
3485 exit(1);
3486 }
3487 }
3488
3489 /**********************************************************************************************************************/
readNames(string namefile,map<string,vector<string>> & nameMap)3490 int Utils::readNames(string namefile, map<string, vector<string> >& nameMap) {
3491 try {
3492 //open input file
3493 ifstream in;
3494 openInputFile(namefile, in);
3495
3496 string rest = "";
3497 char buffer[4096];
3498 bool pairDone = false;
3499 bool columnOne = true;
3500 string firstCol, secondCol;
3501
3502 while (!in.eof()) {
3503 if (m->getControl_pressed()) { break; }
3504
3505 in.read(buffer, 4096);
3506 vector<string> pieces = splitWhiteSpace(rest, buffer, in.gcount());
3507
3508 for (int i = 0; i < pieces.size(); i++) {
3509 if (columnOne) { firstCol = pieces[i]; columnOne=false; }
3510 else { secondCol = pieces[i]; pairDone = true; columnOne=true; }
3511
3512 if (pairDone) {
3513 checkName(firstCol);
3514 checkName(secondCol);
3515 vector<string> temp;
3516 splitAtComma(secondCol, temp);
3517 nameMap[firstCol] = temp;
3518 pairDone = false;
3519 }
3520 }
3521 }
3522 in.close();
3523
3524 if (rest != "") {
3525 vector<string> pieces = splitWhiteSpace(rest);
3526
3527 for (int i = 0; i < pieces.size(); i++) {
3528 if (columnOne) { firstCol = pieces[i]; columnOne=false; }
3529 else { secondCol = pieces[i]; pairDone = true; columnOne=true; }
3530
3531 if (pairDone) {
3532 checkName(firstCol);
3533 checkName(secondCol);
3534 vector<string> temp;
3535 splitAtComma(secondCol, temp);
3536 nameMap[firstCol] = temp;
3537 pairDone = false;
3538 }
3539 }
3540 }
3541
3542 return nameMap.size();
3543 }
3544 catch(exception& e) {
3545 m->errorOut(e, "Utils", "readNames");
3546 exit(1);
3547 }
3548 }
3549 /**********************************************************************************************************************/
readNames(string namefile)3550 map<string, int> Utils::readNames(string namefile) {
3551 try {
3552 map<string, int> nameMap;
3553
3554 //open input file
3555 ifstream in;
3556 openInputFile(namefile, in);
3557
3558
3559 string firstCol, secondCol;
3560
3561 while (!in.eof()) {
3562 if (m->getControl_pressed()) { break; }
3563
3564 in >> firstCol; gobble(in);
3565 in >> secondCol; gobble(in);
3566
3567 checkName(firstCol);
3568 checkName(secondCol);
3569 int num = getNumNames(secondCol);
3570 nameMap[firstCol] = num;
3571 }
3572 in.close();
3573
3574 return nameMap;
3575
3576 }
3577 catch(exception& e) {
3578 m->errorOut(e, "Utils", "readNames");
3579 exit(1);
3580 }
3581 }
3582 /**********************************************************************************************************************/
scanNames(string namefile)3583 int Utils::scanNames(string namefile) {
3584 try {
3585
3586 //open input file
3587 ifstream in;
3588 openInputFile(namefile, in);
3589
3590 int total = 0;
3591 string firstCol, secondCol;
3592
3593 while (!in.eof()) {
3594 if (m->getControl_pressed()) { break; }
3595
3596 in >> firstCol; gobble(in);
3597 in >> secondCol; gobble(in);
3598
3599 total += getNumNames(secondCol);
3600 }
3601 in.close();
3602
3603 return total;
3604
3605 }
3606 catch(exception& e) {
3607 m->errorOut(e, "Utils", "scanNames");
3608 exit(1);
3609 }
3610 }
3611 /**********************************************************************************************************************/
readNames(string namefile,map<string,long long> & nameMap)3612 void Utils::readNames(string namefile, map<string, long long>& nameMap) {
3613 try {
3614 //open input file
3615 ifstream in; openInputFile(namefile, in);
3616
3617 string rest = "";
3618 char buffer[4096];
3619 bool pairDone = false;
3620 bool columnOne = true;
3621 string firstCol, secondCol;
3622
3623 while (!in.eof()) {
3624 if (m->getControl_pressed()) { break; }
3625
3626 in.read(buffer, 4096);
3627 vector<string> pieces = splitWhiteSpace(rest, buffer, in.gcount());
3628
3629 for (int i = 0; i < pieces.size(); i++) {
3630 if (columnOne) { firstCol = pieces[i]; columnOne=false; }
3631 else { secondCol = pieces[i]; pairDone = true; columnOne=true; }
3632
3633 if (pairDone) {
3634 checkName(firstCol);
3635 checkName(secondCol);
3636 long long num = getNumNames(secondCol);
3637 nameMap[firstCol] = num;
3638 pairDone = false;
3639 }
3640 }
3641 }
3642 in.close();
3643
3644 if (rest != "") {
3645 vector<string> pieces = splitWhiteSpace(rest);
3646 for (int i = 0; i < pieces.size(); i++) {
3647 if (columnOne) { firstCol = pieces[i]; columnOne=false; }
3648 else { secondCol = pieces[i]; pairDone = true; columnOne=true; }
3649
3650 if (pairDone) {
3651 checkName(firstCol);
3652 checkName(secondCol);
3653 long long num = getNumNames(secondCol);
3654 nameMap[firstCol] = num;
3655 pairDone = false;
3656 }
3657 }
3658 }
3659 }
3660 catch(exception& e) {
3661 m->errorOut(e, "Utils", "readNames");
3662 exit(1);
3663 }
3664 }
3665
3666 /**********************************************************************************************************************/
readNames(string namefile,unsigned long int & numSeqs)3667 map<string, int> Utils::readNames(string namefile, unsigned long int& numSeqs) {
3668 try {
3669 map<string, int> nameMap;
3670 numSeqs = 0;
3671
3672 //open input file
3673 ifstream in;
3674 openInputFile(namefile, in);
3675
3676 string rest = "";
3677 char buffer[4096];
3678 bool pairDone = false;
3679 bool columnOne = true;
3680 string firstCol, secondCol;
3681
3682 while (!in.eof()) {
3683 if (m->getControl_pressed()) { break; }
3684
3685 in.read(buffer, 4096);
3686 vector<string> pieces = splitWhiteSpace(rest, buffer, in.gcount());
3687
3688 for (int i = 0; i < pieces.size(); i++) {
3689 if (columnOne) { firstCol = pieces[i]; columnOne=false; }
3690 else { secondCol = pieces[i]; pairDone = true; columnOne=true; }
3691
3692 if (pairDone) {
3693 checkName(firstCol);
3694 checkName(secondCol);
3695 int num = getNumNames(secondCol);
3696 nameMap[firstCol] = num;
3697 pairDone = false;
3698 numSeqs += num;
3699 }
3700 }
3701 }
3702 in.close();
3703
3704 if (rest != "") {
3705 vector<string> pieces = splitWhiteSpace(rest);
3706 for (int i = 0; i < pieces.size(); i++) {
3707 if (columnOne) { firstCol = pieces[i]; columnOne=false; }
3708 else { secondCol = pieces[i]; pairDone = true; columnOne=true; }
3709
3710 if (pairDone) {
3711 checkName(firstCol);
3712 checkName(secondCol);
3713 int num = getNumNames(secondCol);
3714 nameMap[firstCol] = num;
3715 pairDone = false;
3716 numSeqs += num;
3717 }
3718 }
3719 }
3720
3721 return nameMap;
3722
3723 }
3724 catch(exception& e) {
3725 m->errorOut(e, "Utils", "readNames");
3726 exit(1);
3727 }
3728 }
3729 //**********************************************************************************************************************
printVsearchFile(vector<seqPriorityNode> & nameMapCount,string filename,string tag,string tag2)3730 int Utils::printVsearchFile(vector<seqPriorityNode>& nameMapCount, string filename, string tag, string tag2){
3731 try {
3732
3733 sort(nameMapCount.begin(), nameMapCount.end(), compareSeqPriorityNodes);
3734
3735 ofstream out;
3736 openOutputFile(filename, out);
3737
3738 //print new file in order of
3739 for (int i = 0; i < nameMapCount.size(); i++) {
3740 if (m->getControl_pressed()) {break;}
3741 out << ">" << nameMapCount[i].name << tag << nameMapCount[i].numIdentical << tag2 << endl << nameMapCount[i].seq << endl;
3742 }
3743 out.close();
3744
3745 return 0;
3746 }
3747 catch(exception& e) {
3748 m->errorOut(e, "Utils", "printVsearchFile");
3749 exit(1);
3750 }
3751 }
3752 /************************************************************/
checkName(string & name)3753 int Utils::checkName(string& name) {
3754 try {
3755 if (modifyNames) {
3756 for (int i = 0; i < name.length(); i++) {
3757 if (name[i] == ':') { name[i] = '_'; m->setChangedSeqNames(true); }
3758 }
3759 }
3760 return 0;
3761 }
3762 catch(exception& e) {
3763 m->errorOut(e, "Utils", "checkName");
3764 exit(1);
3765 }
3766 }
3767 /************************************************************/
checkGroupName(string name)3768 bool Utils::checkGroupName(string name) {
3769 try {
3770
3771 bool goodName = true;
3772 for (int i = 0; i < name.length(); i++) {
3773 if (name[i] == ':') { goodName = false; break; }
3774 else if (name[i] == '-') { goodName = false; break; }
3775 else if (name[i] == '/') { goodName = false; break; }
3776 }
3777
3778 if (!goodName) {
3779 m->mothurOut("\n[WARNING]: group " + name + " contains illegal characters in the name. Group names should not include :, -, or / characters. The ':' character is a special character used in trees. Using ':' will result in your tree being unreadable by tree reading software. The '-' character is a special character used by mothur to parse group names. Using the '-' character will prevent you from selecting groups. The '/' character will created unreadable filenames when mothur includes the group in an output filename.\n\n");
3780 }
3781
3782 return goodName;
3783 }
3784 catch(exception& e) {
3785 m->errorOut(e, "Utils", "checkGroupName");
3786 exit(1);
3787 }
3788 }
3789 /**********************************************************************************************************************/
readNames(string namefile,vector<seqPriorityNode> & nameVector,map<string,string> & fastamap)3790 int Utils::readNames(string namefile, vector<seqPriorityNode>& nameVector, map<string, string>& fastamap) {
3791 try {
3792 int error = 0;
3793
3794 //open input file
3795 ifstream in;
3796 openInputFile(namefile, in);
3797
3798 string rest = "";
3799 char buffer[4096];
3800 bool pairDone = false;
3801 bool columnOne = true;
3802 string firstCol, secondCol;
3803
3804 while (!in.eof()) {
3805 if (m->getControl_pressed()) { break; }
3806
3807 in.read(buffer, 4096);
3808 vector<string> pieces = splitWhiteSpace(rest, buffer, in.gcount());
3809
3810 for (int i = 0; i < pieces.size(); i++) {
3811 if (columnOne) { firstCol = pieces[i]; columnOne=false; }
3812 else { secondCol = pieces[i]; pairDone = true; columnOne=true; }
3813
3814 if (pairDone) {
3815 checkName(firstCol);
3816 checkName(secondCol);
3817 int num = getNumNames(secondCol);
3818
3819 map<string, string>::iterator it = fastamap.find(firstCol);
3820 if (it == fastamap.end()) {
3821 error = 1;
3822 m->mothurOut("[ERROR]: " + firstCol + " is not in your fastafile, but is in your namesfile, please correct.\n");
3823 }else {
3824 seqPriorityNode temp(num, it->second, firstCol);
3825 nameVector.push_back(temp);
3826 }
3827
3828 pairDone = false;
3829 }
3830 }
3831 }
3832 in.close();
3833
3834 if (rest != "") {
3835 vector<string> pieces = splitWhiteSpace(rest);
3836
3837 for (int i = 0; i < pieces.size(); i++) {
3838 if (columnOne) { firstCol = pieces[i]; columnOne=false; }
3839 else { secondCol = pieces[i]; pairDone = true; columnOne=true; }
3840
3841 if (pairDone) {
3842 checkName(firstCol);
3843 checkName(secondCol);
3844 int num = getNumNames(secondCol);
3845
3846 map<string, string>::iterator it = fastamap.find(firstCol);
3847 if (it == fastamap.end()) {
3848 error = 1;
3849 m->mothurOut("[ERROR]: " + firstCol + " is not in your fastafile, but is in your namesfile, please correct.\n");
3850 }else {
3851 seqPriorityNode temp(num, it->second, firstCol);
3852 nameVector.push_back(temp);
3853 }
3854
3855 pairDone = false;
3856 }
3857 }
3858 }
3859 return error;
3860 }
3861 catch(exception& e) {
3862 m->errorOut(e, "Utils", "readNames");
3863 exit(1);
3864 }
3865 }
3866 //**********************************************************************************************************************
readAccnos(string accnosfile)3867 set<string> Utils::readAccnos(string accnosfile){
3868 try {
3869 set<string> names;
3870 ifstream in;
3871 bool ableToOpen = openInputFile(accnosfile, in, "");
3872 if (!ableToOpen) { m->mothurOut("[ERROR]: Could not open " + accnosfile + "\n"); return names; }
3873 string name;
3874
3875 while (!in.eof()) {
3876 if (m->getControl_pressed()) { break; }
3877
3878 in >> name; gobble(in);
3879
3880 checkName(name);
3881 names.insert(name);
3882 }
3883 in.close();
3884
3885 return names;
3886 }
3887 catch(exception& e) {
3888 m->errorOut(e, "Utils", "readAccnos");
3889 exit(1);
3890 }
3891 }
3892 //**********************************************************************************************************************
printAccnos(string accnosfile,vector<string> & names)3893 void Utils::printAccnos(string accnosfile, vector<string>& names){
3894 try {
3895 ofstream out; openOutputFile(accnosfile, out);
3896
3897 //output to .accnos file
3898 for (int i = 0; i < names.size(); i++) {
3899
3900 if (m->getControl_pressed()) { break; }
3901
3902 out << names[i] << endl;
3903 }
3904 out.close();
3905 }
3906 catch(exception& e) {
3907 m->errorOut(e, "Utils", "printAccnos");
3908 exit(1);
3909 }
3910 }
3911 //**********************************************************************************************************************
printAccnos(string accnosfile,set<string> & names)3912 void Utils::printAccnos(string accnosfile, set<string>& names){
3913 try {
3914 ofstream out; openOutputFile(accnosfile, out);
3915
3916 //output to .accnos file
3917 for (set<string>::iterator it = names.begin(); it != names.end(); it++) {
3918
3919 if (m->getControl_pressed()) { break; }
3920
3921 out << *it << endl;
3922 }
3923 out.close();
3924 }
3925 catch(exception& e) {
3926 m->errorOut(e, "Utils", "printAccnos");
3927 exit(1);
3928 }
3929 }
3930 //**********************************************************************************************************************
readAccnos(string accnosfile,vector<string> & names)3931 int Utils::readAccnos(string accnosfile, vector<string>& names){
3932 try {
3933 names.clear();
3934 ifstream in;
3935 openInputFile(accnosfile, in);
3936 string name;
3937
3938 string rest = "";
3939 char buffer[4096];
3940
3941 while (!in.eof()) {
3942 if (m->getControl_pressed()) { break; }
3943
3944 in.read(buffer, 4096);
3945 vector<string> pieces = splitWhiteSpace(rest, buffer, in.gcount());
3946
3947 for (int i = 0; i < pieces.size(); i++) { checkName(pieces[i]); names.push_back(pieces[i]); }
3948 }
3949 in.close();
3950
3951 if (rest != "") {
3952 vector<string> pieces = splitWhiteSpace(rest);
3953 for (int i = 0; i < pieces.size(); i++) { checkName(pieces[i]); names.push_back(pieces[i]); }
3954 }
3955
3956 return 0;
3957 }
3958 catch(exception& e) {
3959 m->errorOut(e, "Utils", "readAccnos");
3960 exit(1);
3961 }
3962 }
3963 //**********************************************************************************************************************
readAccnos(string accnosfile,vector<string> & names,string noerror)3964 int Utils::readAccnos(string accnosfile, vector<string>& names, string noerror){
3965 try {
3966 names.clear();
3967 ifstream in;
3968 openInputFile(accnosfile, in, noerror);
3969 string name;
3970
3971 while (!in.eof()) {
3972 if (m->getControl_pressed()) { break; }
3973
3974 string line = trimWhiteSpace(getline(in));
3975 checkName(line);
3976 if (line != "") { names.push_back(line); }
3977 }
3978 in.close();
3979
3980 return 0;
3981 }
3982 catch(exception& e) {
3983 m->errorOut(e, "Utils", "readAccnos");
3984 exit(1);
3985 }
3986 }
3987 /***********************************************************************/
3988
getNumNames(string names)3989 int Utils::getNumNames(string names){
3990 try {
3991 int count = 0;
3992
3993 if(names != ""){
3994 count = 1;
3995 for(int i=0;i<names.size();i++){
3996 if(names[i] == ','){
3997 count++;
3998 }
3999 }
4000 }
4001
4002 return count;
4003 }
4004 catch(exception& e) {
4005 m->errorOut(e, "Utils", "getNumNames");
4006 exit(1);
4007 }
4008 }
4009 /***********************************************************************/
4010
getNumChar(string line,char c)4011 int Utils::getNumChar(string line, char c){
4012 try {
4013 int count = 0;
4014
4015 if(line != ""){
4016 for(int i=0;i<line.size();i++){
4017 if(line[i] == c){
4018 count++;
4019 }
4020 }
4021 }
4022
4023 return count;
4024 }
4025 catch(exception& e) {
4026 m->errorOut(e, "Utils", "getNumChar");
4027 exit(1);
4028 }
4029 }
4030 /***********************************************************************/
getSimpleLabel(string label)4031 string Utils::getSimpleLabel(string label){
4032 try {
4033 string simple = "";
4034
4035 //remove OTU or phylo tag
4036 string newLabel1 = "";
4037 for (int i = 0; i < label.length(); i++) {
4038 if(label[i]>47 && label[i]<58) { //is a digit
4039 newLabel1 += label[i];
4040 }
4041 }
4042
4043 int num1;
4044
4045 mothurConvert(newLabel1, num1);
4046
4047 simple = toString(num1);
4048
4049 return simple;
4050 }
4051 catch(exception& e) {
4052 m->errorOut(e, "Utils", "getSimpleLabel");
4053 exit(1);
4054 }
4055 }
4056 /***********************************************************************/
4057
isLabelEquivalent(string label1,string label2)4058 bool Utils::isLabelEquivalent(string label1, string label2){
4059 try {
4060 bool same = false;
4061
4062 //remove OTU or phylo tag
4063 string newLabel1 = "";
4064 for (int i = 0; i < label1.length(); i++) {
4065 if(label1[i]>47 && label1[i]<58) { //is a digit
4066 newLabel1 += label1[i];
4067 }
4068 }
4069
4070 string newLabel2 = "";
4071 for (int i = 0; i < label2.length(); i++) {
4072 if(label2[i]>47 && label2[i]<58) { //is a digit
4073 newLabel2 += label2[i];
4074 }
4075 }
4076
4077 int num1, num2;
4078 mothurConvert(newLabel1, num1);
4079 mothurConvert(newLabel2, num2);
4080
4081 if (num1 == num2) { same = true; }
4082
4083 return same;
4084 }
4085 catch(exception& e) {
4086 m->errorOut(e, "Utils", "isLabelEquivalent");
4087 exit(1);
4088 }
4089 }
4090 //**********************************************************************************************************************
isSubset(vector<string> bigset,vector<string> subset)4091 bool Utils::isSubset(vector<string> bigset, vector<string> subset) {
4092 try {
4093
4094
4095 if (subset.size() > bigset.size()) { return false; }
4096
4097 //check if each guy in subset is also in bigset
4098 for (int i = 0; i < subset.size(); i++) {
4099 bool match = false;
4100 for (int j = 0; j < bigset.size(); j++) {
4101 if (subset[i] == bigset[j]) { match = true; break; }
4102 }
4103
4104 //you have a guy in subset that had no match in bigset
4105 if (!match) { return false; }
4106 }
4107
4108 return true;
4109
4110 }
4111 catch(exception& e) {
4112 m->errorOut(e, "Utils", "isSubset");
4113 exit(1);
4114 }
4115 }
4116 /***********************************************************************/
mothurRemove(string filename)4117 bool Utils::mothurRemove(string filename){
4118 try {
4119 filename = getFullPathName(filename);
4120 int error = remove(filename.c_str());
4121 return error;
4122 }
4123 catch(exception& e) {
4124 m->errorOut(e, "Utils", "mothurRemove");
4125 exit(1);
4126 }
4127 }
4128 /***********************************************************************/
mothurConvert(string item)4129 char* Utils::mothurConvert(string item){
4130 try {
4131 char* converted = new char[item.length()+1];
4132
4133 *converted = '\0'; strncat(converted, item.c_str(), item.length());
4134
4135 //size_t size = item.length()+1;
4136
4137 //strncat(converted, item.c_str(), size-strlen(converted)-1);
4138
4139 //converted[size-1] = '\0';
4140
4141 //if (m->getDebug()) { m->mothurOut("[DEBUG]: converting string " + item + " to char* " + converted + "\n"); }
4142
4143 return converted;
4144 }
4145 catch(exception& e) {
4146 m->errorOut(e, "Utils", "mothurConvert-char*");
4147 exit(1);
4148 }
4149 }
4150 /***********************************************************************/
mothurConvert(string item,int & num)4151 bool Utils::mothurConvert(string item, int& num){
4152 try {
4153 bool error = false;
4154
4155 if (isNumeric1(item)) { convert(item, num); }
4156 else {
4157 num = 0;
4158 error = true;
4159 m->mothurOut("[ERROR]: cannot convert " + item + " to an integer.\n");
4160 m->setControl_pressed(true);
4161 }
4162
4163 return error;
4164 }
4165 catch(exception& e) {
4166 m->errorOut(e, "Utils", "mothurConvert-int");
4167 exit(1);
4168 }
4169 }
4170 /***********************************************************************/
mothurConvert(char item,int & num)4171 bool Utils::mothurConvert(char item, int& num){
4172 try {
4173 bool error = false;
4174
4175 if (isdigit(item)) {
4176 string mystring; mothurConvert(item, mystring);
4177 mothurConvert(mystring, num);
4178 }else {
4179 num = 0;
4180 error = true;
4181 m->mothurOut("[ERROR]: cannot convert " + toString(item) + " to an integer.\n");
4182 m->setControl_pressed(true);
4183 }
4184
4185 return error;
4186 }
4187 catch(exception& e) {
4188 m->errorOut(e, "Utils", "mothurConvert-int");
4189 exit(1);
4190 }
4191 }
4192 /***********************************************************************/
mothurConvert(char item,string & output)4193 bool Utils::mothurConvert(char item, string& output){
4194 try {
4195
4196 stringstream ss;
4197 ss << item;
4198 ss >> output;
4199 return true;
4200
4201 }
4202 catch(exception& e) {
4203 m->errorOut(e, "Utils", "mothurConvert-char");
4204 exit(1);
4205 }
4206 }
4207 /***********************************************************************/
mothurConvert(string item,intDist & num)4208 bool Utils::mothurConvert(string item, intDist& num){
4209 try {
4210 bool error = false;
4211
4212 if (isNumeric1(item)) {
4213 convert(item, num);
4214 }else {
4215 num = 0;
4216 error = true;
4217 m->mothurOut("[ERROR]: cannot convert " + item + " to an integer.\n");
4218 m->setControl_pressed(true);
4219 }
4220
4221 return error;
4222 }
4223 catch(exception& e) {
4224 m->errorOut(e, "Utils", "mothurConvert-intDist");
4225 exit(1);
4226 }
4227 }
4228 /***********************************************************************/
mothurConvert(vector<long long> & input)4229 set<long long> Utils::mothurConvert(vector<long long>& input){
4230 try {
4231 set<long long> output(input.begin(), input.end());
4232
4233
4234 return output;
4235 }
4236 catch(exception& e) {
4237 m->errorOut(e, "Utils", "mothurConvert-vectorToSet");
4238 exit(1);
4239 }
4240 }
4241 /***********************************************************************/
mothurConvert(set<long long> & input)4242 vector<long long> Utils::mothurConvert(set<long long>& input){
4243 try {
4244 vector<long long> output(input.begin(), input.end());
4245
4246
4247 return output;
4248 }
4249 catch(exception& e) {
4250 m->errorOut(e, "Utils", "mothurConvert-SetToVector");
4251 exit(1);
4252 }
4253 }
4254 /***********************************************************************/
mothurConvert(vector<string> & input)4255 set<string> Utils::mothurConvert(vector<string>& input){
4256 try {
4257 set<string> output(input.begin(), input.end());
4258
4259
4260 return output;
4261 }
4262 catch(exception& e) {
4263 m->errorOut(e, "Utils", "mothurConvert-vectorToSet");
4264 exit(1);
4265 }
4266 }
4267 /***********************************************************************/
mothurConvert(set<string> & input)4268 vector<string> Utils::mothurConvert(set<string>& input){
4269 try {
4270 vector<string> output(input.begin(), input.end());
4271
4272
4273 return output;
4274 }
4275 catch(exception& e) {
4276 m->errorOut(e, "Utils", "mothurConvert-SetToVector");
4277 exit(1);
4278 }
4279 }
4280 /**************************************************************************************************/
addUnclassifieds(string tax,int maxlevel,bool probs)4281 string Utils::addUnclassifieds(string tax, int maxlevel, bool probs) {
4282 try{
4283 string newTax, taxon;
4284
4285 string savedTax = tax;
4286 vector<string> taxons; splitAtChar(tax, taxons, ';'); taxons.pop_back();
4287 vector<int> confidences;
4288
4289 if (taxons.size() == maxlevel) { return savedTax; }
4290
4291 int index = 0;
4292 int confidence = 0;
4293 int level = 1;
4294 for (int i = 0; i < taxons.size(); i++) {
4295 index = i;
4296 string thisTax = taxons[i]+";";
4297 confidence = removeConfidences(thisTax);
4298 confidences.push_back(confidence);
4299
4300 if (thisTax == "unclassified;"){ index--; break; }
4301 else{ newTax += taxons[i] + ";"; }
4302 }
4303 level = index+1;
4304
4305 string thisTax = taxons[index]+";";
4306
4307 removeConfidences(thisTax);
4308 taxon = thisTax.substr(0, thisTax.length()-1);
4309
4310 string cTax = "";
4311 if (probs) { cTax = taxon + "_unclassified(" + toString(confidences[index]) + ");"; }
4312 else { cTax = taxon + "_unclassified;"; }
4313
4314 //add "unclassified" until you reach maxLevel
4315 while (level < maxlevel) {
4316 newTax += cTax;
4317 level++;
4318 }
4319
4320 return newTax;
4321 }
4322 catch(exception& e) {
4323 m->errorOut(e, "Utils", "addUnclassifieds");
4324 exit(1);
4325 }
4326 }
4327 /**************************************************************************************************/
trimTax(string tax,int trimLevel)4328 string Utils::trimTax(string tax, int trimLevel) {
4329 try{
4330 string newTax = "";
4331 string savedTax = tax;
4332 vector<string> taxons; splitAtChar(tax, taxons, ';'); taxons.pop_back();
4333
4334 if (taxons.size() == trimLevel) { return savedTax; }
4335 else {
4336 int level = 0;
4337 for (int i = 0; i < taxons.size(); i++) {
4338 newTax += taxons[i] +";";
4339 level++;
4340 if (level == trimLevel) { break; }
4341 }
4342 }
4343
4344 return newTax;
4345 }
4346 catch(exception& e) {
4347 m->errorOut(e, "Utils", "trimTax");
4348 exit(1);
4349 }
4350 }
4351 /**************************************************************************************************/
toUpper(string item)4352 string Utils::toUpper(string item) {
4353 try{
4354 string newItem = "";
4355
4356 for (int i = 0; i < item.length(); i++) {
4357 newItem += toupper(item[i]);
4358 }
4359 return newItem;
4360 }
4361 catch(exception& e) {
4362 m->errorOut(e, "Utils", "toUpper");
4363 exit(1);
4364 }
4365 }
4366 /**************************************************************************************************/
toLower(string item)4367 string Utils::toLower(string item) {
4368 try{
4369 string newItem = "";
4370
4371 for (int i = 0; i < item.length(); i++) {
4372 newItem += tolower(item[i]);
4373 }
4374 return newItem;
4375 }
4376 catch(exception& e) {
4377 m->errorOut(e, "Utils", "toLower");
4378 exit(1);
4379 }
4380 }
4381 /***********************************************************************/
isNumeric1(string stringToCheck)4382 bool Utils::isNumeric1(string stringToCheck){
4383 try {
4384 bool numeric = false;
4385
4386 if (stringToCheck == "") { numeric = false; }
4387 else if(stringToCheck.find_first_not_of("0123456789.-") == string::npos) { numeric = true; }
4388
4389 return numeric;
4390 }
4391 catch(exception& e) {
4392 m->errorOut(e, "Utils", "isNumeric1");
4393 exit(1);
4394 }
4395
4396 }
4397 /***********************************************************************/
isPositiveNumeric(string stringToCheck)4398 bool Utils::isPositiveNumeric(string stringToCheck){
4399 try {
4400 bool numeric = false;
4401
4402 if (stringToCheck == "") { numeric = false; }
4403 else if(stringToCheck.find_first_not_of("0123456789.") == string::npos) { numeric = true; }
4404
4405 return numeric;
4406 }
4407 catch(exception& e) {
4408 m->errorOut(e, "Utils", "isPositiveNumeric");
4409 exit(1);
4410 }
4411
4412 }
4413 /***********************************************************************/
isEqual(float num1,float num2)4414 bool Utils::isEqual(float num1, float num2){
4415 try {
4416 bool equal = false;
4417
4418 if (fabs(num1-num2) <= fabs(num1 * 0.001)) { equal = true; }
4419
4420 return equal;
4421 }
4422 catch(exception& e) {
4423 m->errorOut(e, "Utils", "isEqual");
4424 exit(1);
4425 }
4426 }
4427 /***********************************************************************/
isEqual(double num1,double num2)4428 bool Utils::isEqual(double num1, double num2){
4429 try {
4430 bool equal = false;
4431
4432 if (fabs(num1-num2) <= fabs(num1 * 0.001)) { equal = true; }
4433
4434 return equal;
4435 }
4436 catch(exception& e) {
4437 m->errorOut(e, "Utils", "isEqual");
4438 exit(1);
4439 }
4440 }
4441 /***********************************************************************/
allSpaces(string stringToCheck)4442 bool Utils::allSpaces(string stringToCheck){
4443 try {
4444
4445 for (int i = 0; i < stringToCheck.length(); i++) {
4446 char c = stringToCheck[i];
4447 if (!isspace(c)) { return false; }
4448 }
4449
4450 return true;
4451 }
4452 catch(exception& e) {
4453 m->errorOut(e, "Utils", "isNumeric1");
4454 exit(1);
4455 }
4456
4457 }
4458 /***********************************************************************/
isInteger(string stringToCheck)4459 bool Utils::isInteger(string stringToCheck){
4460 try {
4461 bool isInt = false;
4462
4463 if(stringToCheck.find_first_not_of("0123456789-") == string::npos) { isInt = true; }
4464
4465 return isInt;
4466 }
4467 catch(exception& e) {
4468 m->errorOut(e, "Utils", "isInteger");
4469 exit(1);
4470 }
4471
4472 }
4473 /***********************************************************************/
containsAlphas(string stringToCheck)4474 bool Utils::containsAlphas(string stringToCheck){
4475 try {
4476 bool containsAlpha = false;
4477
4478 if(stringToCheck.find_first_of("AaBbCcDdEeFfGgHhIiJjKkLlMmNnOopPQqRrSsTtUuVvWwXxYyZz") != string::npos) { containsAlpha = true; }
4479
4480 return containsAlpha;
4481 }
4482 catch(exception& e) {
4483 m->errorOut(e, "Utils", "containsAlphas");
4484 exit(1);
4485 }
4486
4487 }
4488 /***********************************************************************/
isAllAlphas(string stringToCheck)4489 bool Utils::isAllAlphas(string stringToCheck){
4490 try {
4491 bool allAlphas = true;
4492
4493 if(stringToCheck.find_first_not_of("AaBbCcDdEeFfGgHhIiJjKkLlMmNnOopPQqRrSsTtUuVvWwXxYyZz") != string::npos) { allAlphas = false; }
4494
4495 return allAlphas;
4496 }
4497 catch(exception& e) {
4498 m->errorOut(e, "Utils", "isAllAlphas");
4499 exit(1);
4500 }
4501
4502 }
4503 /***********************************************************************/
isAllAlphaNumerics(string stringToCheck)4504 bool Utils::isAllAlphaNumerics(string stringToCheck){
4505 try {
4506 bool allAlphaNumerics = true;
4507
4508 if(stringToCheck.find_first_not_of("AaBbCcDdEeFfGgHhIiJjKkLlMmNnOopPQqRrSsTtUuVvWwXxYyZz0123456789") != string::npos) { allAlphaNumerics = false; }
4509
4510 return allAlphaNumerics;
4511 }
4512 catch(exception& e) {
4513 m->errorOut(e, "Utils", "isAllAlphas");
4514 exit(1);
4515 }
4516
4517 }
4518 /***********************************************************************/
mothurConvert(string item,float & num)4519 bool Utils::mothurConvert(string item, float& num){
4520 try {
4521 bool error = false;
4522
4523 if (isNumeric1(item)) {
4524 convert(item, num);
4525 }else {
4526 try {
4527 num = atof(item.c_str());
4528 }catch(exception& e) {
4529 num = 0;
4530 error = true;
4531 m->mothurOut("[ERROR]: cannot convert " + item + " to a float.\n");
4532 m->setControl_pressed(true);
4533 }
4534 }
4535
4536 return error;
4537 }
4538 catch(exception& e) {
4539 m->errorOut(e, "Utils", "mothurConvert-float");
4540 exit(1);
4541 }
4542 }
4543 /***********************************************************************/
mothurConvert(string item,double & num)4544 bool Utils::mothurConvert(string item, double& num){
4545 try {
4546 bool error = false;
4547
4548 if (isNumeric1(item)) {
4549 convert(item, num);
4550 }else {
4551 try {
4552 num = atof(item.c_str());
4553 }catch(exception& e) {
4554 num = 0;
4555 error = true;
4556 m->mothurOut("[ERROR]: cannot convert " + item + " to a double.\n");
4557 m->setControl_pressed(true);
4558 }
4559 }
4560
4561 return error;
4562 }
4563 catch(exception& e) {
4564 m->errorOut(e, "Utils", "mothurConvert-double");
4565 exit(1);
4566 }
4567 }
4568 /**************************************************************************************************/
4569
binomial(int maxOrder)4570 vector<vector<double> > Utils::binomial(int maxOrder){
4571 try {
4572 vector<vector<double> > binomial(maxOrder+1);
4573
4574 for(int i=0;i<=maxOrder;i++){
4575 binomial[i].resize(maxOrder+1);
4576 binomial[i][0]=1;
4577 binomial[0][i]=0;
4578 }
4579 binomial[0][0]=1;
4580
4581 binomial[1][0]=1;
4582 binomial[1][1]=1;
4583
4584 for(int i=2;i<=maxOrder;i++){
4585 binomial[1][i]=0;
4586 }
4587
4588 for(int i=2;i<=maxOrder;i++){
4589 for(int j=1;j<=maxOrder;j++){
4590 if(i==j){ binomial[i][j]=1; }
4591 if(j>i) { binomial[i][j]=0; }
4592 else { binomial[i][j]=binomial[i-1][j-1]+binomial[i-1][j]; }
4593 }
4594 }
4595
4596 return binomial;
4597
4598 }
4599 catch(exception& e) {
4600 m->errorOut(e, "Utils", "binomial");
4601 exit(1);
4602 }
4603 }
4604 /**************************************************************************************************/
fromBase36(string base36)4605 unsigned int Utils::fromBase36(string base36){
4606 try {
4607 unsigned int num = 0;
4608
4609 map<char, int> converts;
4610 converts['A'] = 0;
4611 converts['a'] = 0;
4612 converts['B'] = 1;
4613 converts['b'] = 1;
4614 converts['C'] = 2;
4615 converts['c'] = 2;
4616 converts['D'] = 3;
4617 converts['d'] = 3;
4618 converts['E'] = 4;
4619 converts['e'] = 4;
4620 converts['F'] = 5;
4621 converts['f'] = 5;
4622 converts['G'] = 6;
4623 converts['g'] = 6;
4624 converts['H'] = 7;
4625 converts['h'] = 7;
4626 converts['I'] = 8;
4627 converts['i'] = 8;
4628 converts['J'] = 9;
4629 converts['j'] = 9;
4630 converts['K'] = 10;
4631 converts['k'] = 10;
4632 converts['L'] = 11;
4633 converts['l'] = 11;
4634 converts['M'] = 12;
4635 converts['m'] = 12;
4636 converts['N'] = 13;
4637 converts['n'] = 13;
4638 converts['O'] = 14;
4639 converts['o'] = 14;
4640 converts['P'] = 15;
4641 converts['p'] = 15;
4642 converts['Q'] = 16;
4643 converts['q'] = 16;
4644 converts['R'] = 17;
4645 converts['r'] = 17;
4646 converts['S'] = 18;
4647 converts['s'] = 18;
4648 converts['T'] = 19;
4649 converts['t'] = 19;
4650 converts['U'] = 20;
4651 converts['u'] = 20;
4652 converts['V'] = 21;
4653 converts['v'] = 21;
4654 converts['W'] = 22;
4655 converts['w'] = 22;
4656 converts['X'] = 23;
4657 converts['x'] = 23;
4658 converts['Y'] = 24;
4659 converts['y'] = 24;
4660 converts['Z'] = 25;
4661 converts['z'] = 25;
4662 converts['0'] = 26;
4663 converts['1'] = 27;
4664 converts['2'] = 28;
4665 converts['3'] = 29;
4666 converts['4'] = 30;
4667 converts['5'] = 31;
4668 converts['6'] = 32;
4669 converts['7'] = 33;
4670 converts['8'] = 34;
4671 converts['9'] = 35;
4672
4673 int i = 0;
4674 while (i < base36.length()) {
4675 char c = base36[i];
4676 num = 36 * num + converts[c];
4677 i++;
4678 }
4679
4680 return num;
4681
4682 }
4683 catch(exception& e) {
4684 m->errorOut(e, "Utils", "fromBase36");
4685 exit(1);
4686 }
4687 }
4688 /***********************************************************************/
findEdianness()4689 string Utils::findEdianness() {
4690 try {
4691 // find real endian type
4692 string endianType = "unknown";
4693 int num = 1;
4694 if(*(char *)&num == 1)
4695 {
4696 endianType = "LITTLE_ENDIAN";
4697 }
4698 else
4699 {
4700 endianType = "BIG_ENDIAN";
4701 }
4702 return endianType;
4703 }
4704 catch(exception& e) {
4705 m->errorOut(e, "Utils", "findEdianness");
4706 exit(1);
4707 }
4708 }
4709 /***********************************************************************/
median(vector<double> x)4710 double Utils::median(vector<double> x) {
4711 try {
4712 double value = 0.0;
4713
4714 if (x.size() == 0) { } //error
4715 else {
4716 //For example, if a < b < c, then the median of the list {a, b, c} is b, and, if a < b < c < d, then the median of the list {a, b, c, d} is the mean of b and c; i.e., it is (b + c)/2.
4717 sort(x.begin(), x.end());
4718 //is x.size even?
4719 if ((x.size()%2) == 0) { //size() is even. median = average of 2 midpoints
4720 int midIndex1 = (x.size()/2)-1;
4721 int midIndex2 = (x.size()/2);
4722 value = (x[midIndex1]+ x[midIndex2]) / 2.0;
4723 }else {
4724 int midIndex = (x.size()/2);
4725 value = x[midIndex];
4726 }
4727 }
4728 return value;
4729 }
4730 catch(exception& e) {
4731 m->errorOut(e, "Utils", "median");
4732 exit(1);
4733 }
4734 }
4735 /***********************************************************************/
median(vector<int> x)4736 int Utils::median(vector<int> x) {
4737 try {
4738 double value = 0;
4739
4740 if (x.size() == 0) { } //error
4741 else {
4742 //For example, if a < b < c, then the median of the list {a, b, c} is b, and, if a < b < c < d, then the median of the list {a, b, c, d} is the mean of b and c; i.e., it is (b + c)/2.
4743 sort(x.begin(), x.end());
4744 //is x.size even?
4745 if ((x.size()%2) == 0) { //size() is even. median = average of 2 midpoints
4746 int midIndex1 = (x.size()/2)-1;
4747 int midIndex2 = (x.size()/2);
4748 value = (x[midIndex1]+ x[midIndex2]) / 2.0;
4749 }else {
4750 int midIndex = (x.size()/2);
4751 value = x[midIndex];
4752 }
4753 }
4754 return (int) value;
4755 }
4756 catch(exception& e) {
4757 m->errorOut(e, "Utils", "median - int");
4758 exit(1);
4759 }
4760 }
4761 /***********************************************************************/
average(vector<int> x)4762 int Utils::average(vector<int> x) {
4763 try {
4764 int value = 0;
4765
4766 for (int i = 0; i < x.size(); i++) {
4767 if (m->getControl_pressed()) { break; }
4768 value += x[i];
4769 }
4770
4771 return ((int) value / x.size());
4772 }
4773 catch(exception& e) {
4774 m->errorOut(e, "Utils", "average - int");
4775 exit(1);
4776 }
4777 }
factorial(int num)4778 int Utils::factorial(int num){
4779 try {
4780 int total = 1;
4781
4782 for (int i = 1; i <= num; i++) {
4783 total *= i;
4784 }
4785
4786 return total;
4787 }
4788 catch(exception& e) {
4789 m->errorOut(e, "Utils", "factorial");
4790 exit(1);
4791 }
4792 }
4793 /***********************************************************************/
getAlignmentLength(string file)4794 int Utils::getAlignmentLength(string file){
4795 try {
4796 ifstream in; openInputFile(file, in);
4797
4798 Sequence seq(in);
4799
4800 in.close();
4801
4802 return seq.getAlignLength();
4803 }
4804 catch(exception& e) {
4805 m->errorOut(e, "Utils", "getAlignmentLength");
4806 exit(1);
4807 }
4808 }
4809
4810 /***********************************************************************/
4811
getNumSeqs(ifstream & file)4812 int Utils::getNumSeqs(ifstream& file){
4813 try {
4814 int numSeqs = count(istreambuf_iterator<char>(file),istreambuf_iterator<char>(), '>');
4815 file.seekg(0);
4816 return numSeqs;
4817 }
4818 catch(exception& e) {
4819 m->errorOut(e, "Utils", "getNumSeqs");
4820 exit(1);
4821 }
4822 }
4823 /***********************************************************************/
getNumSeqs(ifstream & file,int & numSeqs)4824 void Utils::getNumSeqs(ifstream& file, int& numSeqs){
4825 try {
4826 string input;
4827 numSeqs = 0;
4828 while(!file.eof()){
4829 input = getline(file);
4830 if (input.length() != 0) {
4831 if(input[0] == '>'){ numSeqs++; }
4832 }
4833 }
4834 }
4835 catch(exception& e) {
4836 m->errorOut(e, "Utils", "getNumSeqs");
4837 exit(1);
4838 }
4839 }
4840 /***********************************************************************/
4841
4842 //This function parses the estimator options and puts them in a vector
splitAtChar(string & estim,vector<string> & container,char symbol)4843 void Utils::splitAtChar(string& estim, vector<string>& container, char symbol) {
4844 try {
4845
4846 if (symbol == '-') { splitAtDash(estim, container); return; }
4847
4848 string individual = "";
4849 int estimLength = estim.size();
4850 for(int i=0;i<estimLength;i++){
4851 if(estim[i] == symbol){
4852 container.push_back(individual);
4853 individual = "";
4854 }
4855 else{
4856 individual += estim[i];
4857 }
4858 }
4859 container.push_back(individual);
4860
4861 }
4862 catch(exception& e) {
4863 m->errorOut(e, "Utils", "splitAtChar");
4864 exit(1);
4865 }
4866 }
4867 /***********************************************************************/
4868
4869 //This function parses the estimator options and puts them in a vector
splitAtChar(string & estim,set<string> & container,char symbol)4870 void Utils::splitAtChar(string& estim, set<string>& container, char symbol) {
4871 try {
4872
4873 if (symbol == '-') { splitAtDash(estim, container); return; }
4874
4875 string individual = "";
4876 int estimLength = estim.size();
4877 for(int i=0;i<estimLength;i++){
4878 if(estim[i] == symbol){
4879 container.insert(individual);
4880 individual = "";
4881 }
4882 else{
4883 individual += estim[i];
4884 }
4885 }
4886 container.insert(individual);
4887
4888 }
4889 catch(exception& e) {
4890 m->errorOut(e, "Utils", "splitAtChar");
4891 exit(1);
4892 }
4893 }
4894
4895 /***********************************************************************/
4896
4897 //This function parses the estimator options and puts them in a vector
splitAtDash(string & estim,vector<string> & container)4898 void Utils::splitAtDash(string& estim, vector<string>& container) {
4899 try {
4900 string individual = "";
4901 int estimLength = estim.size();
4902 bool prevEscape = false;
4903
4904 for(int i=0;i<estimLength;i++){
4905 if(estim[i] == '-'){
4906 if (prevEscape) { individual += estim[i]; prevEscape = false; } //add in dash because it was escaped.
4907 else {
4908 container.push_back(individual);
4909 individual = "";
4910 }
4911 }else if(estim[i] == '\\'){
4912 if (i < estimLength-1) {
4913 if (estim[i+1] == '-') { prevEscape=true; } //are you a backslash before a dash, if yes ignore
4914 else { individual += estim[i]; prevEscape = false; } //if no, add in
4915 }else { individual += estim[i]; }
4916 }else {
4917 individual += estim[i];
4918 }
4919 }
4920
4921
4922
4923 container.push_back(individual);
4924 }
4925 catch(exception& e) {
4926 m->errorOut(e, "Utils", "splitAtDash");
4927 exit(1);
4928 }
4929 }
4930
4931 /***********************************************************************/
4932 //This function parses the label options and puts them in a set
splitAtDash(string & estim,set<string> & container)4933 void Utils::splitAtDash(string& estim, set<string>& container) {
4934 try {
4935 string individual = "";
4936 int estimLength = estim.size();
4937 bool prevEscape = false;
4938
4939 for(int i=0;i<estimLength;i++){
4940 if(estim[i] == '-'){
4941 if (prevEscape) { individual += estim[i]; prevEscape = false; } //add in dash because it was escaped.
4942 else {
4943 container.insert(individual);
4944 individual = "";
4945 }
4946 }else if(estim[i] == '\\'){
4947 if (i < estimLength-1) {
4948 if (estim[i+1] == '-') { prevEscape=true; } //are you a backslash before a dash, if yes ignore
4949 else { individual += estim[i]; prevEscape = false; } //if no, add in
4950 }else { individual += estim[i]; }
4951 }else {
4952 individual += estim[i];
4953 }
4954 }
4955 container.insert(individual);
4956
4957 }
4958 catch(exception& e) {
4959 m->errorOut(e, "Utils", "splitAtDash");
4960 exit(1);
4961 }
4962 }
4963 /***********************************************************************/
4964 //This function parses the line options and puts them in a set
splitAtDash(string & estim,set<int> & container)4965 void Utils::splitAtDash(string& estim, set<int>& container) {
4966 try {
4967 string individual = "";
4968 int lineNum;
4969 int estimLength = estim.size();
4970 bool prevEscape = false;
4971
4972 for(int i=0;i<estimLength;i++){
4973 if(estim[i] == '-'){
4974 if (prevEscape) { individual += estim[i]; prevEscape = false; } //add in dash because it was escaped.
4975 else {
4976 convert(individual, lineNum); //convert the string to int
4977 container.insert(lineNum);
4978 individual = "";
4979 }
4980 }else if(estim[i] == '\\'){
4981 if (i < estimLength-1) {
4982 if (estim[i+1] == '-') { prevEscape=true; } //are you a backslash before a dash, if yes ignore
4983 else { individual += estim[i]; prevEscape = false; } //if no, add in
4984 }else { individual += estim[i]; }
4985 }else {
4986 individual += estim[i];
4987 }
4988 }
4989
4990 convert(individual, lineNum); //convert the string to int
4991 container.insert(lineNum);
4992 }
4993 catch(exception& e) {
4994 m->errorOut(e, "Utils", "splitAtDash");
4995 exit(1);
4996 }
4997 }
4998
4999 /***********************************************************************/
makeList(vector<string> & names)5000 string Utils::makeList(vector<string>& names) {
5001 try {
5002 string list = "";
5003
5004 if (names.size() == 0) { return list; }
5005
5006 for (int i = 0; i < names.size()-1; i++) { list += names[i] + ","; }
5007
5008 //get last name
5009 list += names[names.size()-1];
5010
5011 return list;
5012 }
5013 catch(exception& e) {
5014 m->errorOut(e, "Utils", "makeList");
5015 exit(1);
5016 }
5017 }
5018
5019 /***********************************************************************/
5020 //This function parses the a string and puts peices in a vector
splitAtComma(string & estim,vector<string> & container)5021 void Utils::splitAtComma(string& estim, vector<string>& container) {
5022 try {
5023 string individual = "";
5024 int estimLength = estim.size();
5025 for(int i=0;i<estimLength;i++){
5026 if(estim[i] == ','){
5027 container.push_back(individual);
5028 individual = "";
5029 }
5030 else{
5031 individual += estim[i];
5032 }
5033 }
5034 container.push_back(individual);
5035
5036 }
5037 catch(exception& e) {
5038 m->errorOut(e, "Utils", "splitAtComma");
5039 exit(1);
5040 }
5041 }
5042 /***********************************************************************/
5043 //This function parses the a string and puts peices in a vector
splitAtComma(string & estim,vector<int> & convertedContainer)5044 void Utils::splitAtComma(string& estim, vector<int>& convertedContainer) {
5045 try {
5046 string individual = "";
5047 vector<string> container;
5048 int estimLength = estim.size();
5049 for(int i=0;i<estimLength;i++){
5050 if(estim[i] == ','){
5051 container.push_back(individual);
5052 individual = "";
5053 }
5054 else{
5055 individual += estim[i];
5056 }
5057 }
5058 container.push_back(individual);
5059
5060 for (int i = 0; i < container.size(); i++) {
5061 int temp;
5062 if (mothurConvert(container[i], temp)) { convertedContainer.push_back(temp); }
5063 }
5064
5065 }
5066 catch(exception& e) {
5067 m->errorOut(e, "Utils", "splitAtComma");
5068 exit(1);
5069 }
5070 }
5071 /***********************************************************************/
5072 //This function splits up the various option parameters
splitAtChar(string & prefix,string & suffix,char c)5073 void Utils::splitAtChar(string& prefix, string& suffix, char c){
5074 try {
5075
5076 string individual = "";
5077 int estimLength = prefix.size();
5078 for(int i=0;i<estimLength;i++){
5079 if(prefix[i] == c){
5080 suffix = prefix.substr(i+1);
5081 prefix = individual;
5082 break;
5083 }
5084 else{
5085 individual += prefix[i];
5086 }
5087 }
5088
5089 }
5090 catch(exception& e) {
5091 m->errorOut(e, "Utils", "splitAtChar");
5092 exit(1);
5093 }
5094 }
5095
5096 /***********************************************************************/
5097
5098 //This function splits up the various option parameters
splitAtComma(string & prefix,string & suffix)5099 void Utils::splitAtComma(string& prefix, string& suffix){
5100 try {
5101 prefix = suffix.substr(0,suffix.find_first_of(','));
5102 if ((suffix.find_first_of(',')+2) <= suffix.length()) { //checks to make sure you don't have comma at end of string
5103 suffix = suffix.substr(suffix.find_first_of(',')+1, suffix.length());
5104 string space = " ";
5105 while(suffix.at(0) == ' ')
5106 suffix = suffix.substr(1, suffix.length());
5107 }else { suffix = ""; }
5108
5109 }
5110 catch(exception& e) {
5111 m->errorOut(e, "Utils", "splitAtComma");
5112 exit(1);
5113 }
5114 }
5115 /***********************************************************************/
5116
5117 //This function separates the key value from the option value i.e. dist=96_...
splitAtEquals(string & key,string & value)5118 void Utils::splitAtEquals(string& key, string& value){
5119 try {
5120 if(value.find_first_of('=') != -1){
5121 key = value.substr(0,value.find_first_of('='));
5122 if ((value.find_first_of('=')+1) <= value.length()) {
5123 value = value.substr(value.find_first_of('=')+1, value.length());
5124 }
5125 }else{
5126 key = value;
5127 value = 1;
5128 }
5129 }
5130 catch(exception& e) {
5131 m->errorOut(e, "Utils", "splitAtEquals");
5132 exit(1);
5133 }
5134 }
5135
5136 /**************************************************************************************************/
5137
inUsersGroups(string groupname,vector<string> Groups)5138 bool Utils::inUsersGroups(string groupname, vector<string> Groups) {
5139 try {
5140 for (int i = 0; i < Groups.size(); i++) {
5141 if (groupname == Groups[i]) { return true; }
5142 }
5143 return false;
5144 }
5145 catch(exception& e) {
5146 m->errorOut(e, "Utils", "inUsersGroups");
5147 exit(1);
5148 }
5149 }
5150 /**************************************************************************************************/
5151
inUsersGroups(string groupname,set<string> Groups)5152 bool Utils::inUsersGroups(string groupname, set<string> Groups) {
5153 try {
5154 if (Groups.count(groupname) != 0) { return true; } //found it
5155 return false;
5156 }
5157 catch(exception& e) {
5158 m->errorOut(e, "Utils", "inUsersGroups");
5159 exit(1);
5160 }
5161 }
5162
5163 /**************************************************************************************************/
5164
inUsersGroups(vector<int> set,vector<vector<int>> sets)5165 bool Utils::inUsersGroups(vector<int> set, vector< vector<int> > sets) {
5166 try {
5167 for (int i = 0; i < sets.size(); i++) {
5168 if (set == sets[i]) { return true; }
5169 }
5170 return false;
5171 }
5172 catch(exception& e) {
5173 m->errorOut(e, "Utils", "inUsersGroups");
5174 exit(1);
5175 }
5176 }
5177 /**************************************************************************************************/
5178
inUsersGroups(int groupname,vector<int> Groups)5179 bool Utils::inUsersGroups(int groupname, vector<int> Groups) {
5180 try {
5181 for (int i = 0; i < Groups.size(); i++) {
5182 if (groupname == Groups[i]) { return true; }
5183 }
5184 return false;
5185 }
5186 catch(exception& e) {
5187 m->errorOut(e, "Utils", "inUsersGroups");
5188 exit(1);
5189 }
5190 }
5191
5192 /**************************************************************************************************/
5193 //returns true if any of the strings in first vector are in second vector
inUsersGroups(vector<string> groupnames,vector<string> Groups)5194 bool Utils::inUsersGroups(vector<string> groupnames, vector<string> Groups) {
5195 try {
5196
5197 for (int i = 0; i < groupnames.size(); i++) {
5198 if (inUsersGroups(groupnames[i], Groups)) { return true; }
5199 }
5200 return false;
5201 }
5202 catch(exception& e) {
5203 m->errorOut(e, "Utils", "inUsersGroups");
5204 exit(1);
5205 }
5206 }
5207
5208 /**************************************************************************************************/
getTag(string filename)5209 string Utils::getTag(string filename) {
5210 try {
5211 string tag = "Otu";
5212 int pos = filename.find_first_of(".tx.");
5213 if (pos != string::npos) { tag = "Phylo"; }
5214
5215 int pos2 = filename.find_first_of(".asv.");
5216 if (pos2 != string::npos) { tag = "ASV"; }
5217
5218 return tag;
5219 }
5220 catch(exception& e) {
5221 m->errorOut(e, "Utils", "getTag");
5222 exit(1);
5223 }
5224 }
5225 /**************************************************************************************************/
5226 //removes entries that are only white space
removeBlanks(vector<string> & tempVector)5227 int Utils::removeBlanks(vector<string>& tempVector) {
5228 try {
5229 vector<string> newVector;
5230 for (int i = 0; i < tempVector.size(); i++) {
5231 bool isBlank = true;
5232 for (int j = 0; j < tempVector[i].length(); j++) {
5233 if (!isspace(tempVector[i][j])) { isBlank = false; j+= tempVector[i].length(); } //contains non space chars, break out and save
5234 }
5235 if (!isBlank) { newVector.push_back(tempVector[i]); }
5236 }
5237 tempVector = newVector;
5238 return 0;
5239 }
5240 catch(exception& e) {
5241 m->errorOut(e, "Utils", "removeBlanks");
5242 exit(1);
5243 }
5244 }
5245 /***********************************************************************/
getNextShared(InputData & input,bool allLines,set<string> & userLabels,set<string> & processedLabels,string & lastLabel,string optionOutput)5246 SharedRAbundVectors* Utils::getNextShared(InputData& input, bool allLines, set<string>& userLabels, set<string>& processedLabels, string& lastLabel, string optionOutput) {//input, allLines, userLabels, processedLabels
5247 try {
5248
5249 SharedRAbundVectors* lookup = input.getSharedRAbundVectors();
5250
5251 //as long as you are not at the end of the file or done wih the lines you want
5252 while((lookup != NULL) && ((allLines == 1) || (userLabels.size() != 0))) {
5253
5254 if (m->getControl_pressed()) { delete lookup; return NULL; }
5255
5256 if (lastLabel == "") { lastLabel = lookup->getLabel(); }
5257
5258 if(allLines == 1 || userLabels.count(lookup->getLabel()) == 1){ //process all lines or this is a line we want
5259
5260 m->mothurOut(lookup->getLabel()+ " " + optionOutput +"\n");
5261
5262 processedLabels.insert(lookup->getLabel()); userLabels.erase(lookup->getLabel());
5263
5264 return lookup;
5265 }
5266
5267 if ((anyLabelsToProcess(lookup->getLabel(), userLabels, "") ) && (processedLabels.count(lastLabel) != 1)) { //use smart distancing to find previous small distance if user labels differ from the labels in file.
5268
5269 string saveLabel = lookup->getLabel();
5270
5271 delete lookup;
5272 lookup = input.getSharedRAbundVectors(lastLabel);
5273 m->mothurOut(lookup->getLabel()+"\n");
5274
5275 processedLabels.insert(lookup->getLabel()); userLabels.erase(lookup->getLabel());
5276
5277 lastLabel = saveLabel;
5278
5279 return lookup;
5280 }
5281
5282 lastLabel = lookup->getLabel();
5283 //prevent memory leak
5284 delete lookup;
5285
5286 if (m->getControl_pressed()) { return NULL; }
5287
5288 //get next line to process
5289 lookup = input.getSharedRAbundVectors();
5290 }
5291
5292 if (m->getControl_pressed()) { delete lookup; return NULL; }
5293
5294 //output error messages about any remaining user labels
5295 set<string>::iterator it;
5296 bool needToRun = false;
5297 for (it = userLabels.begin(); it != userLabels.end(); it++) {
5298 m->mothurOut("Your file does not include the label " + *it);
5299 if (processedLabels.count(lastLabel) != 1) { m->mothurOut(". I will use " + lastLabel + ".\n"); needToRun = true; }
5300 else { m->mothurOut(". Please refer to " + lastLabel + ".\n"); }
5301 }
5302
5303 //run last label if you need to
5304 if (needToRun ) {
5305 delete lookup;
5306 lookup = input.getSharedRAbundVectors(lastLabel);
5307 if (lookup != NULL) {
5308 m->mothurOut(lookup->getLabel()+"\n");
5309 processedLabels.insert(lookup->getLabel()); userLabels.erase(lookup->getLabel());
5310 }
5311 return lookup;
5312 }
5313
5314 return lookup;
5315
5316 }catch(exception& e) {
5317 m->errorOut(e, "Utils", "getNextShared");
5318 exit(1);
5319 }
5320 }
5321 /***********************************************************************/
getNextRelabund(InputData & input,bool allLines,set<string> & userLabels,set<string> & processedLabels,string & lastLabel)5322 SharedRAbundFloatVectors* Utils::getNextRelabund(InputData& input, bool allLines, set<string>& userLabels, set<string>& processedLabels, string& lastLabel) {//input, allLines, userLabels, processedLabels
5323 try {
5324
5325 SharedRAbundFloatVectors* lookup = input.getSharedRAbundFloatVectors();
5326
5327 //as long as you are not at the end of the file or done wih the lines you want
5328 while((lookup != NULL) && ((allLines == 1) || (userLabels.size() != 0))) {
5329
5330 if (m->getControl_pressed()) { delete lookup; return NULL; }
5331
5332 if (lastLabel == "") { lastLabel = lookup->getLabel(); }
5333
5334 if(allLines == 1 || userLabels.count(lookup->getLabel()) == 1){ //process all lines or this is a line we want
5335
5336 m->mothurOut(lookup->getLabel()+"\n");
5337
5338 processedLabels.insert(lookup->getLabel()); userLabels.erase(lookup->getLabel());
5339
5340 return lookup;
5341 }
5342
5343 if ((anyLabelsToProcess(lookup->getLabel(), userLabels, "") ) && (processedLabels.count(lastLabel) != 1)) { //use smart distancing to find previous small distance if user labels differ from the labels in file.
5344
5345 string saveLabel = lookup->getLabel();
5346
5347 delete lookup;
5348 lookup = input.getSharedRAbundFloatVectors(lastLabel);
5349 m->mothurOut(lookup->getLabel()+"\n");
5350
5351 processedLabels.insert(lookup->getLabel()); userLabels.erase(lookup->getLabel());
5352
5353 lastLabel = saveLabel;
5354
5355 return lookup;
5356 }
5357
5358 lastLabel = lookup->getLabel();
5359 //prevent memory leak
5360 delete lookup;
5361
5362 if (m->getControl_pressed()) { return NULL; }
5363
5364 //get next line to process
5365 lookup = input.getSharedRAbundFloatVectors();
5366 }
5367
5368 if (m->getControl_pressed()) { delete lookup; return NULL; }
5369
5370 //output error messages about any remaining user labels
5371 set<string>::iterator it;
5372 bool needToRun = false;
5373 for (it = userLabels.begin(); it != userLabels.end(); it++) {
5374 m->mothurOut("Your file does not include the label " + *it);
5375 if (processedLabels.count(lastLabel) != 1) { m->mothurOut(". I will use " + lastLabel + ".\n"); needToRun = true; }
5376 else { m->mothurOut(". Please refer to " + lastLabel + ".\n"); }
5377 }
5378
5379 //run last label if you need to
5380 if (needToRun ) {
5381 delete lookup;
5382 lookup = input.getSharedRAbundFloatVectors(lastLabel);
5383 if (lookup != NULL) {
5384 m->mothurOut(lookup->getLabel()+"\n");
5385 processedLabels.insert(lookup->getLabel()); userLabels.erase(lookup->getLabel());
5386 }
5387 return lookup;
5388 }
5389
5390 return lookup;
5391
5392 }catch(exception& e) {
5393 m->errorOut(e, "Utils", "getNextRelabund");
5394 exit(1);
5395 }
5396 }
5397 /***********************************************************************/
getNextCLR(InputData & input,bool allLines,set<string> & userLabels,set<string> & processedLabels,string & lastLabel)5398 SharedCLRVectors* Utils::getNextCLR(InputData& input, bool allLines, set<string>& userLabels, set<string>& processedLabels, string& lastLabel) {//input, allLines, userLabels, processedLabels
5399 try {
5400
5401 SharedCLRVectors* lookup = input.getSharedCLRVectors();
5402
5403 //as long as you are not at the end of the file or done wih the lines you want
5404 while((lookup != NULL) && ((allLines == 1) || (userLabels.size() != 0))) {
5405
5406 if (m->getControl_pressed()) { delete lookup; return NULL; }
5407
5408 if (lastLabel == "") { lastLabel = lookup->getLabel(); }
5409
5410 if(allLines == 1 || userLabels.count(lookup->getLabel()) == 1){ //process all lines or this is a line we want
5411
5412 m->mothurOut(lookup->getLabel()+"\n");
5413
5414 processedLabels.insert(lookup->getLabel()); userLabels.erase(lookup->getLabel());
5415
5416 return lookup;
5417 }
5418
5419 if ((anyLabelsToProcess(lookup->getLabel(), userLabels, "") ) && (processedLabels.count(lastLabel) != 1)) { //use smart distancing to find previous small distance if user labels differ from the labels in file.
5420
5421 string saveLabel = lookup->getLabel();
5422
5423 delete lookup;
5424 lookup = input.getSharedCLRVectors(lastLabel);
5425 m->mothurOut(lookup->getLabel()+"\n");
5426
5427 processedLabels.insert(lookup->getLabel()); userLabels.erase(lookup->getLabel());
5428
5429 lastLabel = saveLabel;
5430
5431 return lookup;
5432 }
5433
5434 lastLabel = lookup->getLabel();
5435 //prevent memory leak
5436 delete lookup;
5437
5438 if (m->getControl_pressed()) { return NULL; }
5439
5440 //get next line to process
5441 lookup = input.getSharedCLRVectors();
5442 }
5443
5444 if (m->getControl_pressed()) { delete lookup; return NULL; }
5445
5446 //output error messages about any remaining user labels
5447 set<string>::iterator it;
5448 bool needToRun = false;
5449 for (it = userLabels.begin(); it != userLabels.end(); it++) {
5450 m->mothurOut("Your file does not include the label " + *it);
5451 if (processedLabels.count(lastLabel) != 1) { m->mothurOut(". I will use " + lastLabel + ".\n"); needToRun = true; }
5452 else { m->mothurOut(". Please refer to " + lastLabel + ".\n"); }
5453 }
5454
5455 //run last label if you need to
5456 if (needToRun ) {
5457 delete lookup;
5458 lookup = input.getSharedCLRVectors(lastLabel);
5459 if (lookup != NULL) {
5460 m->mothurOut(lookup->getLabel()+"\n");
5461 processedLabels.insert(lookup->getLabel()); userLabels.erase(lookup->getLabel());
5462 }
5463 return lookup;
5464 }
5465
5466 return lookup;
5467
5468 }catch(exception& e) {
5469 m->errorOut(e, "Utils", "getNextCLR");
5470 exit(1);
5471 }
5472 }
5473 /***********************************************************************/
getNextList(InputData & input,bool allLines,set<string> & userLabels,set<string> & processedLabels,string & lastLabel)5474 ListVector* Utils::getNextList(InputData& input, bool allLines, set<string>& userLabels, set<string>& processedLabels, string& lastLabel) {//input, allLines, userLabels, processedLabels
5475 try {
5476
5477 ListVector* list = input.getListVector();
5478
5479 //as long as you are not at the end of the file or done wih the lines you want
5480 while((list != NULL) && ((allLines == 1) || (userLabels.size() != 0))) {
5481
5482 if (m->getControl_pressed()) { delete list; return NULL; }
5483
5484 if (lastLabel == "") { lastLabel = list->getLabel(); }
5485
5486 if(allLines == 1 || userLabels.count(list->getLabel()) == 1){ //process all lines or this is a line we want
5487
5488 m->mothurOut(list->getLabel()+"\n");
5489
5490 processedLabels.insert(list->getLabel()); userLabels.erase(list->getLabel());
5491
5492 return list;
5493 }
5494
5495 if ((anyLabelsToProcess(list->getLabel(), userLabels, "") ) && (processedLabels.count(lastLabel) != 1)) { //use smart distancing to find previous small distance if user labels differ from the labels in file.
5496
5497 string saveLabel = list->getLabel();
5498
5499 delete list;
5500 list = input.getListVector(lastLabel);
5501 m->mothurOut(list->getLabel()+"\n");
5502
5503 processedLabels.insert(list->getLabel()); userLabels.erase(list->getLabel());
5504
5505 lastLabel = saveLabel;
5506
5507 return list;
5508 }
5509
5510 lastLabel = list->getLabel();
5511 //prevent memory leak
5512 delete list;
5513
5514 if (m->getControl_pressed()) { return NULL; }
5515
5516 //get next line to process
5517 list = input.getListVector();
5518 }
5519
5520 if (m->getControl_pressed()) { delete list; return NULL; }
5521
5522 //output error messages about any remaining user labels
5523 set<string>::iterator it;
5524 bool needToRun = false;
5525 for (it = userLabels.begin(); it != userLabels.end(); it++) {
5526 m->mothurOut("Your file does not include the label " + *it);
5527 if (processedLabels.count(lastLabel) != 1) { m->mothurOut(". I will use " + lastLabel + ".\n"); needToRun = true; }
5528 else { m->mothurOut(". Please refer to " + lastLabel + ".\n"); }
5529 }
5530
5531 //run last label if you need to
5532 if (needToRun ) {
5533 delete list;
5534 list = input.getListVector(lastLabel);
5535 if (list != NULL) {
5536 m->mothurOut(list->getLabel()+"\n");
5537 processedLabels.insert(list->getLabel()); userLabels.erase(list->getLabel());
5538 }
5539 return list;
5540 }
5541
5542 return list;
5543
5544 }catch(exception& e) {
5545 m->errorOut(e, "Utils", "getNextList");
5546 exit(1);
5547 }
5548 }
5549 /***********************************************************************/
getNextRAbund(InputData & input,bool allLines,set<string> & userLabels,set<string> & processedLabels,string & lastLabel)5550 RAbundVector* Utils::getNextRAbund(InputData& input, bool allLines, set<string>& userLabels, set<string>& processedLabels, string& lastLabel) {//input, allLines, userLabels, processedLabels
5551 try {
5552
5553 RAbundVector* rabund = input.getRAbundVector();
5554
5555 //as long as you are not at the end of the file or done wih the lines you want
5556 while((rabund != NULL) && ((allLines == 1) || (userLabels.size() != 0))) {
5557
5558 if (m->getControl_pressed()) { delete rabund; return NULL; }
5559
5560 if (lastLabel == "") { lastLabel = rabund->getLabel(); }
5561
5562 if(allLines == 1 || userLabels.count(rabund->getLabel()) == 1){ //process all lines or this is a line we want
5563
5564 m->mothurOut(rabund->getLabel()+"\n");
5565
5566 processedLabels.insert(rabund->getLabel()); userLabels.erase(rabund->getLabel());
5567
5568 return rabund;
5569 }
5570
5571 if ((anyLabelsToProcess(rabund->getLabel(), userLabels, "") ) && (processedLabels.count(lastLabel) != 1)) { //use smart distancing to find previous small distance if user labels differ from the labels in file.
5572
5573 string saveLabel = rabund->getLabel();
5574
5575 delete rabund;
5576 rabund = input.getRAbundVector(lastLabel);
5577 m->mothurOut(rabund->getLabel()+"\n");
5578
5579 processedLabels.insert(rabund->getLabel()); userLabels.erase(rabund->getLabel());
5580
5581 lastLabel = saveLabel;
5582
5583 return rabund;
5584 }
5585
5586 lastLabel = rabund->getLabel();
5587 //prevent memory leak
5588 delete rabund;
5589
5590 if (m->getControl_pressed()) { return NULL; }
5591
5592 //get next line to process
5593 rabund = input.getRAbundVector();
5594 }
5595
5596 if (m->getControl_pressed()) { delete rabund; return NULL; }
5597
5598 //output error messages about any remaining user labels
5599 set<string>::iterator it;
5600 bool needToRun = false;
5601 for (it = userLabels.begin(); it != userLabels.end(); it++) {
5602 m->mothurOut("Your file does not include the label " + *it);
5603 if (processedLabels.count(lastLabel) != 1) { m->mothurOut(". I will use " + lastLabel + ".\n"); needToRun = true; }
5604 else { m->mothurOut(". Please refer to " + lastLabel + ".\n"); }
5605 }
5606
5607 //run last label if you need to
5608 if (needToRun ) {
5609 delete rabund;
5610 rabund = input.getRAbundVector(lastLabel);
5611 if (rabund != NULL) {
5612 m->mothurOut(rabund->getLabel()+"\n");
5613 processedLabels.insert(rabund->getLabel()); userLabels.erase(rabund->getLabel());
5614 }
5615 return rabund;
5616 }
5617
5618 return rabund;
5619
5620 }catch(exception& e) {
5621 m->errorOut(e, "Utils", "getNextRAbund");
5622 exit(1);
5623 }
5624 }
5625 /***********************************************************************/
getNextSAbund(InputData & input,bool allLines,set<string> & userLabels,set<string> & processedLabels,string & lastLabel)5626 SAbundVector* Utils::getNextSAbund(InputData& input, bool allLines, set<string>& userLabels, set<string>& processedLabels, string& lastLabel) {//input, allLines, userLabels, processedLabels
5627 try {
5628
5629 SAbundVector* sabund = input.getSAbundVector();
5630
5631 //as long as you are not at the end of the file or done wih the lines you want
5632 while((sabund != NULL) && ((allLines == 1) || (userLabels.size() != 0))) {
5633
5634 if (m->getControl_pressed()) { delete sabund; return NULL; }
5635
5636 if (lastLabel == "") { lastLabel = sabund->getLabel(); }
5637
5638 if(allLines == 1 || userLabels.count(sabund->getLabel()) == 1){ //process all lines or this is a line we want
5639
5640 m->mothurOut(sabund->getLabel()+"\n");
5641
5642 processedLabels.insert(sabund->getLabel()); userLabels.erase(sabund->getLabel());
5643
5644 return sabund;
5645 }
5646
5647 if ((anyLabelsToProcess(sabund->getLabel(), userLabels, "") ) && (processedLabels.count(lastLabel) != 1)) { //use smart distancing to find previous small distance if user labels differ from the labels in file.
5648
5649 string saveLabel = sabund->getLabel();
5650
5651 delete sabund;
5652 sabund = input.getSAbundVector(lastLabel);
5653 m->mothurOut(sabund->getLabel()+"\n");
5654
5655 processedLabels.insert(sabund->getLabel()); userLabels.erase(sabund->getLabel());
5656
5657 lastLabel = saveLabel;
5658
5659 return sabund;
5660 }
5661
5662 lastLabel = sabund->getLabel();
5663 //prevent memory leak
5664 delete sabund;
5665
5666 if (m->getControl_pressed()) { return NULL; }
5667
5668 //get next line to process
5669 sabund = input.getSAbundVector();
5670 }
5671
5672 if (m->getControl_pressed()) { delete sabund; return NULL; }
5673
5674 //output error messages about any remaining user labels
5675 set<string>::iterator it;
5676 bool needToRun = false;
5677 for (it = userLabels.begin(); it != userLabels.end(); it++) {
5678 m->mothurOut("Your file does not include the label " + *it);
5679 if (processedLabels.count(lastLabel) != 1) { m->mothurOut(". I will use " + lastLabel + ".\n"); needToRun = true; }
5680 else { m->mothurOut(". Please refer to " + lastLabel + ".\n"); }
5681 }
5682
5683 //run last label if you need to
5684 if (needToRun ) {
5685 delete sabund;
5686 sabund = input.getSAbundVector(lastLabel);
5687 if (sabund != NULL) {
5688 m->mothurOut(sabund->getLabel()+"\n");
5689 processedLabels.insert(sabund->getLabel()); userLabels.erase(sabund->getLabel());
5690 }
5691 return sabund;
5692 }
5693
5694 return sabund;
5695
5696 }catch(exception& e) {
5697 m->errorOut(e, "Utils", "getNextSAbund");
5698 exit(1);
5699 }
5700 }
5701 /***********************************************************************/
getNextOrder(InputData & input,bool allLines,set<string> & userLabels,set<string> & processedLabels,string & lastLabel)5702 OrderVector* Utils::getNextOrder(InputData& input, bool allLines, set<string>& userLabels, set<string>& processedLabels, string& lastLabel) {//input, allLines, userLabels, processedLabels
5703 try {
5704
5705 OrderVector* order = input.getOrderVector();
5706
5707 //as long as you are not at the end of the file or done wih the lines you want
5708 while((order != NULL) && ((allLines == 1) || (userLabels.size() != 0))) {
5709
5710 if (m->getControl_pressed()) { delete order; return NULL; }
5711
5712 if (lastLabel == "") { lastLabel = order->getLabel(); }
5713
5714 if(allLines == 1 || userLabels.count(order->getLabel()) == 1){ //process all lines or this is a line we want
5715
5716 m->mothurOut(order->getLabel()+"\n");
5717
5718 processedLabels.insert(order->getLabel()); userLabels.erase(order->getLabel());
5719
5720 return order;
5721 }
5722
5723 if ((anyLabelsToProcess(order->getLabel(), userLabels, "") ) && (processedLabels.count(lastLabel) != 1)) { //use smart distancing to find previous small distance if user labels differ from the labels in file.
5724
5725 string saveLabel = order->getLabel();
5726
5727 delete order;
5728 order = input.getOrderVector(lastLabel);
5729 m->mothurOut(order->getLabel()+"\n");
5730
5731 processedLabels.insert(order->getLabel()); userLabels.erase(order->getLabel());
5732
5733 lastLabel = saveLabel;
5734
5735 return order;
5736 }
5737
5738 lastLabel = order->getLabel();
5739 //prevent memory leak
5740 delete order;
5741
5742 if (m->getControl_pressed()) { return NULL; }
5743
5744 //get next line to process
5745 order = input.getOrderVector();
5746 }
5747
5748 if (m->getControl_pressed()) { delete order; return NULL; }
5749
5750 //output error messages about any remaining user labels
5751 set<string>::iterator it;
5752 bool needToRun = false;
5753 for (it = userLabels.begin(); it != userLabels.end(); it++) {
5754 m->mothurOut("Your file does not include the label " + *it);
5755 if (processedLabels.count(lastLabel) != 1) { m->mothurOut(". I will use " + lastLabel + ".\n"); needToRun = true; }
5756 else { m->mothurOut(". Please refer to " + lastLabel + ".\n"); }
5757 }
5758
5759 //run last label if you need to
5760 if (needToRun ) {
5761 delete order;
5762 order = input.getOrderVector(lastLabel);
5763 if (order != NULL) {
5764 m->mothurOut(order->getLabel()+"\n");
5765 processedLabels.insert(order->getLabel()); userLabels.erase(order->getLabel());
5766 }
5767 return order;
5768 }
5769
5770 return order;
5771
5772 }catch(exception& e) {
5773 m->errorOut(e, "Utils", "getNextOrder");
5774 exit(1);
5775 }
5776 }
5777 /***********************************************************************/
getNextSharedOrder(InputData & input,bool allLines,set<string> & userLabels,set<string> & processedLabels,string & lastLabel)5778 SharedOrderVector* Utils::getNextSharedOrder(InputData& input, bool allLines, set<string>& userLabels, set<string>& processedLabels, string& lastLabel) {//input, allLines, userLabels, processedLabels
5779 try {
5780
5781 SharedOrderVector* order = input.getSharedOrderVector();
5782
5783 //as long as you are not at the end of the file or done wih the lines you want
5784 while((order != NULL) && ((allLines == 1) || (userLabels.size() != 0))) {
5785
5786 if (m->getControl_pressed()) { delete order; return NULL; }
5787
5788 if (lastLabel == "") { lastLabel = order->getLabel(); }
5789
5790 if(allLines == 1 || userLabels.count(order->getLabel()) == 1){ //process all lines or this is a line we want
5791
5792 m->mothurOut(order->getLabel()+"\n");
5793
5794 processedLabels.insert(order->getLabel()); userLabels.erase(order->getLabel());
5795
5796 return order;
5797 }
5798
5799 if ((anyLabelsToProcess(order->getLabel(), userLabels, "") ) && (processedLabels.count(lastLabel) != 1)) { //use smart distancing to find previous small distance if user labels differ from the labels in file.
5800
5801 string saveLabel = order->getLabel();
5802
5803 delete order;
5804 order = input.getSharedOrderVector(lastLabel);
5805 m->mothurOut(order->getLabel()+"\n");
5806
5807 processedLabels.insert(order->getLabel()); userLabels.erase(order->getLabel());
5808
5809 lastLabel = saveLabel;
5810
5811 return order;
5812 }
5813
5814 lastLabel = order->getLabel();
5815 //prevent memory leak
5816 delete order;
5817
5818 if (m->getControl_pressed()) { return NULL; }
5819
5820 //get next line to process
5821 order = input.getSharedOrderVector();
5822 }
5823
5824 if (m->getControl_pressed()) { delete order; return NULL; }
5825
5826 //output error messages about any remaining user labels
5827 set<string>::iterator it;
5828 bool needToRun = false;
5829 for (it = userLabels.begin(); it != userLabels.end(); it++) {
5830 m->mothurOut("Your file does not include the label " + *it);
5831 if (processedLabels.count(lastLabel) != 1) { m->mothurOut(". I will use " + lastLabel + ".\n"); needToRun = true; }
5832 else { m->mothurOut(". Please refer to " + lastLabel + ".\n"); }
5833 }
5834
5835 //run last label if you need to
5836 if (needToRun ) {
5837 delete order;
5838 order = input.getSharedOrderVector(lastLabel);
5839 if (order != NULL) {
5840 m->mothurOut(order->getLabel()+"\n");
5841 processedLabels.insert(order->getLabel()); userLabels.erase(order->getLabel());
5842 }
5843 return order;
5844 }
5845
5846 return order;
5847
5848 }catch(exception& e) {
5849 m->errorOut(e, "Utils", "getNextSharedOrder");
5850 exit(1);
5851 }
5852 }
5853 /***********************************************************************/
5854 //this function determines if the user has given us labels that are smaller than the given label.
5855 //if so then it returns true so that the calling function can run the previous valid distance.
5856 //it's a "smart" distance function. It also checks for invalid labels.
anyLabelsToProcess(string label,set<string> & userLabels,string errorOff)5857 bool Utils::anyLabelsToProcess(string label, set<string>& userLabels, string errorOff) {
5858 try {
5859
5860 set<string>::iterator it;
5861 vector<float> orderFloat;
5862 map<string, float> userMap; //the conversion process removes trailing 0's which we need to put back
5863 map<string, float>::iterator it2;
5864 float labelFloat;
5865 bool smaller = false;
5866
5867 //unique is the smallest line
5868 if (label == "unique") { return false; }
5869 else {
5870 if (convertTestFloat(label, labelFloat)) {
5871 convert(label, labelFloat);
5872 }else { //cant convert
5873 return false;
5874 }
5875 }
5876
5877 //go through users set and make them floats
5878 for(it = userLabels.begin(); it != userLabels.end();) {
5879
5880 float temp;
5881 if ((*it != "unique") && (convertTestFloat(*it, temp) )){
5882 convert(*it, temp);
5883 orderFloat.push_back(temp);
5884 userMap[*it] = temp;
5885 it++;
5886 }else if (*it == "unique") {
5887 orderFloat.push_back(-1.0);
5888 userMap["unique"] = -1.0;
5889 it++;
5890 }else {
5891 if (errorOff == "") { cout << (*it + " is not a valid label.\n"); }
5892 userLabels.erase(it++);
5893 }
5894 }
5895
5896 //sort order
5897 sort(orderFloat.begin(), orderFloat.end());
5898
5899 /*************************************************/
5900 //is this label bigger than any of the users labels
5901 /*************************************************/
5902
5903 //loop through order until you find a label greater than label
5904 for (int i = 0; i < orderFloat.size(); i++) {
5905 if (orderFloat[i] < labelFloat) {
5906 smaller = true;
5907 if (isEqual(orderFloat[i], -1)) {
5908 if (errorOff == "") { cout << ("Your file does not include the label unique.\n"); }
5909 userLabels.erase("unique");
5910 }
5911 else {
5912 if (errorOff == "") { cout << ("Your file does not include the label. \n"); }
5913 string s = "";
5914 for (it2 = userMap.begin(); it2!= userMap.end(); it2++) {
5915 if (isEqual(it2->second, orderFloat[i])) {
5916 s = it2->first;
5917 //remove small labels
5918 userLabels.erase(s);
5919 break;
5920 }
5921 }
5922 if (errorOff == "") {cout << ( s + ". I will use the next smallest distance. \n"); }
5923 }
5924 //since they are sorted once you find a bigger one stop looking
5925 }else { break; }
5926 }
5927
5928 return smaller;
5929
5930 }
5931 catch(exception& e) {
5932 m->errorOut(e, "Utils", "anyLabelsToProcess");
5933 exit(1);
5934 }
5935 }
5936 /**************************************************************************************************/
5937 // query = v2.15.2 minversion = v2.13.5
isVsearchVersionValid(string query,string minversion)5938 bool Utils::isVsearchVersionValid(string query, string minversion) {
5939 try {
5940
5941 bool good = true;
5942
5943 vector<string> versionVector;
5944 splitAtChar(minversion, versionVector, '.');
5945
5946 //check file version
5947 vector<string> queryVector;
5948 splitAtChar(query, queryVector, '.');
5949
5950 if (versionVector.size() != queryVector.size()) { good = false; }
5951 else if (versionVector.size() != 3) { good = false; }
5952 else {
5953 if (versionVector[0] != queryVector[0]) { good = false; return good; } //major version - v2 or v1
5954
5955 //minor version - 13 or 15
5956 int queryNum, minVersionNum;
5957 convert(versionVector[1], minVersionNum);
5958 convert(queryVector[1], queryNum);
5959
5960 //if query minor version is older (smaller) than minversion
5961 if (minVersionNum > queryNum) { good = false; }
5962 else if (minVersionNum == queryNum) { //if major and minor versions match, check patches
5963
5964 //patch version
5965 convert(versionVector[2], minVersionNum);
5966 convert(queryVector[2], queryNum);
5967
5968 if (minVersionNum > queryNum) { good = false; }
5969 }
5970 }
5971
5972 return good;
5973 }
5974 catch(exception& e) {
5975 m->errorOut(e, "Utils", "checkReleaseVersion");
5976 exit(1);
5977 }
5978 }
5979 /**************************************************************************************************/
checkReleaseVersion(string line,string version)5980 bool Utils::checkReleaseVersion(string line, string version) {
5981 try {
5982
5983 bool good = true;
5984
5985 //before we added this check
5986 if (line[0] != '#') { good = false; }
5987 else {
5988 //rip off #
5989 line = line.substr(1);
5990
5991 vector<string> versionVector;
5992 splitAtChar(version, versionVector, '.');
5993
5994 //check file version
5995 vector<string> linesVector;
5996 splitAtChar(line, linesVector, '.');
5997
5998 if (versionVector.size() != linesVector.size()) { good = false; }
5999 else {
6000 for (int j = 0; j < versionVector.size(); j++) {
6001 int num1, num2;
6002 convert(versionVector[j], num1);
6003 convert(linesVector[j], num2);
6004
6005 //if mothurs version is newer than this files version, then we want to remake it
6006 if (num1 > num2) { good = false; break; }
6007 }
6008 }
6009
6010 }
6011 return good;
6012 }
6013 catch(exception& e) {
6014 m->errorOut(e, "Utils", "checkReleaseVersion");
6015 exit(1);
6016 }
6017 }
6018 /**************************************************************************************************/
getTimeStamp(string filename)6019 int Utils::getTimeStamp(string filename) {
6020 try {
6021 int timeStamp = 0;
6022
6023 #if defined NON_WINDOWS
6024 struct stat st;
6025 int errorCode = stat (filename.c_str(), &st);
6026 if (errorCode != 0) {
6027 m->mothurOut("[ERROR]: Can't find timestamp for " + filename + "\n"); m->setControl_pressed(true);
6028 }else {
6029 timeStamp = st.st_mtime;
6030 }
6031 #else
6032 HANDLE hFile;
6033
6034 hFile = CreateFile(filename.c_str(), GENERIC_READ, FILE_SHARE_READ, NULL,
6035 OPEN_EXISTING, 0, NULL);
6036
6037 if(hFile == INVALID_HANDLE_VALUE) {
6038 m->mothurOut("[ERROR]: Can't find timestamp for " + filename + "\n"); m->setControl_pressed(true);
6039 CloseHandle(hFile); return timeStamp;
6040 }
6041
6042 FILETIME ftCreate, ftAccess, ftWrite;
6043 SYSTEMTIME stUTC;
6044 DWORD dwRet;
6045
6046 // Retrieve the file times for the file.
6047 bool success = GetFileTime(hFile, &ftCreate, &ftAccess, &ftWrite);
6048
6049 if (success) {
6050 FileTimeToSystemTime(&ftWrite, &stUTC);
6051
6052 tm time;
6053 time.tm_sec = stUTC.wSecond;
6054 time.tm_min = stUTC.wMinute;
6055 time.tm_hour = stUTC.wHour;
6056 time.tm_mday = stUTC.wDay;
6057 time.tm_mon = stUTC.wMonth - 1;
6058 time.tm_year = stUTC.wYear - 1900;
6059 time.tm_isdst = -1;
6060 time_t t = mktime(&time);
6061
6062 timeStamp = t;
6063 }
6064 else { m->mothurOut("[ERROR]: Can't find timestamp for " + filename + "\n"); m->setControl_pressed(true); }
6065 CloseHandle(hFile);
6066 #endif
6067
6068 return timeStamp;
6069 }
6070 catch(exception& e) {
6071 m->errorOut(e, "Utils", "getTimeStamp");
6072 exit(1);
6073 }
6074 }
6075 /**************************************************************************************************/
6076 //Referenced - https://genome.sph.umich.edu/w/images/d/d5/Biostat615-Fall2011-lecture03-handout.pdf
geometricMean(vector<float> & abunds,double zeroReplacementValue)6077 double Utils::geometricMean(vector<float>& abunds, double zeroReplacementValue) {
6078 try{
6079 double sum = 0;
6080 for (int j = 0; j < abunds.size(); j++) {
6081 if (isEqual(abunds[j], 0)) { abunds[j] += zeroReplacementValue; }
6082 sum += log(abunds[j]);
6083 }
6084 sum /= abunds.size();
6085 sum = exp(sum);
6086
6087 return sum;
6088 }
6089 catch(exception& e) {
6090 m->errorOut(e, "Utils", "geometricMean");
6091 exit(1);
6092 }
6093 }
6094 /**************************************************************************************************/
getAverages(vector<vector<double>> & dists)6095 vector<double> Utils::getAverages(vector< vector<double> >& dists) {
6096 try{
6097 vector<double> averages; //averages.resize(numComp, 0.0);
6098 for (int i = 0; i < dists[0].size(); i++) { averages.push_back(0.0); }
6099
6100 for (int thisIter = 0; thisIter < dists.size(); thisIter++) {
6101 for (int i = 0; i < dists[thisIter].size(); i++) {
6102 averages[i] += dists[thisIter][i];
6103 }
6104 }
6105
6106 //finds average.
6107 for (int i = 0; i < averages.size(); i++) { averages[i] /= (double) dists.size(); }
6108
6109 return averages;
6110 }
6111 catch(exception& e) {
6112 m->errorOut(e, "Utils", "getAverages");
6113 exit(1);
6114 }
6115 }
6116 /**************************************************************************************************/
getAverage(vector<double> dists)6117 double Utils::getAverage(vector<double> dists) {
6118 try{
6119 double average = 0;
6120
6121 for (int i = 0; i < dists.size(); i++) {
6122 average += dists[i];
6123 }
6124
6125 //finds average.
6126 average /= (double) dists.size();
6127
6128 return average;
6129 }
6130 catch(exception& e) {
6131 m->errorOut(e, "Utils", "getAverage");
6132 exit(1);
6133 }
6134 }
6135
6136 /**************************************************************************************************/
getStandardDeviation(vector<vector<double>> & dists)6137 vector<double> Utils::getStandardDeviation(vector< vector<double> >& dists) {
6138 try{
6139
6140 vector<double> averages = getAverages(dists);
6141
6142 //find standard deviation
6143 vector<double> stdDev; //stdDev.resize(numComp, 0.0);
6144 for (int i = 0; i < dists[0].size(); i++) { stdDev.push_back(0.0); }
6145
6146 for (int thisIter = 0; thisIter < dists.size(); thisIter++) { //compute the difference of each dist from the mean, and square the result of each
6147 for (int j = 0; j < dists[thisIter].size(); j++) {
6148 stdDev[j] += ((dists[thisIter][j] - averages[j]) * (dists[thisIter][j] - averages[j]));
6149 }
6150 }
6151 for (int i = 0; i < stdDev.size(); i++) {
6152 stdDev[i] /= (double) dists.size();
6153 stdDev[i] = sqrt(stdDev[i]);
6154 }
6155
6156 return stdDev;
6157 }
6158 catch(exception& e) {
6159 m->errorOut(e, "Utils", "getAverages");
6160 exit(1);
6161 }
6162 }
6163 /**************************************************************************************************/
getStandardDeviation(vector<vector<double>> & dists,vector<double> & averages)6164 vector<double> Utils::getStandardDeviation(vector< vector<double> >& dists, vector<double>& averages) {
6165 try{
6166 //find standard deviation
6167 vector<double> stdDev; //stdDev.resize(numComp, 0.0);
6168 for (int i = 0; i < dists[0].size(); i++) { stdDev.push_back(0.0); }
6169
6170 for (int thisIter = 0; thisIter < dists.size(); thisIter++) { //compute the difference of each dist from the mean, and square the result of each
6171 for (int j = 0; j < dists[thisIter].size(); j++) {
6172 stdDev[j] += ((dists[thisIter][j] - averages[j]) * (dists[thisIter][j] - averages[j]));
6173 }
6174 }
6175 for (int i = 0; i < stdDev.size(); i++) {
6176 stdDev[i] /= (double) dists.size();
6177 stdDev[i] = sqrt(stdDev[i]);
6178 }
6179
6180 return stdDev;
6181 }
6182 catch(exception& e) {
6183 m->errorOut(e, "Utils", "getStandardDeviation");
6184 exit(1);
6185 }
6186 }
6187 /**************************************************************************************************/
getAverages(vector<vector<vector<seqDist>>> & calcDistsTotals,string mode)6188 vector< vector<seqDist> > Utils::getAverages(vector< vector< vector<seqDist> > >& calcDistsTotals, string mode) {
6189 try{
6190
6191 vector< vector<seqDist> > calcAverages; //calcAverages.resize(calcDistsTotals[0].size());
6192 for (int i = 0; i < calcDistsTotals[0].size(); i++) { //initialize sums to zero.
6193 //calcAverages[i].resize(calcDistsTotals[0][i].size());
6194 vector<seqDist> temp;
6195 for (int j = 0; j < calcDistsTotals[0][i].size(); j++) {
6196 seqDist tempDist;
6197 tempDist.seq1 = calcDistsTotals[0][i][j].seq1;
6198 tempDist.seq2 = calcDistsTotals[0][i][j].seq2;
6199 tempDist.dist = 0.0;
6200 temp.push_back(tempDist);
6201 }
6202 calcAverages.push_back(temp);
6203 }
6204
6205 if (mode == "average") {
6206 for (int thisIter = 0; thisIter < calcDistsTotals.size(); thisIter++) { //sum all groups dists for each calculator
6207 for (int i = 0; i < calcAverages.size(); i++) { //initialize sums to zero.
6208 for (int j = 0; j < calcAverages[i].size(); j++) {
6209 calcAverages[i][j].dist += calcDistsTotals[thisIter][i][j].dist;
6210 }
6211 }
6212 }
6213
6214 for (int i = 0; i < calcAverages.size(); i++) { //finds average.
6215 for (int j = 0; j < calcAverages[i].size(); j++) {
6216 calcAverages[i][j].dist /= (float) calcDistsTotals.size();
6217 }
6218 }
6219 }else { //find median
6220 for (int i = 0; i < calcAverages.size(); i++) { //for each calc
6221 for (int j = 0; j < calcAverages[i].size(); j++) { //for each comparison
6222 vector<double> dists;
6223 for (int thisIter = 0; thisIter < calcDistsTotals.size(); thisIter++) { //for each subsample
6224 dists.push_back(calcDistsTotals[thisIter][i][j].dist);
6225 }
6226 sort(dists.begin(), dists.end());
6227 calcAverages[i][j].dist = dists[(calcDistsTotals.size()/2)];
6228 }
6229 }
6230 }
6231
6232 return calcAverages;
6233 }
6234 catch(exception& e) {
6235 m->errorOut(e, "Utils", "getAverages");
6236 exit(1);
6237 }
6238 }
6239 /**************************************************************************************************/
getAverages(vector<vector<vector<seqDist>>> & calcDistsTotals)6240 vector< vector<seqDist> > Utils::getAverages(vector< vector< vector<seqDist> > >& calcDistsTotals) {
6241 try{
6242
6243 vector< vector<seqDist> > calcAverages; //calcAverages.resize(calcDistsTotals[0].size());
6244 for (int i = 0; i < calcDistsTotals[0].size(); i++) { //initialize sums to zero.
6245 //calcAverages[i].resize(calcDistsTotals[0][i].size());
6246 vector<seqDist> temp;
6247 for (int j = 0; j < calcDistsTotals[0][i].size(); j++) {
6248 seqDist tempDist;
6249 tempDist.seq1 = calcDistsTotals[0][i][j].seq1;
6250 tempDist.seq2 = calcDistsTotals[0][i][j].seq2;
6251 tempDist.dist = 0.0;
6252 temp.push_back(tempDist);
6253 }
6254 calcAverages.push_back(temp);
6255 }
6256
6257
6258 for (int thisIter = 0; thisIter < calcDistsTotals.size(); thisIter++) { //sum all groups dists for each calculator
6259 for (int i = 0; i < calcAverages.size(); i++) { //initialize sums to zero.
6260 for (int j = 0; j < calcAverages[i].size(); j++) {
6261 calcAverages[i][j].dist += calcDistsTotals[thisIter][i][j].dist;
6262 }
6263 }
6264 }
6265
6266 for (int i = 0; i < calcAverages.size(); i++) { //finds average.
6267 for (int j = 0; j < calcAverages[i].size(); j++) {
6268 calcAverages[i][j].dist /= (float) calcDistsTotals.size();
6269 }
6270 }
6271
6272 return calcAverages;
6273 }
6274 catch(exception& e) {
6275 m->errorOut(e, "Utils", "getAverages");
6276 exit(1);
6277 }
6278 }
6279 /**************************************************************************************************/
getStandardDeviation(vector<vector<vector<seqDist>>> & calcDistsTotals)6280 vector< vector<seqDist> > Utils::getStandardDeviation(vector< vector< vector<seqDist> > >& calcDistsTotals) {
6281 try{
6282
6283 vector< vector<seqDist> > calcAverages = getAverages(calcDistsTotals);
6284
6285 //find standard deviation
6286 vector< vector<seqDist> > stdDev;
6287 for (int i = 0; i < calcDistsTotals[0].size(); i++) { //initialize sums to zero.
6288 vector<seqDist> temp;
6289 for (int j = 0; j < calcDistsTotals[0][i].size(); j++) {
6290 seqDist tempDist;
6291 tempDist.seq1 = calcDistsTotals[0][i][j].seq1;
6292 tempDist.seq2 = calcDistsTotals[0][i][j].seq2;
6293 tempDist.dist = 0.0;
6294 temp.push_back(tempDist);
6295 }
6296 stdDev.push_back(temp);
6297 }
6298
6299 for (int thisIter = 0; thisIter < calcDistsTotals.size(); thisIter++) { //compute the difference of each dist from the mean, and square the result of each
6300 for (int i = 0; i < stdDev.size(); i++) {
6301 for (int j = 0; j < stdDev[i].size(); j++) {
6302 stdDev[i][j].dist += ((calcDistsTotals[thisIter][i][j].dist - calcAverages[i][j].dist) * (calcDistsTotals[thisIter][i][j].dist - calcAverages[i][j].dist));
6303 }
6304 }
6305 }
6306
6307 for (int i = 0; i < stdDev.size(); i++) { //finds average.
6308 for (int j = 0; j < stdDev[i].size(); j++) {
6309 stdDev[i][j].dist /= (float) calcDistsTotals.size();
6310 stdDev[i][j].dist = sqrt(stdDev[i][j].dist);
6311 }
6312 }
6313
6314 return stdDev;
6315 }
6316 catch(exception& e) {
6317 m->errorOut(e, "Utils", "getAverages");
6318 exit(1);
6319 }
6320 }
6321 /**************************************************************************************************/
getStandardDeviation(vector<vector<vector<seqDist>>> & calcDistsTotals,vector<vector<seqDist>> & calcAverages)6322 vector< vector<seqDist> > Utils::getStandardDeviation(vector< vector< vector<seqDist> > >& calcDistsTotals, vector< vector<seqDist> >& calcAverages) {
6323 try{
6324 //find standard deviation
6325 vector< vector<seqDist> > stdDev;
6326 for (int i = 0; i < calcDistsTotals[0].size(); i++) { //initialize sums to zero.
6327 vector<seqDist> temp;
6328 for (int j = 0; j < calcDistsTotals[0][i].size(); j++) {
6329 seqDist tempDist;
6330 tempDist.seq1 = calcDistsTotals[0][i][j].seq1;
6331 tempDist.seq2 = calcDistsTotals[0][i][j].seq2;
6332 tempDist.dist = 0.0;
6333 temp.push_back(tempDist);
6334 }
6335 stdDev.push_back(temp);
6336 }
6337
6338 for (int thisIter = 0; thisIter < calcDistsTotals.size(); thisIter++) { //compute the difference of each dist from the mean, and square the result of each
6339 for (int i = 0; i < stdDev.size(); i++) {
6340 for (int j = 0; j < stdDev[i].size(); j++) {
6341 stdDev[i][j].dist += ((calcDistsTotals[thisIter][i][j].dist - calcAverages[i][j].dist) * (calcDistsTotals[thisIter][i][j].dist - calcAverages[i][j].dist));
6342 }
6343 }
6344 }
6345
6346 for (int i = 0; i < stdDev.size(); i++) { //finds average.
6347 for (int j = 0; j < stdDev[i].size(); j++) {
6348 stdDev[i][j].dist /= (float) calcDistsTotals.size();
6349 stdDev[i][j].dist = sqrt(stdDev[i][j].dist);
6350 }
6351 }
6352
6353 return stdDev;
6354 }
6355 catch(exception& e) {
6356 m->errorOut(e, "Utils", "getAverages");
6357 exit(1);
6358 }
6359 }
6360
6361 /**************************************************************************************************/
isContainingOnlyDigits(string input)6362 bool Utils::isContainingOnlyDigits(string input) {
6363 try{
6364
6365 //are you a digit in ascii code
6366 for (int i = 0;i < input.length(); i++){
6367 if( input[i]>47 && input[i]<58){}
6368 else { return false; }
6369 }
6370
6371 return true;
6372 }
6373 catch(exception& e) {
6374 m->errorOut(e, "Utils", "isContainingOnlyDigits");
6375 exit(1);
6376 }
6377 }
6378 /**************************************************************************************************/
/* Background for findTaxon below: taxon names are compared as whole names rather than searched
   for as substrings because of the following user report.

   Sequence and taxonomy:
   M02352_41_000000000-AT06G_1_2104_18738_21630 Eukaryota(100);Archaeplastida(100);Chloroplastida(100);Chlorophyta(100);Mamiellophyceae(100);Mamiellales(100);Ostreococcus(100);Ostreococcus tauri(100);

   When remove.lineage was run with:
   taxon=Chloroplast-Mitochondria-unknown-Bacteria-Archaea-Metazoa-Charophyta

   the word "Chloroplast" in the taxon string was matched to the lineage Chloroplastida in the taxonomy (above), which wiped out all of the green algae. */
6385
findTaxon(vector<Taxon> tax,vector<Taxon> stax)6386 bool Utils::findTaxon(vector<Taxon> tax, vector<Taxon> stax) {
6387 try {
6388 removeQuotes(tax); removeQuotes(stax);
6389
6390 //looking to find something like "unknown" or "Proteobacteria"
6391 if (stax.size() == 1) {
6392 string searchTax = stax[0].name;
6393 auto it = find_if(tax.begin(), tax.end(), [&searchTax](const Taxon& obj) { return obj.name == searchTax;});
6394
6395 if (it != tax.end()) { return true; }
6396 else { return false; }
6397
6398 }else { //looking to find something like "Bacteria;Proteobacteria;Alphaproteobacteria;Rickettsiales;Anaplasmataceae;Wolbachia;"
6399
6400 if (stax.size() > tax.size()) { return false; } //we are looking for a more specific taxonomy, not a match
6401 else {
6402 for (int i = 0; i < stax.size(); i++) {
6403 if (stax[i].name != tax[i].name) { return false; }
6404 }
6405 return true;
6406 }
6407 }
6408
6409 return false;
6410 }
6411 catch(exception& e) {
6412 m->errorOut(e, "Utils", "findTaxon");
6413 exit(1);
6414 }
6415 }
6416 /**************************************************************************************************/
searchTax(vector<Taxon> userTaxons,vector<bool> taxonsHasConfidence,vector<vector<Taxon>> searchTaxons)6417 bool Utils::searchTax(vector<Taxon> userTaxons, vector<bool> taxonsHasConfidence, vector< vector<Taxon> > searchTaxons) {
6418 try {
6419 bool userDataHasConfidence = hasConfidenceScore(userTaxons);
6420
6421 for (int j = 0; j < searchTaxons.size(); j++) {
6422
6423 bool foundTaxonMatch = findTaxon(userTaxons, searchTaxons[j]);
6424
6425 if (foundTaxonMatch) {
6426 //searchTaxon or user taxons don't include confidence scores so ingnore them
6427 if (!taxonsHasConfidence[j] || !userDataHasConfidence) {
6428 return true; //since you belong to at least one of the taxons we want you are included so no need to search for other
6429 }else {
6430 bool good = true;
6431
6432 //the usersTaxon is most likely longer than the searchTaxons, and searchTaxon[0] may relate to userTaxon[4]
6433 //we want to "line them up", so we will find the the index where the searchstring starts
6434 int index = 0;
6435 for (int i = 0; i < userTaxons.size(); i++) {
6436
6437 if (userTaxons[i].name == searchTaxons[j][0].name) {
6438 index = i;
6439 int spot = 0;
6440 bool goodspot = true;
6441 //is this really the start, or are we dealing with a taxon of the same name?
6442 while ((spot < searchTaxons[j].size()) && ((i+spot) < userTaxons.size())) {
6443 if (userTaxons[i+spot].name != searchTaxons[j][spot].name) { goodspot = false; break; }
6444 else { spot++; }
6445 }
6446
6447 if (goodspot) { break; }
6448 }
6449 }
6450
6451 for (int i = 0; i < searchTaxons[j].size(); i++) {
6452
6453 if ((i+index) < userTaxons.size()) { //just in case, should never be false
6454 if (userTaxons[i+index].confidence < searchTaxons[j][i].confidence) { //is the users cutoff less than the search taxons
6455 good = false;
6456 break;
6457 }
6458 }else { good = false; break; }
6459 }
6460
6461 //passed the test so add you
6462 if (good) { return true; }
6463 }
6464 }
6465 }
6466
6467 return false;
6468 }
6469 catch(exception& e) {
6470 m->errorOut(e, "Utils", "searchTax");
6471 exit(1);
6472 }
6473 }
6474
6475 /**************************************************************************************************/
getTaxons(string tax,bool & hasConfidence)6476 vector<Taxon> Utils::getTaxons(string tax, bool& hasConfidence) {
6477 try {
6478
6479 vector<Taxon> t;
6480 string taxon = "";
6481 int taxLength = tax.length();
6482
6483 for(int i=0;i<taxLength;i++){
6484 if(tax[i] == ';'){
6485 string newtaxon = taxon; float confidence = 0;
6486 hasConfidence = hasConfidenceScore(newtaxon, confidence);
6487
6488 Taxon temp(newtaxon, confidence); t.push_back(temp);
6489 taxon = "";
6490 }
6491 else{ taxon += tax[i]; }
6492 }
6493
6494 if (taxon != "") {
6495 float confidence = 0;
6496 hasConfidence = hasConfidenceScore(taxon, confidence);
6497
6498 Taxon temp(taxon, confidence); t.push_back(temp);
6499 }
6500
6501 return t;
6502 }
6503 catch(exception& e) {
6504 m->errorOut(e, "Utils", "getTaxons");
6505 exit(1);
6506 }
6507 }
6508 /**************************************************************************************************/
hasConfidenceScore(vector<Taxon> taxons)6509 bool Utils::hasConfidenceScore(vector<Taxon> taxons) {
6510 try {
6511
6512 for (int i = 0; i < taxons.size(); i++) {
6513 if (m->getControl_pressed()) { break; }
6514
6515 if (taxons[i].confidence > 0) { return true; }
6516 }
6517
6518 return false;
6519 }
6520 catch(exception& e) {
6521 m->errorOut(e, "Utils", "hasConfidenceScore");
6522 exit(1);
6523 }
6524 }
6525 /**************************************************************************************************/
hasConfidenceScore(string & taxon,float & confidence)6526 bool Utils::hasConfidenceScore(string& taxon, float& confidence) {
6527 try {
6528 int openParen = taxon.find_last_of('(');
6529 int closeParen = taxon.find_last_of(')');
6530
6531 if ((openParen != string::npos) && (closeParen != string::npos)) {
6532 string confidenceScore = taxon.substr(openParen+1, (closeParen-(openParen+1)));
6533 if (isPositiveNumeric(confidenceScore)) { //its a confidence
6534 taxon = taxon.substr(0, openParen); //rip off confidence
6535 mothurConvert(confidenceScore, confidence);
6536 return true;
6537 }else {
6538 confidence = 0; //its part of the taxon
6539 }
6540 }else{ confidence = 0; }
6541
6542 return false;
6543 }
6544 catch(exception& e) {
6545 m->errorOut(e, "Utils", "hasConfidenceScore");
6546 exit(1);
6547 }
6548 }
6549 /**************************************************************************************************/
removeConfidences(string & tax)6550 float Utils::removeConfidences(string& tax) {
6551 try {
6552 string temp = tax; float dummy; if (!hasConfidenceScore(temp, dummy)) { return 0; }
6553
6554 string taxon;
6555 string newTax = "";
6556 string confidenceScore = "0";
6557
6558 //remove last ";"
6559 if (tax.length() > 1) { tax = tax.substr(0, tax.length()-1); }
6560 vector<string> taxons; splitAtChar(tax, taxons, ';');
6561
6562 for (int i = 0; i < taxons.size(); i++) {
6563
6564 if (m->getControl_pressed()) { return 0; }
6565
6566 taxon = taxons[i];
6567
6568 int pos = taxon.find_last_of('(');
6569 if (pos != -1) {
6570 //is it a number?
6571 int pos2 = taxon.find_last_of(')');
6572 if (pos2 != -1) {
6573 string temp = taxon.substr(pos+1, (pos2-(pos+1)));
6574 if (isPositiveNumeric(temp)) {
6575 taxon = taxon.substr(0, pos); //rip off confidence
6576 confidenceScore = temp;
6577 }
6578 }
6579 }
6580 taxon += ";";
6581
6582 newTax += taxon;
6583 }
6584
6585 tax = newTax;
6586
6587 float confidence = 0; mothurConvert(confidenceScore, confidence);
6588
6589 return confidence;
6590 }
6591 catch(exception& e) {
6592 m->errorOut(e, "Utils", "removeConfidences");
6593 exit(1);
6594 }
6595 }
6596 /**************************************************************************************************/
removeQuotes(vector<Taxon> & tax)6597 void Utils::removeQuotes(vector<Taxon>& tax) {
6598 try {
6599
6600 string taxon;
6601 string newTax = "";
6602
6603 for (int i = 0; i < tax.size(); i++) {
6604
6605 if (m->getControl_pressed()) { return; }
6606
6607 tax[i].name = removeQuotes(tax[i].name);
6608 }
6609
6610 return;
6611 }
6612 catch(exception& e) {
6613 m->errorOut(e, "Utils", "removeQuotes");
6614 exit(1);
6615 }
6616 }
6617 /**************************************************************************************************/
removeQuotes(string tax)6618 string Utils::removeQuotes(string tax) {
6619 try {
6620
6621 string taxon;
6622 string newTax = "";
6623
6624 for (int i = 0; i < tax.length(); i++) {
6625
6626 if (m->getControl_pressed()) { return newTax; }
6627
6628 if ((tax[i] != '\'') && (tax[i] != '\"')) { newTax += tax[i]; }
6629
6630 }
6631
6632 return newTax;
6633 }
6634 catch(exception& e) {
6635 m->errorOut(e, "Utils", "removeQuotes");
6636 exit(1);
6637 }
6638 }
6639 /**************************************************************************************************/
6640 // function for calculating standard deviation
getStandardDeviation(vector<int> & featureVector)6641 double Utils::getStandardDeviation(vector<int>& featureVector){
6642 try {
6643 //finds sum
6644 double average = 0;
6645 for (int i = 0; i < featureVector.size(); i++) { average += featureVector[i]; }
6646 average /= (double) featureVector.size();
6647
6648 //find standard deviation
6649 double stdDev = 0;
6650 for (int i = 0; i < featureVector.size(); i++) { //compute the difference of each dist from the mean, and square the result of each
6651 stdDev += ((featureVector[i] - average) * (featureVector[i] - average));
6652 }
6653
6654 stdDev /= (double) featureVector.size();
6655 stdDev = sqrt(stdDev);
6656
6657 return stdDev;
6658 }
6659 catch(exception& e) {
6660 m->errorOut(e, "Utils", "getStandardDeviation");
6661 exit(1);
6662 }
6663 }
6664 /*****************************************************************/
6665 //this code is a mess and should be rethought...-slw
parseTreeFile(string filename)6666 vector<string> Utils::parseTreeFile(string filename) {
6667
6668 //only takes names from the first tree and assumes that all trees use the same names.
6669 try {
6670 //string filename = current->getTreeFile();
6671 ifstream filehandle;
6672 Utils util; util.openInputFile(filename, filehandle);
6673 int comment;
6674 char c;
6675 comment = 0;
6676
6677 vector<string> Treenames;
6678 if((c = filehandle.peek()) != '#') { //ifyou are not a nexus file
6679
6680 while ((c = filehandle.peek()) != ';') {
6681 if (m->getControl_pressed()) { filehandle.close(); return Treenames; }
6682 // get past comments
6683 if(c == '[') { comment = 1; }
6684 if(c == ']') { comment = 0; }
6685 if((c == '(') && (comment != 1)){ break; }
6686 filehandle.get();
6687 }
6688
6689 Treenames = readTreeString(filehandle);
6690
6691 }else if((c = filehandle.peek()) == '#') { //ifyou are a nexus file
6692 string holder = "";
6693
6694 // get past comments
6695 while(holder != "translate" && holder != "Translate"){
6696 if (m->getControl_pressed()) { filehandle.close(); return Treenames; }
6697 if(holder == "[" || holder == "[!") { comment = 1; }
6698 if(holder == "]") { comment = 0; }
6699 filehandle >> holder;
6700
6701 //if there is no translate then you must read tree string otherwise use translate to get names
6702 if((holder == "tree") && (comment != 1)){
6703 //pass over the "tree rep.6878900 = "
6704 while (((c = filehandle.get()) != '(') && ((c = filehandle.peek()) != EOF)) {;}
6705
6706 if(c == EOF) { break; }
6707 filehandle.putback(c); //put back first ( of tree.
6708 Treenames = readTreeString(filehandle);
6709
6710 break;
6711 }
6712
6713 if (filehandle.eof()) { break; }
6714 }
6715
6716 //use nexus translation rather than parsing tree to save time
6717 if((holder == "translate") || (holder == "Translate")) {
6718
6719 string number, name, h;
6720 h = ""; // so it enters the loop the first time
6721 while((h != ";") && (number != ";")) {
6722 if (m->getControl_pressed()) { filehandle.close(); return Treenames; }
6723 filehandle >> number;
6724 filehandle >> name;
6725
6726 //c = , until done with translation then c = ;
6727 h = name.substr(name.length()-1, name.length());
6728 name.erase(name.end()-1); //erase the comma
6729 Treenames.push_back(number);
6730 }
6731 if(number == ";") { Treenames.pop_back(); } //in case ';' from translation is on next line instead of next to last name
6732 }
6733 }
6734 filehandle.close();
6735
6736 return Treenames;
6737 }
6738 catch(exception& e) {
6739 m->errorOut(e, "Utils", "parseTreeFile");
6740 exit(1);
6741 }
6742 }
6743 /*******************************************************/
readTreeString(ifstream & filehandle)6744 vector<string> Utils::readTreeString(ifstream& filehandle) {
6745 try {
6746 char c;
6747 string name; //, k
6748 vector<string> Treenames;
6749
6750 while((c = filehandle.peek()) != ';') {
6751 if (m->getControl_pressed()) { return Treenames; }
6752
6753 if(c == ')') {
6754 //to pass over labels in trees
6755 c=filehandle.get();
6756 while((c!=',') && (c != -1) && (c!= ':') && (c!=';')){ c=filehandle.get(); }
6757 filehandle.putback(c);
6758 }
6759 if(c == ';') { return Treenames; }
6760 if(c == -1) { return Treenames; }
6761 //if you are a name
6762 if((c != '(') && (c != ')') && (c != ',') && (c != ':') && (c != '\n') && (c != '\t') && (c != 32)) { //32 is space
6763 name = "";
6764 c = filehandle.get();
6765 while ((c != '(') && (c != ')') && (c != ',') && (c != ':') && (c != '\n') && (c != 32) && (c != '\t')) {
6766 name += c;
6767 c = filehandle.get();
6768 }
6769
6770 if (name != "\r" ) { Treenames.push_back(name); }
6771 filehandle.putback(c);
6772 }
6773
6774 if(c == ':') { //read until you reach the end of the branch length
6775 while ((c != '(') && (c != ')') && (c != ',') && (c != ';') && (c != '\n') && (c != '\t') && (c != 32)) { c = filehandle.get(); }
6776 filehandle.putback(c);
6777 }
6778 c = filehandle.get();
6779 if(c == ';') { return Treenames; }
6780 if(c == ')') { filehandle.putback(c); }
6781 if (filehandle.eof()) { break; }
6782 }
6783 return Treenames;
6784 }
6785 catch(exception& e) {
6786 m->errorOut(e, "Utils", "readTreeString");
6787 exit(1);
6788 }
6789 }
6790 /*********************************************************************************************/
6791