1 //
2 //  counttable.cpp
3 //  Mothur
4 //
5 //  Created by Sarah Westcott on 6/26/12.
6 //  Copyright (c) 2012 Schloss Lab. All rights reserved.
7 //
8 
9 #include "counttable.h"
10 
11 /************************************************************/
12 //used by tree commands
createTable(set<string> & n,map<string,string> & g,set<string> & gs)13 int CountTable::createTable(set<string>& n, map<string, string>& g, set<string>& gs) {
14     try {
15         hasGroups = false;
16         int numGroups = 0;
17         groups.clear();
18         totalGroups.clear();
19         indexGroupMap.clear();
20         indexNameMap.clear();
21         counts.clear();
22         for (set<string>::iterator it = gs.begin(); it != gs.end(); it++) { groups.push_back(*it);  hasGroups = true; }
23         numGroups = groups.size();
24         totalGroups.resize(numGroups, 0);
25 
26 		//sort groups to keep consistent with how we store the groups in groupmap
27         sort(groups.begin(), groups.end());
28         for (int i = 0; i < groups.size(); i++) {  indexGroupMap[groups[i]] = i; }
29 
30         uniques = 0;
31         total = 0;
32         bool error = false;
33         //n contains treenames
34         for (set<string>::iterator it = n.begin(); it != n.end(); it++) {
35 
36             if (m->getControl_pressed()) { break; }
37 
38             string seqName = *it;
39 
40             vector<countTableItem> groupCounts;
41             map<string, string>::iterator itGroup = g.find(seqName);
42 
43             if (itGroup != g.end()) {
44                 groupCounts.push_back(countTableItem(1, indexGroupMap[itGroup->second]));
45                 totalGroups[indexGroupMap[itGroup->second]]++;
46             }else {
47                 //look for it in names of groups to see if the user accidently used the wrong file
48                 if (util.inUsersGroups(seqName, groups)) {
49                     m->mothurOut("[WARNING]: Your group or design file contains a group named " + seqName + ".  Perhaps you are used a group file instead of a design file? A common cause of this is using a tree file that relates your groups (created by the tree.shared command) with a group file that assigns sequences to a group.\n");
50                 }
51                 m->mothurOut("[ERROR]: Your group file does not contain " + seqName + ". Please correct.\n");
52             }
53 
54             map<string, int>::iterator it2 = indexNameMap.find(seqName);
55             if (it2 == indexNameMap.end()) {
56                 if (hasGroups) { counts.push_back(groupCounts); }
57                 indexNameMap[seqName] = uniques;
58                 totals.push_back(1);
59                 total++;
60                 uniques++;
61             }else {
62                 error = true;
63                 m->mothurOut("[ERROR]: Your count table contains more than 1 sequence named " + seqName + ", sequence names must be unique. Please correct.\n");
64             }
65 
66         }
67         if (error) { m->setControl_pressed(true); }
68         else { //check for zero groups
69             if (hasGroups) {
70                 for (int i = 0; i < totalGroups.size(); i++) {
71                     if (totalGroups[i] == 0) { m->mothurOut("\nRemoving group: " + groups[i] + " because all sequences have been removed.\n"); removeGroup(groups[i]); i--; }
72                 }
73             }
74         }
75         return 0;
76     }
77 	catch(exception& e) {
78 		m->errorOut(e, "CountTable", "createTable");
79 		exit(1);
80 	}
81 }
82 /************************************************************/
testGroups(string file)83 bool CountTable::testGroups(string file) {
84     try {
85         vector<string> nothing;
86         return testGroups(file, nothing);
87     }
88     catch(exception& e) {
89         m->errorOut(e, "CountTable", "testGroups");
90         exit(1);
91     }
92 }
93 
94 /************************************************************/
testGroups(string file,vector<string> & groups)95 bool CountTable::testGroups(string file, vector<string>& groups) {
96     try {
97         m = MothurOut::getInstance(); hasGroups = false; total = 0;
98         ifstream in;
99         util.openInputFile(file, in);
100 
101         string headers = util.getline(in); util.gobble(in);
102 
103         if (headers[0] == '#') { //is this a count file in compressed form
104             isCompressed = true;
105 
106             //read headers
107             headers = util.getline(in); util.gobble(in); //gets compressed group name map line
108             headers = util.getline(in); util.gobble(in);
109         }
110 
111         vector<string> columnHeaders = util.splitWhiteSpace(headers);
112 
113         if (columnHeaders.size() > 2) {
114             hasGroups = true;
115 
116             for (int i = 2; i < columnHeaders.size(); i++) {
117                 groups.push_back(columnHeaders[i]);
118             }
119             //sort groups to keep consistent with how we store the groups in groupmap
120             sort(groups.begin(), groups.end());
121         }
122 
123         return hasGroups;
124     }
125 	catch(exception& e) {
126 		m->errorOut(e, "CountTable", "testGroups");
127 		exit(1);
128 	}
129 }
130 
131 /************************************************************/
132 
setNamesOfGroups(vector<string> mygroups)133 bool CountTable::setNamesOfGroups(vector<string> mygroups) {
134     try {
135         //remove groups from table not in new groups we are setting
136         for (int i = 0; i < groups.size();) {
137             if (util.inUsersGroups(groups[i], mygroups)) { ++i; }
138             else { removeGroup(groups[i]);  }
139         }
140 
141         //add any new groups in new groups list to table
142         for (int i = 0; i < mygroups.size(); i++) {
143             if (util.inUsersGroups(mygroups[i], groups)) {}
144             else { addGroup(mygroups[i]);  }
145         }
146 
147         //false if error
148         return (!m->getControl_pressed());
149     }
150     catch(exception& e) {
151         m->errorOut(e, "CountTable", "setNamesOfGroups");
152         exit(1);
153     }
154 }
155 
156 /************************************************************/
157 
createTable(string namefile,string groupfile,vector<string> selectedGroups,bool createGroup)158 int CountTable::createTable(string namefile, string groupfile, vector<string> selectedGroups, bool createGroup) {
159     try {
160 
161         GroupMap* groupMap;
162         int numGroups = 0;
163         groups.clear();
164         totalGroups.clear();
165         indexGroupMap.clear();
166         indexNameMap.clear();
167         counts.clear();
168         map<int, string> originalGroupIndexes;
169         uniques = 0;
170         total = 0;
171         bool error = false;
172         bool pickedGroups = false;
173         if (selectedGroups.size() != 0) { pickedGroups = true; }
174 
175         if (groupfile != "") {
176             hasGroups = true;
177             groupMap = new GroupMap(groupfile); groupMap->readMap(selectedGroups);
178             numGroups = groupMap->getNumGroups();
179             groups = groupMap->getNamesOfGroups();
180             totalGroups.resize(numGroups, 0);
181         }else if(createGroup) {
182             hasGroups = true;
183             numGroups = 1;
184             groups.push_back("Group1");
185             totalGroups.resize(numGroups, 0);
186         }
187 
188         //sort groups to keep consistent with how we store the groups in groupmap
189         sort(groups.begin(), groups.end());
190         for (int i = 0; i < groups.size(); i++) {  indexGroupMap[groups[i]] = i; }
191 
192         if ((namefile == "") && (groupfile == "")) { m->mothurOut("[ERROR]: No name or group file given. You must provide a name or group file to create a count file, please correct.\n");  m->setControl_pressed(true); return 0; }
193 
194         else if (namefile != "") {
195 
196             ifstream in; util.openInputFile(namefile, in);
197 
198             while (!in.eof()) {
199                 if (m->getControl_pressed()) { break; }
200 
201                 string firstCol, secondCol;
202                 in >> firstCol; util.gobble(in); in >> secondCol; util.gobble(in);
203 
204                 util.checkName(firstCol);
205                 util.checkName(secondCol);
206 
207                 vector<string> names;
208                 util.splitAtChar(secondCol, names, ',');
209 
210                 map<string, int> groupCounts;
211                 for (int i = 0; i < groups.size(); i++) { groupCounts[groups[i]] = 0;  } //initialize groupCounts
212 
213                 int thisTotal = 0;
214                 if (groupfile != "") {
215 
216                     //get counts for each of the users groups
217                     for (int i = 0; i < names.size(); i++) {
218                         string group = groupMap->getGroup(names[i]);
219 
220                         if (group == "not found") {
221                             if (!pickedGroups) { m->mothurOut("[ERROR]: " + names[i] + " is not in your groupfile, please correct.\n");  error=true; }
222                             //else - ignore because we assume this read is from a group we are not interested in
223                         }else { //this is a read from a group we want to save
224                             map<string, int>::iterator it = groupCounts.find(group);
225 
226                             //if not found, then this sequence is not from a group we care about
227                             if (it != groupCounts.end()) { it->second++; }
228                             thisTotal++;
229                         }
230                     }
231                 }else if (createGroup) {
232                     thisTotal = names.size();
233                     groupCounts["Group1"] = thisTotal;
234                 }else { thisTotal = names.size();  }
235 
236                 //if group info, then read it
237                 vector<countTableItem> thisGroupsCount;
238                 for (map<string, int>::iterator it = groupCounts.begin(); it != groupCounts.end(); it++) {
239                     int groupIndex = indexGroupMap[it->first];
240                     int abund = it->second;
241                     if (abund != 0) {
242                         countTableItem thisAbund(it->second, groupIndex);
243                         thisGroupsCount.push_back(thisAbund);
244                         totalGroups[groupIndex] += abund;
245                     }
246 
247                 }
248 
249                 map<string, int>::iterator it = indexNameMap.find(firstCol);
250                 if (it == indexNameMap.end()) {
251 
252                     if (hasGroups) {  counts.push_back(thisGroupsCount);  }
253                     indexNameMap[firstCol] = uniques;
254                     totals.push_back(thisTotal);
255                     total += thisTotal;
256                     uniques++;
257 
258                 }else { error = true; m->mothurOut("[ERROR]: Your count table contains more than 1 sequence named " + firstCol + ", sequence names must be unique. Please correct.\n"); }
259             }
260             in.close();
261 
262         }else if ((namefile == "") && (groupfile != "")) { //create count file from group only
263 
264             vector<string> names = groupMap->getNamesSeqs(); //only contains names from selectedGroups or all groups if selectedGroups is empty
265 
266             for (int i = 0; i < names.size(); i++) {
267                if (m->getControl_pressed()) { break; }
268 
269                 vector<countTableItem> abunds;
270                 string group = groupMap->getGroup(names[i]);
271                 int groupIndex = indexGroupMap[group];
272                 totalGroups[groupIndex]++;
273                 countTableItem thisAbund(1, groupIndex);
274                 abunds.push_back(thisAbund);
275 
276                 map<string, int>::iterator it = indexNameMap.find(names[i]);
277                 if (it == indexNameMap.end()) {
278 
279                     counts.push_back(abunds);
280                     indexNameMap[names[i]] = uniques;
281                     totals.push_back(1);
282                     total++;
283                     uniques++;
284 
285                 }else { error = true; m->mothurOut("[ERROR]: Your count table contains more than 1 sequence named " + names[i] + ", sequence names must be unique. Please correct.\n"); }
286             }
287         }
288 
289         if (error) { m->setControl_pressed(true); }
290         else { //check for zero groups
291             if (hasGroups) {
292                 for (int i = 0; i < totalGroups.size(); i++) {
293                     if (totalGroups[i] == 0) { m->mothurOut("\nRemoving group: " + groups[i] + " because all sequences have been removed.\n"); removeGroup(groups[i]); i--; }
294                 }
295             }
296         }
297         if (groupfile != "") { delete groupMap; }
298 
299         return total;
300     }
301 	catch(exception& e) {
302 		m->errorOut(e, "CountTable", "createTable");
303 		exit(1);
304 	}
305 }
306 /************************************************************/
readTable(string file,string format)307 int CountTable::readTable(string file, string format) {
308     try {
309         if (format == "fasta") {
310             filename = file;
311             ifstream in;
312             util.openInputFile(filename, in);
313 
314             hasGroups = false;
315             groups.clear();
316             totalGroups.clear();
317             indexGroupMap.clear();
318             indexNameMap.clear();
319             counts.clear();
320             bool error = false;
321             uniques = 0;
322             total = 0;
323             while (!in.eof()) {
324 
325                 if (m->getControl_pressed()) { break; }
326 
327                 Sequence seq(in); util.gobble(in);
328                 string name = seq.getName();
329                 if (m->getDebug()) { m->mothurOut("[DEBUG]: " + name + '\t' + toString(1) + "\n"); }
330 
331                 map<string, int>::iterator it = indexNameMap.find(name);
332                 if (it == indexNameMap.end()) {
333                     indexNameMap[name] = uniques;
334                     totals.push_back(1);
335                     total ++;
336                     uniques++;
337                 }else {
338                     error = true;
339                     m->mothurOut("[ERROR]: Your count table contains more than 1 sequence named " + name + ", sequence names must be unique. Please correct.\n");
340                 }
341             }
342             in.close();
343 
344             if (error) { m->setControl_pressed(true); }
345         }else { m->mothurOut("[ERROR]: Unsupported format: " + format + ", please correct.\n"); m->setControl_pressed(true);  }
346 
347         return total;
348     }
349     catch(exception& e) {
350         m->errorOut(e, "CountTable", "readTable");
351         exit(1);
352     }
353 }
354 /************************************************************/
readTable(string file,bool readGroups,bool mothurRunning)355 int CountTable::readTable(string file, bool readGroups, bool mothurRunning) {
356     try {
357 
358         readTable(file, readGroups, mothurRunning, nullVector);
359 
360         return total;
361     }
362     catch(exception& e) {
363         m->errorOut(e, "CountTable", "readTable");
364         exit(1);
365     }
366 }
367 /************************************************************/
readTable(ifstream & in,bool readGroups,bool mothurRunning)368 int CountTable::readTable(ifstream& in, bool readGroups, bool mothurRunning) {
369     try {
370         readTable(in, readGroups, mothurRunning, nullVector);
371         return total;
372     }
373     catch(exception& e) {
374         m->errorOut(e, "CountTable", "readTable");
375         exit(1);
376     }
377 }
378 /************************************************************/
isCountTable(string file)379 bool CountTable::isCountTable(string file) {
380     try {
381 
382         filename = file;
383         ifstream in;
384         util.openInputFile(filename, in);
385 
386         string headers = util.getline(in); util.gobble(in);
387 
388         if (headers[0] == '#') { //is this a count file in compressed form
389             isCompressed = true;
390 
391             //read headers
392             headers = util.getline(in); util.gobble(in); //gets compressed group name map line
393             headers = util.getline(in); util.gobble(in);
394         }
395         vector<string> columnHeaders = util.splitWhiteSpace(headers);
396         in.close();
397 
398         bool isCount = true;
399         if (columnHeaders.size() >= 2) {
400             vector<string> defaultHeaders = getHardCodedHeaders();
401             if (defaultHeaders.size() >= 2) {
402                 if ((columnHeaders[0] != defaultHeaders[0]) && (columnHeaders[0] != "OTU_Label")) { isCount = false; }
403                 if (columnHeaders[1] != defaultHeaders[1]) { isCount = false; }
404             }else { isCount = false; }
405         }else { isCount = false; }
406 
407         return isCount;
408 
409     }
410     catch(exception& e) {
411         m->errorOut(e, "CountTable", "isCountTable");
412         exit(1);
413     }
414 }
415 /************************************************************/
readTable(string file,bool readGroups,bool mothurRunning,vector<string> selectedGroups)416 int CountTable::readTable(string file, bool readGroups, bool mothurRunning, vector<string> selectedGroups) {
417     try {
418         filename = file;
419         ifstream in;
420         util.openInputFile(filename, in);
421 
422         readTable(in, readGroups, mothurRunning, selectedGroups);
423 
424         in.close();
425 
426         return 0;
427     }
428     catch(exception& e) {
429         m->errorOut(e, "CountTable", "readTable");
430         exit(1);
431     }
432 }
433 /************************************************************/
readTable(ifstream & in,bool readGroups,bool mothurRunning,vector<string> selectedGroups)434 int CountTable::readTable(ifstream& in, bool readGroups, bool mothurRunning, vector<string> selectedGroups) {
435     try {
436         if (!readGroups) { selectedGroups.clear(); }
437 
438         string headers = util.getline(in); util.gobble(in);
439 
440         map<string, int> headerIndex2Group;
441         //#1,F003D000	2,F003D002	3,F003D004	4,F003D006	5,F003D008	6,F003D142	7,F003D144	8,F003D146	9,F003D148	10,F003D150
442         if (headers[0] == '#') { //is this a count file in compressed form
443             isCompressed = true;
444 
445             //read headers
446             headers = util.getline(in); util.gobble(in); //gets compressed group name map line
447             headers = headers.substr(1);
448 
449             vector<string> groupNameHeaders = util.splitWhiteSpace(headers);
450 
451             for (int i = 0; i < groupNameHeaders.size(); i++) {
452                 string groupIndex = ""; string groupName = groupNameHeaders[i];
453                 util.splitAtComma(groupIndex, groupName);
454                 int a; util.mothurConvert(groupIndex, a);
455                 headerIndex2Group[groupName] = a-1;
456             }
457 
458             headers = util.getline(in); util.gobble(in);
459         }
460 
461         vector<string> columnHeaders = util.splitWhiteSpace(headers);
462 
463         int numGroupsInFile = 0;
464         groups.clear();
465         totalGroups.clear();
466         indexGroupMap.clear();
467         indexNameMap.clear();
468         counts.clear();
469         map<int, string> originalGroupIndexes;
470         if ((columnHeaders.size() > 2) && readGroups) { hasGroups = true; numGroupsInFile = columnHeaders.size() - 2;  }
471 
472         set<string> setOfSelectedGroups;
473         if (readGroups) {
474             for (int i = 2; i < columnHeaders.size(); i++) {
475                 bool saveGroup = true;
476                 if (selectedGroups.size() != 0) {
477                     if (!(util.inUsersGroups(columnHeaders[i], selectedGroups))) { saveGroup = false; }
478                 } //is this group in selected groups
479 
480                 if (saveGroup) {
481                     groups.push_back(columnHeaders[i]);
482                     if (isCompressed) {
483                         map<string, int>::iterator it = headerIndex2Group.find(columnHeaders[i]);
484                         if (it != headerIndex2Group.end()) {
485                             originalGroupIndexes[it->second] = columnHeaders[i];
486                         }
487                     }
488                     else {  originalGroupIndexes[i-2] = columnHeaders[i];  }
489                     totalGroups.push_back(0);
490                     setOfSelectedGroups.insert(columnHeaders[i]);
491                 }
492             }
493         }
494 
495         //sort groups to keep consistent with how we store the groups in groupmap
496         sort(groups.begin(), groups.end());
497         for (int i = 0; i < groups.size(); i++) {  indexGroupMap[groups[i]] = i; }
498         int numGroupsSelected = groups.size();
499 
500         bool error = false;
501         string name;
502         int thisTotal = 0;
503         uniques = 0;
504         total = 0;
505         while (!in.eof()) {
506 
507             if (m->getControl_pressed()) { break; }
508 
509             in >> name; util.gobble(in); in >> thisTotal; util.gobble(in);
510             if (m->getDebug()) { m->mothurOut("[DEBUG]: " + name + '\t' + toString(thisTotal) + "\n"); }
511 
512             if ((thisTotal == 0) && !mothurRunning) { error=true; m->mothurOut("[ERROR]: Your count table contains a sequence named " + name + " with a total=0. Please correct.\n");
513             }
514 
515             //if group info, then read it
516             vector<int> groupCounts; groupCounts.resize(numGroupsSelected, 0);
517             if (columnHeaders.size() > 2) { //file contains groups
518                 if (readGroups) { //user wants to save them
519                     if (selectedGroups.size() != 0) {
520                         //read this seqs groups abundances
521                         thisTotal = 0;
522                         if (isCompressed) {
523                             string groupInfo = util.getline(in); util.gobble(in);
524                             vector<string> groupNodes = util.splitWhiteSpace(groupInfo);
525 
526                             vector<countTableItem> abunds;
527                             for (int i = 0; i < groupNodes.size(); i++) { //for each non zero group count
528                                 string abund = groupNodes[i]; string thisgroup = "";
529                                 util.splitAtComma(thisgroup, abund);
530                                 int a; util.mothurConvert(abund, a);
531                                 int g; util.mothurConvert(thisgroup, g); g--;
532                                 string groupName = originalGroupIndexes[g]; //order of groups in file may not be sorted
533 
534                                 if (setOfSelectedGroups.count(groupName) != 0) { //we selected this group
535                                     int thisIndex = indexGroupMap[groupName];
536                                     countTableItem item(a, thisIndex);
537                                     abunds.push_back(item);
538                                     totalGroups[thisIndex] += a;
539                                     thisTotal += a;
540                                 }
541                             }
542 
543                             groupCounts = expandAbunds(abunds);
544                         }else {
545                             for (int i = 0; i < numGroupsInFile; i++) {
546                                 int thisGroupAbund = 0;
547                                 in >> thisGroupAbund; util.gobble(in);
548                                 string groupName = originalGroupIndexes[i]; //order of groups in file may not be sorted
549 
550                                 if (setOfSelectedGroups.count(groupName) != 0) { //we selected this group
551                                     int thisIndex = indexGroupMap[groupName];
552                                     groupCounts[thisIndex] = thisGroupAbund;
553                                     totalGroups[thisIndex] += thisGroupAbund;
554                                     thisTotal += thisGroupAbund;
555                                 }
556                             }
557                         }
558                     }else {
559 
560                             if (isCompressed) {
561                                 string groupInfo = util.getline(in); util.gobble(in);
562                                 vector<string> groupNodes = util.splitWhiteSpace(groupInfo);
563 
564                                 vector<countTableItem> abunds;
565                                 for (int i = 0; i < groupNodes.size(); i++) { //for each non zero group count
566                                     string abund = groupNodes[i]; string thisgroup = "";
567                                     util.splitAtComma(thisgroup, abund);
568                                     int a; util.mothurConvert(abund, a);
569                                     int g; util.mothurConvert(thisgroup, g); g--;
570                                     string groupName = originalGroupIndexes[g]; //order of groups in file may not be sorted
571                                     int thisIndex = indexGroupMap[groupName];
572                                     countTableItem item(a, thisIndex);
573 
574                                     abunds.push_back(item);
575                                     totalGroups[thisIndex] += a;
576                                 }
577 
578                                 groupCounts = expandAbunds(abunds);
579                             }
580                             else {
581                                 for (int i = 0; i < numGroupsInFile; i++) {
582                                     int thisIndex = indexGroupMap[originalGroupIndexes[i]];
583                                     in >> groupCounts[thisIndex]; util.gobble(in);
584                                     totalGroups[thisIndex] += groupCounts[thisIndex];
585                                 }
586                             }
587 
588                     }
589                 }else { //read and discard
590                     util.getline(in); util.gobble(in);
591                 }
592             }
593 
594             map<string, int>::iterator it = indexNameMap.find(name);
595             if (it == indexNameMap.end()) {
596                 bool saveSeq = true;
597                 if (hasGroups && readGroups) {
598                     vector<countTableItem> thisGroupsCount = compressAbunds(groupCounts);
599                     if (thisGroupsCount.size() == 0) {  saveSeq = false; }
600                     else { counts.push_back(thisGroupsCount); }
601                 }
602                 if (saveSeq) {
603                     indexNameMap[name] = uniques;
604                     totals.push_back(thisTotal);
605                     total += thisTotal;
606                     uniques++;
607                 }
608             }else {
609                 error = true;
610                 m->mothurOut("[ERROR]: Your count table contains more than 1 sequence named " + name + ", sequence names must be unique. Please correct.\n");
611             }
612         }
613 
614         if (error) { m->setControl_pressed(true); }
615         else { //check for zero groups
616             if (hasGroups && readGroups) {
617                 for (int i = 0; i < totalGroups.size(); i++) {
618                     if (totalGroups[i] == 0) { m->mothurOut("\nRemoving group: " + groups[i] + " because all sequences have been removed.\n");
619                         removeGroup(groups[i]);
620                         i--;
621                     }
622                 }
623             }
624         }
625 
626         //if the file has groups, but we didn't read them
627         if (!readGroups) { hasGroups = false; }
628 
629         return 0;
630     }
631 	catch(exception& e) {
632 		m->errorOut(e, "CountTable", "readTable");
633 		exit(1);
634 	}
635 }
636 /************************************************************/
readTable(string file,bool readGroups,bool mothurRunning,set<string> selectedSeqs)637 int CountTable::readTable(string file, bool readGroups, bool mothurRunning, set<string> selectedSeqs) {
638     try {
639         filename = file;
640         ifstream in;
641         util.openInputFile(filename, in);
642 
643         string headers = util.getline(in); util.gobble(in);
644 
645         map<string, int> headerIndex2Group;
646         //#1,F003D000	2,F003D002	3,F003D004	4,F003D006	5,F003D008	6,F003D142	7,F003D144	8,F003D146	9,F003D148	10,F003D150
647         if (headers[0] == '#') { //is this a count file in compressed form
648             isCompressed = true;
649 
650             //read headers
651             headers = util.getline(in); util.gobble(in); //gets compressed group name map line
652             headers = headers.substr(1);
653 
654             vector<string> groupNameHeaders = util.splitWhiteSpace(headers);
655 
656             for (int i = 0; i < groupNameHeaders.size(); i++) {
657                 string groupIndex = ""; string groupName = groupNameHeaders[i];
658                 util.splitAtComma(groupIndex, groupName);
659                 int a; util.mothurConvert(groupIndex, a);
660                 headerIndex2Group[groupName] = a-1;
661             }
662 
663             headers = util.getline(in); util.gobble(in);
664         }
665 
666         vector<string> columnHeaders = util.splitWhiteSpace(headers);
667 
668         int numGroupsInFile = 0;
669         groups.clear();
670         totalGroups.clear();
671         indexGroupMap.clear();
672         indexNameMap.clear();
673         counts.clear();
674         map<int, string> originalGroupIndexes;
675         if ((columnHeaders.size() > 2) && readGroups) { hasGroups = true; numGroupsInFile = columnHeaders.size() - 2;  }
676 
677 
678         if (readGroups) {
679             for (int i = 2; i < columnHeaders.size(); i++) {
680                 groups.push_back(columnHeaders[i]);
681 
682                 if (isCompressed) {
683                     map<string, int>::iterator it = headerIndex2Group.find(columnHeaders[i]);
684                     if (it != headerIndex2Group.end()) {
685                         originalGroupIndexes[it->second] = columnHeaders[i];
686                     }
687                 }else { originalGroupIndexes[i-2] = columnHeaders[i];  }
688                 totalGroups.push_back(0);
689             }
690         }
691 
692         //sort groups to keep consistent with how we store the groups in groupmap
693         sort(groups.begin(), groups.end());
694         for (int i = 0; i < groups.size(); i++) {  indexGroupMap[groups[i]] = i; }
695         int numGroups = groups.size();
696 
697         bool error = false;
698         string name;
699         int thisTotal;
700         uniques = 0;
701         total = 0;
702         while (!in.eof()) {
703 
704             if (m->getControl_pressed()) { break; }
705 
706             in >> name; util.gobble(in); in >> thisTotal; util.gobble(in);
707             if (m->getDebug()) { m->mothurOut("[DEBUG]: " + name + '\t' + toString(thisTotal) + "\n"); }
708 
709             if ((thisTotal == 0) && !mothurRunning) { error=true; m->mothurOut("[ERROR]: Your count table contains a sequence named " + name + " with a total=0. Please correct.\n");
710             }
711 
712             vector<int> groupCounts; groupCounts.resize(numGroups, 0);
713             if (columnHeaders.size() > 2) { //file contains groups
714                 if (readGroups) { //user wants to save them
715                     if (isCompressed) {
716                         string groupInfo = util.getline(in); util.gobble(in);
717                         vector<string> groupNodes = util.splitWhiteSpace(groupInfo);
718 
719                         vector<countTableItem> abunds;
720                         for (int i = 0; i < groupNodes.size(); i++) { //for each non zero group count
721                             string abund = groupNodes[i]; string thisgroup = "";
722                             util.splitAtComma(thisgroup, abund);
723                             int a; util.mothurConvert(abund, a);
724                             int g; util.mothurConvert(thisgroup, g); g--;
725                             string groupName = originalGroupIndexes[g]; //order of groups in file may not be sorted
726                             int thisIndex = indexGroupMap[groupName];
727                             countTableItem item(a, thisIndex);
728 
729                             abunds.push_back(item);
730                             totalGroups[thisIndex] += a;
731                         }
732 
733                         groupCounts = expandAbunds(abunds);
734                     }
735                     else {
736                         for (int i = 0; i < numGroupsInFile; i++) { int thisIndex = indexGroupMap[originalGroupIndexes[i]]; in >> groupCounts[thisIndex]; util.gobble(in); totalGroups[thisIndex] += groupCounts[thisIndex]; }
737                     }
738                 }else { util.getline(in); util.gobble(in); }//read and discard
739             }
740 
741             map<string, int>::iterator it = indexNameMap.find(name);
742             if (it == indexNameMap.end()) {
743                 bool saveSeq = true;
744                 if (selectedSeqs.count(name) == 0) { //don't save
745                     saveSeq = false;
746                 }
747                 if (saveSeq) {
748                     if (hasGroups && readGroups) {
749                         vector<countTableItem> thisGroupsCount = compressAbunds(groupCounts);
750                         counts.push_back(thisGroupsCount);
751                     }
752                     indexNameMap[name] = uniques;
753                     totals.push_back(thisTotal);
754                     total += thisTotal;
755                     uniques++;
756                 }
757             }else {
758                 error = true;
759                 m->mothurOut("[ERROR]: Your count table contains more than 1 sequence named " + name + ", sequence names must be unique. Please correct.\n");
760             }
761         }
762         in.close();
763 
764         if (error) { m->setControl_pressed(true); }
765         else { //check for zero groups
766             if (hasGroups && readGroups) {
767                 for (int i = 0; i < totalGroups.size(); i++) {
768                     if (totalGroups[i] == 0) { m->mothurOut("\nRemoving group: " + groups[i] + " because all sequences have been removed.\n"); removeGroup(groups[i]); i--; }
769                 }
770             }
771         }
772 
773         //if the file has groups, but we didn't read them
774          if (!readGroups) { hasGroups = false; }
775 
776         return 0;
777     }
778     catch(exception& e) {
779         m->errorOut(e, "CountTable", "readTable");
780         exit(1);
781     }
782 }
783 /************************************************************/
784 
zeroOutTable()785 int CountTable::zeroOutTable() {
786   try {
787 
788 		for(int i=0;i<counts.size();i++){
789 			for(int j=0;j<counts[0].size();j++){
790                 counts[j].clear();
791 			}
792 		}
793 
794 		totals.assign(totals.size(), 0);
795 
796 		return 0;
797 	}
798 	catch(exception& e) {
799 		m->errorOut(e, "CountTable", "zeroOutTable");
800 		exit(1);
801 	}
802 }
803 /************************************************************/
804 
clearTable()805 int CountTable::clearTable() {
806     try {
807         hasGroups = false;
808         total = 0;
809         uniques = 0;
810         groups.clear();
811         counts.clear();
812         totals.clear();
813         totalGroups.clear();
814         indexNameMap.clear();
815         indexGroupMap.clear();
816 
817         return 0;
818     }
819     catch(exception& e) {
820         m->errorOut(e, "CountTable", "clearTable");
821         exit(1);
822     }
823 }
824 /************************************************************/
825 //zeroed reads are not printed
printTable(string file)826 vector<string> CountTable::printTable(string file) {
827     try {
828 
829         //remove group if all reads are removed
830         for (int i = 0; i < totalGroups.size(); i++) {
831             if (totalGroups[i] == 0) { m->mothurOut("\nRemoving group: " + groups[i] + " because all sequences have been removed.\n"); removeGroup(groups[i]); i--; }
832         }
833 
834         if (isCompressed) { return printCompressedTable(file); }
835 
836         ofstream out;
837         util.openOutputFile(file, out);
838 
839         vector<string> namesInTable;
840 
841         if (total != 0) {
842             printHeaders(out);
843 
844             map<int, string> reverse; //use this to preserve order
845             for (map<string, int>::iterator it = indexNameMap.begin(); it !=indexNameMap.end(); it++) { reverse[it->second] = it->first;  }
846 
847             for (int i = 0; i < totals.size(); i++) {
848                 if (totals[i] != 0) {
849 
850                     map<int, string>::iterator itR = reverse.find(i);
851 
852                     if (itR != reverse.end()) {
853 
854                         namesInTable.push_back(itR->second);
855 
856                         out << itR->second << '\t' << totals[i];
857 
858                         if (hasGroups) { printGroupAbunds(out, i); }
859 
860                         out << endl;
861                     }
862                 }
863             }
864         }
865         out.close();
866         return namesInTable;
867     }
868 	catch(exception& e) {
869 		m->errorOut(e, "CountTable", "printTable");
870 		exit(1);
871 	}
872 }
873 /************************************************************/
874 //zeroed reads are not printed
printNoGroupsTable(string file)875 vector<string> CountTable::printNoGroupsTable(string file) {
876     try {
877 
878         ofstream out;
879         util.openOutputFile(file, out);
880 
881         vector<string> namesInTable;
882 
883         if (total != 0) {
884             vector<string> headers = getHardCodedHeaders();
885             out << headers[0] << '\t' << headers[1] << endl;
886 
887             map<int, string> reverse; //use this to preserve order
888             for (map<string, int>::iterator it = indexNameMap.begin(); it !=indexNameMap.end(); it++) { reverse[it->second] = it->first;  }
889 
890             for (int i = 0; i < totals.size(); i++) {
891                 if (totals[i] != 0) {
892 
893                     map<int, string>::iterator itR = reverse.find(i);
894 
895                     if (itR != reverse.end()) {
896 
897                         namesInTable.push_back(itR->second);
898 
899                         out << itR->second << '\t' << totals[i] << endl;
900                     }
901                 }
902             }
903         }
904         out.close();
905         return namesInTable;
906     }
907     catch(exception& e) {
908         m->errorOut(e, "CountTable", "printTable");
909         exit(1);
910     }
911 }
912 /************************************************************/
913 //zeroed reads are not printed
printTable(string file,bool compressedFormat)914 vector<string> CountTable::printTable(string file, bool compressedFormat) {
915     try {
916 
917         //remove group if all reads are removed
918         for (int i = 0; i < totalGroups.size(); i++) {
919             if (totalGroups[i] == 0) { m->mothurOut("\nRemoving group: " + groups[i] + " because all sequences have been removed.\n"); removeGroup(groups[i]); i--; }
920         }
921 
922         if (compressedFormat) { return printCompressedTable(file); }
923 
924         ofstream out;
925         util.openOutputFile(file, out);
926 
927         vector<string> namesInTable;
928 
929         if (total != 0) {
930             printHeaders(out);
931 
932             map<int, string> reverse; //use this to preserve order
933             for (map<string, int>::iterator it = indexNameMap.begin(); it !=indexNameMap.end(); it++) { reverse[it->second] = it->first;  }
934 
935             for (int i = 0; i < totals.size(); i++) {
936 
937                 if (totals[i] != 0) {
938 
939                     map<int, string>::iterator itR = reverse.find(i);
940 
941                     if (itR != reverse.end()) {
942                         namesInTable.push_back(itR->second);
943 
944                         out << itR->second << '\t' << totals[i];
945 
946                         if (hasGroups) { printGroupAbunds(out, i); }
947 
948                         out << endl;
949                     }
950                 }
951             }
952         }
953         out.close();
954         return namesInTable;
955     }
956     catch(exception& e) {
957         m->errorOut(e, "CountTable", "printTable");
958         exit(1);
959     }
960 }
961 /************************************************************/
962 //zeroed seqs are not printed
printCompressedTable(string file,vector<string> groupsToPrint)963 vector<string> CountTable::printCompressedTable(string file, vector<string> groupsToPrint) {
964     try {
965         ofstream out;
966         util.openOutputFile(file, out);
967 
968         vector<string> namesInTable;
969 
970         bool pickedGroups = false;
971         set<int> selectedGroupsIndicies;
972         if (groupsToPrint.size() != 0) { if (hasGroups) { pickedGroups = true; } } //if no groups selected, print all groups
973 
974         if (total != 0) {
975             if (hasGroups) {
976 
977                 map<int, string> reverse;
978                 for (map<string, int>::iterator it = indexGroupMap.begin(); it !=indexGroupMap.end(); it++) { reverse[it->second] = it->first; }
979 
980                 map<int, string>::iterator it = reverse.begin();
981                 string group1Name = it->second;
982                 if (pickedGroups) { //find selected groups indicies
983                     for (map<int, string>::iterator it = reverse.begin(); it != reverse.end(); it++) {
984                         if (util.inUsersGroups(it->second, groupsToPrint)) { group1Name = it->second; break; }
985                     }
986                 }
987 
988                 out << "#Compressed Format: groupIndex,abundance. For example 1,6 would mean the read has an abundance of 6 for group " + group1Name + "." << endl;
989                 out << "#";
990 
991                 for (map<int, string>::iterator it = reverse.begin(); it != reverse.end(); it++) {
992                     if (pickedGroups) { //find selected groups indicies
993                         if (util.inUsersGroups(it->second, groupsToPrint)) {
994                             selectedGroupsIndicies.insert(it->first);
995 
996                             out << it->first+1 << "," << it->second << "\t";
997                         }
998                     }else { out << it->first+1 << "," << it->second << "\t"; }
999                 }
1000                 out << endl;
1001             }
1002 
1003             printHeaders(out, groupsToPrint);
1004 
1005             map<int, string> reverse; //use this to preserve order
1006             for (map<string, int>::iterator it = indexNameMap.begin(); it !=indexNameMap.end(); it++) { reverse[it->second] = it->first;  }
1007 
1008             for (int i = 0; i < totals.size(); i++) {
1009                 if (totals[i] != 0) {
1010                     if (pickedGroups) {
1011                         string groupOutput = "";
1012                         long long thisTotal = 0;
1013                         for (int j = 0; j < counts[i].size(); j++) {
1014                             if (selectedGroupsIndicies.count(counts[i][j].group) != 0) { //this is a group we want
1015                                 groupOutput += '\t' + toString(counts[i][j].group+1) + ',' + toString(counts[i][j].abund);
1016                                 thisTotal += counts[i][j].abund;
1017                             }
1018                         }
1019 
1020                         if (thisTotal != 0) {
1021                             map<int, string>::iterator itR = reverse.find(i);
1022 
1023                             if (itR != reverse.end()) {
1024                                 namesInTable.push_back(itR->second);
1025 
1026                                 out << itR->second << '\t' << thisTotal << groupOutput << endl;
1027                             }
1028                         }
1029                     }
1030                     else {
1031                         map<int, string>::iterator itR = reverse.find(i);
1032 
1033                         if (itR != reverse.end()) {
1034                             namesInTable.push_back(itR->second);
1035 
1036                             out << itR->second << '\t' << totals[i];
1037                             if (hasGroups) {
1038                                 for (int j = 0; j < counts[i].size(); j++) {
1039                                     out  << '\t' << counts[i][j].group+1 << ',' << counts[i][j].abund;
1040                                 }
1041                             }
1042                             out << endl;
1043                         }
1044                     }
1045                 }
1046             }
1047         }
1048         out.close();
1049 
1050         return namesInTable;
1051     }
1052     catch(exception& e) {
1053         m->errorOut(e, "CountTable", "printCompressedTable");
1054         exit(1);
1055     }
1056 }
1057 /************************************************************/
//returns index of countTableItem for group passed in. If group is not present in seq, returns -1 (returnNext is currently unused)
find(int seq,int group,bool returnNext)1059 int CountTable::find(int seq, int group, bool returnNext) {
1060     try {
1061 
1062         //if (!returnNext) { return find(seq, group); }
1063         int index = -1;
1064 
1065         for (int i = 0; i < counts[seq].size(); i++) {
1066             if (counts[seq][i].group >= group) { //found it or done looking
1067 
1068                 if (counts[seq][i].group == group) { index = i;  }
1069                 break;
1070             }
1071         }
1072 
1073         return index;
1074     }
1075     catch(exception& e) {
1076         m->errorOut(e, "CountTable", "find");
1077         exit(1);
1078     }
1079 }/************************************************************/
1080 //returns abundance of countTableItem for seq and group passed in. If group is not present in seq, returns 0
getAbund(int seq,int group)1081 int CountTable::getAbund(int seq, int group) {
1082     try {
1083         int index = find(seq, group, false);
1084 
1085         if (index != -1) { //this seq has a non zero abundance for this group
1086             return counts[seq][index].abund;
1087         }
1088 
1089         return 0;
1090     }
1091     catch(exception& e) {
1092         m->errorOut(e, "CountTable", "getAbund");
1093         exit(1);
1094     }
1095 }
1096 /************************************************************/
expandAbunds(vector<countTableItem> & items)1097 vector<int> CountTable::expandAbunds(vector<countTableItem>& items) {
1098     try {
1099         vector<int> abunds; abunds.resize(groups.size(), 0); //prefill with 0's
1100 
1101         for (int i = 0; i < items.size(); i++) { //for each non zero entry
1102             abunds[items[i].group] = items[i].abund; //set abund for group
1103         }
1104 
1105         return abunds;
1106     }
1107     catch(exception& e) {
1108         m->errorOut(e, "CountTable", "expandAbunds");
1109         exit(1);
1110     }
1111 }
1112 /************************************************************/
expandAbunds(int index)1113 vector<int> CountTable::expandAbunds(int index) {
1114     try {
1115         vector<int> abunds; abunds.resize(groups.size(), 0); //prefill with 0's
1116 
1117 
1118         for (int i = 0; i < counts[index].size(); i++) { //for each non zero entry
1119             abunds[counts[index][i].group] = counts[index][i].abund; //set abund for group
1120         }
1121 
1122         return abunds;
1123     }
1124     catch(exception& e) {
1125         m->errorOut(e, "CountTable", "expandAbunds");
1126         exit(1);
1127     }
1128 }
1129 /************************************************************/
1130 //assumes same order as groups
compressAbunds(vector<int> abunds)1131 vector<countTableItem> CountTable::compressAbunds(vector<int> abunds) {
1132     try {
1133         vector<countTableItem> row;
1134 
1135         for (int i = 0; i < abunds.size(); i++) {
1136             if (abunds[i] != 0) {
1137                 countTableItem thisAbund(abunds[i], i);
1138                 row.push_back(thisAbund);
1139             }
1140         }
1141 
1142         return row;
1143     }
1144     catch(exception& e) {
1145         m->errorOut(e, "CountTable", "compressAbunds");
1146         exit(1);
1147     }
1148 }
1149 /************************************************************/
printGroupAbunds(ofstream & out,int index)1150 void CountTable::printGroupAbunds(ofstream& out, int index) {
1151     try {
1152 
1153         vector<int> abunds = expandAbunds(index);
1154 
1155         for (int i = 0; i < abunds.size(); i++) { out << '\t' << abunds[i]; }
1156     }
1157     catch(exception& e) {
1158         m->errorOut(e, "CountTable", "printGroupAbunds");
1159         exit(1);
1160     }
1161 }
1162 /************************************************************/
printSortedTable(string file)1163 vector<string> CountTable::printSortedTable(string file) {
1164     try {
1165         //remove group if all reads are removed
1166         for (int i = 0; i < totalGroups.size(); i++) {
1167             if (totalGroups[i] == 0) { m->mothurOut("\nRemoving group: " + groups[i] + " because all sequences have been removed.\n"); removeGroup(groups[i]); i--; }
1168         }
1169 
1170         ofstream out;
1171         util.openOutputFile(file, out);
1172         printHeaders(out);
1173 
1174         vector<string> namesInTable;
1175 
1176         for (map<string, int>::iterator it = indexNameMap.begin(); it !=indexNameMap.end(); it++) {
1177             string seqName = it->first;
1178             int index = it->second;
1179 
1180             if (totals[index] != 0) {
1181                 namesInTable.push_back(seqName);
1182 
1183                 out << seqName << '\t' << totals[index];
1184                 if (hasGroups) {
1185                     printGroupAbunds(out, index);
1186                 }
1187                 out << endl;
1188             }
1189         }
1190         out.close();
1191 
1192         return namesInTable;
1193     }
1194     catch(exception& e) {
1195         m->errorOut(e, "CountTable", "printSortedTable");
1196         exit(1);
1197     }
1198 }
1199 
1200 /************************************************************/
getHardCodedHeaders()1201 vector<string> CountTable::getHardCodedHeaders() {
1202     try {
1203         vector<string> headers; headers.push_back("Representative_Sequence"); headers.push_back("total");
1204         return headers;
1205     }
1206     catch(exception& e) {
1207         m->errorOut(e, "CountTable", "printHeaders");
1208         exit(1);
1209     }
1210 }
1211 /************************************************************/
printHeaders(ofstream & out,vector<string> selectedGroups)1212 int CountTable::printHeaders(ofstream& out, vector<string> selectedGroups) {
1213     try {
1214         //remove group if all reads are removed
1215         for (int i = 0; i < totalGroups.size(); i++) {
1216             if (totalGroups[i] == 0) { m->mothurOut("\nRemoving group: " + groups[i] + " because all sequences have been removed.\n"); removeGroup(groups[i]); i--; }
1217         }
1218 
1219         bool pickedGroups = false;
1220         if (selectedGroups.size() != 0) { pickedGroups = true; }
1221 
1222         out << "Representative_Sequence\ttotal";
1223         if (hasGroups) {
1224             for (int i = 0; i < groups.size(); i++) {
1225                 if (pickedGroups) {
1226                     if (util.inUsersGroups(groups[i], selectedGroups)) {  out << '\t' << groups[i]; }
1227                 }
1228                 else { out << '\t' << groups[i]; }
1229             }
1230         }
1231         out << endl;
1232         return 0;
1233     }
1234 	catch(exception& e) {
1235 		m->errorOut(e, "CountTable", "printHeaders");
1236 		exit(1);
1237 	}
1238 }
1239 /************************************************************/
printSeq(ofstream & out,string seqName)1240 int CountTable::printSeq(ofstream& out, string seqName) {
1241     try {
1242 		map<string, int>::iterator it = indexNameMap.find(seqName);
1243         if (it == indexNameMap.end()) {
1244             m->mothurOut("[ERROR]: " + seqName + " is not in your count table. Please correct.\n"); m->setControl_pressed(true);
1245         }else {
1246             if (totals[it->second] != 0) {
1247                 out << it->first << '\t' << totals[it->second];
1248 
1249                 if (hasGroups) { printGroupAbunds(out, it->second); }
1250 
1251                 out << endl;
1252             }
1253         }
1254         return 0;
1255     }
1256 	catch(exception& e) {
1257 		m->errorOut(e, "CountTable", "printSeq");
1258 		exit(1);
1259 	}
1260 }
1261 
1262 /************************************************************/
1263 //group counts for a seq
getGroupCounts(string seqName)1264 vector<int> CountTable::getGroupCounts(string seqName) {
1265     try {
1266         vector<countTableItem> temp = getItems(seqName);
1267         return (expandAbunds(temp));
1268 
1269     }
1270 	catch(exception& e) {
1271 		m->errorOut(e, "CountTable", "getGroupCounts");
1272 		exit(1);
1273 	}
1274 }
1275 /************************************************************/
1276 //group counts for a seq
getItems(string seqName)1277 vector<countTableItem> CountTable::getItems(string seqName) {
1278     try {
1279         vector<countTableItem> temp;
1280         if (hasGroups) {
1281             map<string, int>::iterator it = indexNameMap.find(seqName);
1282             if (it == indexNameMap.end()) {
1283                 //look for it in names of groups to see if the user accidently used the wrong file
1284                 if (util.inUsersGroups(seqName, groups)) {
1285                     m->mothurOut("[WARNING]: Your group or design file contains a group named " + seqName + ".  Perhaps you are used a group file instead of a design file? A common cause of this is using a tree file that relates your groups (created by the tree.shared command) with a group file that assigns sequences to a group.\n");
1286                 }
1287                 m->mothurOut("[ERROR]: " + seqName + " is not in your count table. Please correct.\n"); m->setControl_pressed(true);
1288             }else {
1289                 temp = counts[it->second];
1290             }
1291         }else{  m->mothurOut("[ERROR]: Your count table does not have group info. Please correct.\n"); m->setControl_pressed(true); }
1292 
1293         return temp;
1294     }
1295     catch(exception& e) {
1296         m->errorOut(e, "CountTable", "getGroupCounts");
1297         exit(1);
1298     }
1299 }
1300 /************************************************************/
1301 //total number of sequences for the group
getGroupCount(string groupName)1302 int CountTable::getGroupCount(string groupName) {
1303     try {
1304         if (hasGroups) {
1305             map<string, int>::iterator it = indexGroupMap.find(groupName);
1306             if (it == indexGroupMap.end()) {
1307                 m->mothurOut("[ERROR]: group " + groupName + " is not in your count table. Please correct.\n"); m->setControl_pressed(true);
1308             }else {
1309                 return totalGroups[it->second];
1310             }
1311         }else{  m->mothurOut("[ERROR]: Your count table does not have group info. Please correct.\n");  m->setControl_pressed(true); }
1312 
1313         return 0;
1314     }
1315 	catch(exception& e) {
1316 		m->errorOut(e, "CountTable", "getGroupCount");
1317 		exit(1);
1318 	}
1319 }
1320 /************************************************************/
1321 //total number of sequences for the seq for the group
getGroupCount(string seqName,string groupName)1322 int CountTable::getGroupCount(string seqName, string groupName) {
1323     try {
1324         if (hasGroups) {
1325             map<string, int>::iterator it = indexGroupMap.find(groupName);
1326             if (it == indexGroupMap.end()) {
1327                 m->mothurOut("[ERROR]: group " + groupName + " is not in your count table. Please correct.\n"); m->setControl_pressed(true);
1328             }else {
1329                 map<string, int>::iterator it2 = indexNameMap.find(seqName);
1330                 if (it2 == indexNameMap.end()) {
1331                     //look for it in names of groups to see if the user accidently used the wrong file
1332                     if (util.inUsersGroups(seqName, groups)) {
1333                         m->mothurOut("[WARNING]: Your group or design file contains a group named " + seqName + ".  Perhaps you are used a group file instead of a design file? A common cause of this is using a tree file that relates your groups (created by the tree.shared command) with a group file that assigns sequences to a group.\n");
1334                     }
1335                     m->mothurOut("[ERROR]: seq " + seqName + " is not in your count table. Please correct.\n"); m->setControl_pressed(true);
1336                 }else {
1337                     return expandAbunds(it2->second)[it->second];
1338                 }
1339             }
1340         }else{  m->mothurOut("[ERROR]: Your count table does not have group info. Please correct.\n");  m->setControl_pressed(true); }
1341 
1342         return 0;
1343     }
1344 	catch(exception& e) {
1345 		m->errorOut(e, "CountTable", "getGroupCount");
1346 		exit(1);
1347 	}
1348 }
1349 /************************************************************/
1350 //set the number of sequences for the seq for the group
setAbund(string seqName,string groupName,int num)1351 int CountTable::setAbund(string seqName, string groupName, int num) {
1352     try {
1353         if (hasGroups) {
1354             map<string, int>::iterator it = indexGroupMap.find(groupName);
1355             if (it == indexGroupMap.end()) {
1356                 m->mothurOut("[ERROR]: " + groupName + " is not in your count table. Please correct.\n"); m->setControl_pressed(true);
1357             }else {
1358                 map<string, int>::iterator it2 = indexNameMap.find(seqName);
1359                 if (it2 == indexNameMap.end()) {
1360                     //look for it in names of groups to see if the user accidently used the wrong file
1361                     if (util.inUsersGroups(seqName, groups)) {
1362                         m->mothurOut("[WARNING]: Your group or design file contains a group named " + seqName + ".  Perhaps you are used a group file instead of a design file? A common cause of this is using a tree file that relates your groups (created by the tree.shared command) with a group file that assigns sequences to a group.\n");
1363                     }
1364                     m->mothurOut("[ERROR]: " + seqName + " is not in your count table. Please correct.\n"); m->setControl_pressed(true);
1365                 }else {
1366                     int indexOfGroup = find(it2->second, it->second, false);
1367                     int oldCount = 0;
1368 
1369                     if (indexOfGroup == -1) { //create item for this group
1370                         countTableItem newItem(num, it->second);
1371                         counts[it2->second].push_back(newItem);
1372                         sortRow(it2->second);
1373                     }else { //update total for group
1374                         oldCount = counts[it2->second][indexOfGroup].abund;
1375                         counts[it2->second][indexOfGroup].abund = num;
1376                     }
1377 
1378                     totalGroups[it->second] += (num - oldCount);
1379                     total += (num - oldCount);
1380                     totals[it2->second] += (num - oldCount);
1381                 }
1382             }
1383         }else{  m->mothurOut("[ERROR]: Your count table does not have group info. Please correct.\n");  m->setControl_pressed(true); }
1384 
1385         return 0;
1386     }
1387 	catch(exception& e) {
1388 		m->errorOut(e, "CountTable", "set");
1389 		exit(1);
1390 	}
1391 }
1392 /************************************************************/
1393 //add group
addGroup(string groupName)1394 int CountTable::addGroup(string groupName) {
1395     try {
1396         bool sanity = util.inUsersGroups(groupName, groups);
1397         if (sanity) { m->mothurOut("[ERROR]: " + groupName + " is already in the count table, cannot add again.\n"); m->setControl_pressed(true);  return 0; }
1398 
1399         groups.push_back(groupName);
1400         if (!hasGroups) { counts.resize(uniques);  }
1401 
1402         totalGroups.push_back(0);
1403         indexGroupMap[groupName] = groups.size()-1;
1404         map<string, int> originalGroupMap = indexGroupMap;
1405 
1406         //important to play well with others, :)
1407         sort(groups.begin(), groups.end());
1408 
1409         //fix indexGroupMap && totalGroups
1410         vector<int> newTotals; newTotals.resize(groups.size(), 0);
1411         for (int i = 0; i < groups.size(); i++) {
1412             indexGroupMap[groups[i]] = i;
1413             //find original spot of group[i]
1414             int index = originalGroupMap[groups[i]];
1415             newTotals[i] = totalGroups[index];
1416         }
1417         totalGroups = newTotals;
1418 
1419         hasGroups = true;
1420 
1421         return 0;
1422     }
1423 	catch(exception& e) {
1424 		m->errorOut(e, "CountTable", "addGroup");
1425 		exit(1);
1426 	}
1427 }
1428 /************************************************************/
1429 //remove group
removeGroup(string groupName)1430 int CountTable::removeGroup(string groupName) {
1431     try {
1432         if (hasGroups) {
1433             //save for later in case removing a group means we need to remove a seq.
1434             map<int, string> reverse;
1435             map<string, int>::iterator it;
1436             for (it = indexNameMap.begin(); it !=indexNameMap.end(); it++) { reverse[it->second] = it->first;  }
1437 
1438             it = indexGroupMap.find(groupName);
1439             if (it == indexGroupMap.end()) {
1440                 m->mothurOut("[ERROR]: " + groupName + " is not in your count table. Please correct.\n"); m->setControl_pressed(true);
1441             }else {
1442                 int indexOfGroupToRemove = it->second;
1443                 map<string, int> currentGroupIndex = indexGroupMap;
1444                 vector<string> newGroups;
1445                 for (int i = 0; i < groups.size(); i++) {
1446                     if (groups[i] != groupName) {
1447                         newGroups.push_back(groups[i]);
1448                         indexGroupMap[groups[i]] = newGroups.size()-1;
1449                     }
1450                 }
1451                 indexGroupMap.erase(groupName);
1452                 groups = newGroups;
1453                 totalGroups.erase(totalGroups.begin()+indexOfGroupToRemove);
1454 
1455                 int thisIndex = 0;
1456                 map<string, int> newIndexNameMap;
1457                 for (int i = 0; i < counts.size(); i++) {
1458 
1459                     if (m->getControl_pressed()) { break; }
1460 
1461                     int indexOfGroup = -1; bool found = false;
1462                     for (int j = 0; j < counts[i].size(); j++) {
1463                         if (counts[i][j].group >= indexOfGroupToRemove) { //found it or done looking
1464 
1465                             indexOfGroup = j;
1466                             if (counts[i][j].group == indexOfGroupToRemove) {   found = true; }
1467                             break;
1468                         }
1469                     }
1470 
1471                     if (found) { //you have an abundance for this group
1472                         int num = counts[i][indexOfGroup].abund;
1473                         counts[i].erase(counts[i].begin()+indexOfGroup);
1474                         totals[i] -= num;
1475                         total -= num;
1476 
1477                         if (totals[i] == 0) { //your sequences are only from the group we want to remove, then remove you.
1478                             counts.erase(counts.begin()+i);
1479                             totals.erase(totals.begin()+i);
1480                             uniques--;
1481                             i--;
1482                             if (i == -1) { i = 0; }
1483                             indexOfGroup = counts[i].size(); //don't adjust the the group indexes because we removed the read
1484                         }else { newIndexNameMap[reverse[thisIndex]] = i; }
1485                     }else { //you don't have this group, nothing to remove
1486 
1487                         if (indexOfGroup == -1) { indexOfGroup = counts[i].size(); }
1488                         newIndexNameMap[reverse[thisIndex]] = i;
1489                     }
1490 
1491                     for (int j = indexOfGroup; j < counts[i].size(); j++) { counts[i][j].group -= 1; }
1492 
1493                     thisIndex++;
1494                 }
1495                 indexNameMap = newIndexNameMap;
1496 
1497                 if (groups.size() == 0) { hasGroups = false; }
1498             }
1499         }else { m->mothurOut("[ERROR]: your count table does not contain group information, can not remove group " + groupName + ".\n"); m->setControl_pressed(true); }
1500 
1501         return 0;
1502     }
1503     catch(exception& e) {
1504         m->errorOut(e, "CountTable", "removeGroup");
1505         exit(1);
1506     }
1507 }
1508 /***********************************************************************/
removeGroup(int minSize)1509 int CountTable::removeGroup(int minSize){
1510     try {
1511 
1512         if (hasGroups) {
1513             for (int i = 0; i < totalGroups.size(); i++) {
1514                 if (totalGroups[i] < minSize) { removeGroup(groups[i]); }
1515             }
1516         }else { m->mothurOut("[ERROR]: your count table does not contain group information, can not remove groups.\n"); m->setControl_pressed(true); }
1517 
1518         return groups.size();
1519     }
1520     catch(exception& e) {
1521         m->errorOut(e, "SharedRAbundVector", "removeGroup - minSize");
1522         exit(1);
1523     }
1524 }
1525 /************************************************************/
1526 //vector of groups for the seq
getGroups(string seqName)1527 vector<string> CountTable::getGroups(string seqName) {
1528     try {
1529         vector<string> thisGroups;
1530         map<string, int>::iterator it = indexNameMap.find(seqName);
1531         if (it == indexNameMap.end()) {
1532             m->mothurOut("[ERROR]: " + seqName + " is not in your count table. Please correct.\n"); m->setControl_pressed(true);
1533         }else {
1534             if (hasGroups) {
1535                 int index = it->second;
1536                 for (int i = 0; i < counts[index].size(); i++) {
1537                     thisGroups.push_back(groups[counts[index][i].group]);
1538                 }
1539             }else{  m->mothurOut("[ERROR]: Your count table does not have group info. Please correct.\n");  m->setControl_pressed(true); }
1540         }
1541 
1542         return thisGroups;
1543     }
1544 	catch(exception& e) {
1545 		m->errorOut(e, "CountTable", "getGroups");
1546 		exit(1);
1547 	}
1548 }
1549 /************************************************************/
1550 //total number of seqs represented by seq
renameSeq(string oldSeqName,string newSeqName)1551 int CountTable::renameSeq(string oldSeqName, string newSeqName) {
1552     try {
1553 
1554         map<string, int>::iterator it = indexNameMap.find(oldSeqName);
1555         if (it == indexNameMap.end()) {
1556             if (hasGroupInfo()) {
1557                 //look for it in names of groups to see if the user accidently used the wrong file
1558                 if (util.inUsersGroups(oldSeqName, groups)) {
1559                     m->mothurOut("[WARNING]: Your group or design file contains a group named " + oldSeqName + ".  Perhaps you are used a group file instead of a design file? A common cause of this is using a tree file that relates your groups (created by the tree.shared command) with a group file that assigns sequences to a group.\n");
1560                 }
1561             }
1562             m->mothurOut("[ERROR]: " + oldSeqName + " is not in your count table. Please correct.\n"); m->setControl_pressed(true);
1563         }else {
1564             int index = it->second;
1565             indexNameMap.erase(it);
1566             indexNameMap[newSeqName] = index;
1567         }
1568 
1569         return 0;
1570     }
1571 	catch(exception& e) {
1572 		m->errorOut(e, "CountTable", "renameSeq");
1573 		exit(1);
1574 	}
1575 }
1576 
1577 /************************************************************/
1578 //total number of seqs represented by seq
getNumSeqs(string seqName)1579 int CountTable::getNumSeqs(string seqName) {
1580     try {
1581 
1582         map<string, int>::iterator it = indexNameMap.find(seqName);
1583         if (it == indexNameMap.end()) {
1584             if (hasGroupInfo()) {
1585                 //look for it in names of groups to see if the user accidently used the wrong file
1586                 if (util.inUsersGroups(seqName, groups)) {
1587                     m->mothurOut("[WARNING]: Your group or design file contains a group named " + seqName + ".  Perhaps you are used a group file instead of a design file? A common cause of this is using a tree file that relates your groups (created by the tree.shared command) with a group file that assigns sequences to a group.\n");
1588                 }
1589             }
1590             m->mothurOut("[ERROR]: " + seqName + " is not in your count table. Please correct.\n"); m->setControl_pressed(true);
1591         }else {
1592             return totals[it->second];
1593         }
1594 
1595         return 0;
1596     }
1597 	catch(exception& e) {
1598 		m->errorOut(e, "CountTable", "getNumSeqs");
1599 		exit(1);
1600 	}
1601 }
1602 /************************************************************/
1603 //set total number of seqs represented by seq
setNumSeqs(string seqName,int abund)1604 int CountTable::setNumSeqs(string seqName, int abund) {
1605     try {
1606 
1607         map<string, int>::iterator it = indexNameMap.find(seqName);
1608         if (it == indexNameMap.end()) {
1609             m->mothurOut("[ERROR]: " + seqName + " is not in your count table. Please correct.\n"); m->setControl_pressed(true); return -1;
1610         }else {
1611             int diff = totals[it->second] - abund;
1612             totals[it->second] = abund;
1613             total-=diff;
1614         }
1615 
1616         return 0;
1617     }
1618     catch(exception& e) {
1619         m->errorOut(e, "CountTable", "setNumSeqs");
1620         exit(1);
1621     }
1622 }
1623 /************************************************************/
zeroOutSeq(string seqName)1624 int CountTable::zeroOutSeq(string seqName) {
1625     try {
1626 
1627         map<string, int>::iterator it = indexNameMap.find(seqName);
1628         if (it == indexNameMap.end()) {
1629             m->mothurOut("[ERROR]: " + seqName + " is not in your count table. Please correct.\n"); m->setControl_pressed(true); return -1;
1630         }else {
1631             int abund = totals[it->second];
1632             totals[it->second] = 0;
1633             total-=abund;
1634 
1635             if (hasGroups) {
1636                 int seqIndexIntoCounts = it->second;
1637                 for (int i = 0; i < counts[seqIndexIntoCounts].size(); i++) {
1638                     totalGroups[counts[seqIndexIntoCounts][i].group] -= counts[seqIndexIntoCounts][i].abund;
1639                 }
1640                 counts[seqIndexIntoCounts].clear();
1641             }
1642         }
1643 
1644         return 0;
1645     }
1646     catch(exception& e) {
1647         m->errorOut(e, "CountTable", "zeroOutSeq");
1648         exit(1);
1649     }
1650 }
1651 /************************************************************/
1652 //returns unique index for sequence like get in NameAssignment
get(string seqName)1653 int CountTable::get(string seqName) {
1654     try {
1655 
1656         map<string, int>::iterator it = indexNameMap.find(seqName);
1657         if (it == indexNameMap.end()) {
1658             if (hasGroupInfo()) {
1659                 //look for it in names of groups to see if the user accidently used the wrong file
1660                 if (util.inUsersGroups(seqName, groups)) {
1661                     m->mothurOut("[WARNING]: Your group or design file contains a group named " + seqName + ".  Perhaps you are used a group file instead of a design file? A common cause of this is using a tree file that relates your groups (created by the tree.shared command) with a group file that assigns sequences to a group.\n");
1662                 }
1663             }
1664             m->mothurOut("[ERROR]: " + seqName + " is not in your count table. Please correct.\n"); m->setControl_pressed(true);
1665         }else { return it->second; }
1666 
1667         return -1;
1668     }
1669 	catch(exception& e) {
1670 		m->errorOut(e, "CountTable", "get");
1671 		exit(1);
1672 	}
1673 }
1674 /************************************************************/
1675 //add seqeunce without group info
push_back(string seqName)1676 int CountTable::push_back(string seqName) {
1677     try {
1678         map<string, int>::iterator it = indexNameMap.find(seqName);
1679         if (it == indexNameMap.end()) {
1680             if (hasGroups) {  m->mothurOut("[ERROR]: Your count table has groups and I have no group information for " + seqName + ".\n");  m->setControl_pressed(true);  }
1681             indexNameMap[seqName] = uniques;
1682             totals.push_back(1);
1683             total++;
1684             uniques++;
1685         }else {
1686             m->mothurOut("[ERROR]: Your count table contains more than 1 sequence named " + seqName + ", sequence names must be unique. Please correct.\n");  m->setControl_pressed(true);
1687         }
1688 
1689         return 1;
1690     }
1691 	catch(exception& e) {
1692 		m->errorOut(e, "CountTable", "push_back");
1693 		exit(1);
1694 	}
1695 }
1696 /************************************************************/
1697 //
inTable(string seqName)1698 bool CountTable::inTable(string seqName) {
1699     try {
1700         map<string, int>::iterator it = indexNameMap.find(seqName);
1701         if (it != indexNameMap.end()) { return true; }
1702         return false;
1703 
1704     }
1705     catch(exception& e) {
1706         m->errorOut(e, "CountTable", "inTable");
1707         exit(1);
1708     }
1709 }
1710 
1711 /************************************************************/
1712 //remove sequence
remove(string seqName)1713 int CountTable::remove(string seqName) {
1714     try {
1715         map<string, int>::iterator it = indexNameMap.find(seqName);
1716         if (it != indexNameMap.end()) {
1717             int seqIndexIntoCounts = it->second;
1718             uniques--;
1719             if (hasGroups){ //remove this sequences counts from group totals
1720                 for (int i = 0; i < counts[seqIndexIntoCounts].size(); i++) {
1721                     totalGroups[counts[seqIndexIntoCounts][i].group] -= counts[seqIndexIntoCounts][i].abund;
1722                 }
1723             }
1724 
1725             //save for later in case removing a group means we need to remove a seq.
1726             map<int, string> reverse;
1727             for (map<string, int>::iterator it2 = indexNameMap.begin(); it2 !=indexNameMap.end(); it2++) { reverse[it2->second] = it2->first;  }
1728 
1729             int newIndex = 0;
1730             map<string, int> newIndexNameMap;
1731             for (int i = 0; i < counts.size(); i++) {
1732                 if (i == seqIndexIntoCounts) { }//you are the seq we are trying to remove
1733                 else {   newIndexNameMap[reverse[i]] = newIndex; newIndex++;  }
1734             }
1735             indexNameMap = newIndexNameMap;
1736 
1737             counts.erase(counts.begin()+seqIndexIntoCounts);
1738             int thisTotal = totals[seqIndexIntoCounts];
1739             totals.erase(totals.begin()+seqIndexIntoCounts);
1740             total -= thisTotal;
1741 
1742             //remove group if all reads are removed
1743             for (int i = 0; i < totalGroups.size(); i++) {
1744                 if (totalGroups[i] == 0) { m->mothurOut("\nRemoving group: " + groups[i] + " because all sequences have been removed.\n"); removeGroup(groups[i]); i--; }
1745             }
1746 
1747         }else {
1748             if (hasGroupInfo()) {
1749                 //look for it in names of groups to see if the user accidently used the wrong file
1750                 if (util.inUsersGroups(seqName, groups)) {
1751                     m->mothurOut("[WARNING]: Your group or design file contains a group named " + seqName + ".  Perhaps you are used a group file instead of a design file? A common cause of this is using a tree file that relates your groups (created by the tree.shared command) with a group file that assigns sequences to a group.\n");
1752                 }
1753             }
1754             m->mothurOut("[ERROR]: Your count table contains does not include " + seqName + ", cannot remove.\n");  m->setControl_pressed(true);
1755         }
1756 
1757         return 0;
1758     }
1759 	catch(exception& e) {
1760 		m->errorOut(e, "CountTable", "remove");
1761 		exit(1);
1762 	}
1763 }
1764 /************************************************************/
1765 //add seqeunce without group info
push_back(string seqName,int thisTotal)1766 int CountTable::push_back(string seqName, int thisTotal) {
1767     try {
1768         map<string, int>::iterator it = indexNameMap.find(seqName);
1769         if (it == indexNameMap.end()) {
1770             if (hasGroups) {  m->mothurOut("[ERROR]: Your count table has groups and I have no group information for " + seqName + ".\n"); m->setControl_pressed(true);  }
1771             indexNameMap[seqName] = uniques;
1772             totals.push_back(thisTotal);
1773             total+=thisTotal;
1774             uniques++;
1775         }else {
1776             m->mothurOut("[ERROR]: Your count table contains more than 1 sequence named " + seqName + ", sequence names must be unique. Please correct.\n");  m->setControl_pressed(true);
1777         }
1778 
1779         return thisTotal;
1780     }
1781 	catch(exception& e) {
1782 		m->errorOut(e, "CountTable", "push_back");
1783 		exit(1);
1784 	}
1785 }
1786 /************************************************************/
1787 //add sequence with group info
push_back(string seqName,vector<int> groupCounts,bool ignoreDup=false)1788 int CountTable::push_back(string seqName, vector<int> groupCounts, bool ignoreDup=false) {
1789     try {
1790         int thisTotal = 0;
1791         map<string, int>::iterator it = indexNameMap.find(seqName);
1792         if (it == indexNameMap.end()) {
1793             if ((hasGroups) && (groupCounts.size() != getNumGroups())) {  m->mothurOut("[ERROR]: Your count table has a " + toString(getNumGroups()) + " groups and " + seqName + " has " + toString(groupCounts.size()) + ", please correct.\n");  m->setControl_pressed(true);  }
1794 
1795             for (int i = 0; i < getNumGroups(); i++) {   totalGroups[i] += groupCounts[i];  thisTotal += groupCounts[i]; }
1796             if (hasGroups) {  counts.push_back(compressAbunds(groupCounts));  }
1797             indexNameMap[seqName] = uniques;
1798             totals.push_back(thisTotal);
1799             total+= thisTotal;
1800             uniques++;
1801         }else {
1802             if (ignoreDup) {
1803                 m->mothurOut("[WARNING]: Your count table contains more than 1 sequence named " + seqName + ".  Mothur requires sequence names to be unique. I will only add it once.\n");
1804             }else {  m->mothurOut("[ERROR]: Your count table contains more than 1 sequence named " + seqName + ", sequence names must be unique. Please correct.\n");  m->setControl_pressed(true);  }
1805         }
1806 
1807         return thisTotal;
1808     }
1809     catch(exception& e) {
1810         m->errorOut(e, "CountTable", "push_back");
1811         exit(1);
1812     }
1813 }
1814 
1815 /************************************************************/
1816 //add sequence with group info
push_back(string seqName,vector<int> groupCounts)1817 int CountTable::push_back(string seqName, vector<int> groupCounts) {
1818     try {
1819         int thisTotal = 0;
1820         map<string, int>::iterator it = indexNameMap.find(seqName);
1821         if (it == indexNameMap.end()) {
1822             if ((hasGroups) && (groupCounts.size() != getNumGroups())) {  m->mothurOut("[ERROR]: Your count table has a " + toString(getNumGroups()) + " groups and " + seqName + " has " + toString(groupCounts.size()) + ", please correct.\n");  m->setControl_pressed(true);  }
1823 
1824             for (int i = 0; i < getNumGroups(); i++) {   totalGroups[i] += groupCounts[i];  thisTotal += groupCounts[i]; }
1825             if (hasGroups) {  counts.push_back(compressAbunds(groupCounts));  }
1826             indexNameMap[seqName] = uniques;
1827             totals.push_back(thisTotal);
1828             total+= thisTotal;
1829             uniques++;
1830         }else {
1831             m->mothurOut("[ERROR]: Your count table contains more than 1 sequence named " + seqName + ", sequence names must be unique. Please correct.\n");  m->setControl_pressed(true);
1832         }
1833 
1834         return thisTotal;
1835     }
1836 	catch(exception& e) {
1837 		m->errorOut(e, "CountTable", "push_back");
1838 		exit(1);
1839 	}
1840 }
1841 /************************************************************/
1842 //returns size of smallest group. If no groups, returns total num seqs (includes non uniques)
getNumSeqsSmallestGroup()1843 int CountTable::getNumSeqsSmallestGroup() {
1844     try {
1845         int smallestGroupSize = MOTHURMAX;
1846 
1847         if (hasGroups) {
1848             for (int i = 0; i < totalGroups.size(); i++) {
1849                 if (totalGroups[i] < smallestGroupSize) { smallestGroupSize = totalGroups[i]; }
1850             }
1851         }
1852         else { return total; }
1853 
1854         return smallestGroupSize;
1855     }
1856     catch(exception& e) {
1857         m->errorOut(e, "CountTable", "getNumSeqsSmallestGroup");
1858         exit(1);
1859     }
1860 }
1861 
1862 /************************************************************/
1863 //create ListVector from uniques
getListVector()1864 ListVector CountTable::getListVector() {
1865     try {
1866         ListVector list(indexNameMap.size(), "ASV");
1867         for (map<string, int>::iterator it = indexNameMap.begin(); it != indexNameMap.end(); it++) {
1868             if (m->getControl_pressed()) { break; }
1869             list.set(it->second, it->first);
1870         }
1871         return list;
1872     }
1873 	catch(exception& e) {
1874 		m->errorOut(e, "CountTable", "getListVector");
1875 		exit(1);
1876 	}
1877 }
1878 
1879 /************************************************************/
1880 //returns the names of all unique sequences in file
getNamesOfSeqs()1881 vector<string> CountTable::getNamesOfSeqs() {
1882     try {
1883         vector<string> names;
1884         for (map<string, int>::iterator it = indexNameMap.begin(); it != indexNameMap.end(); it++) {
1885             names.push_back(it->first);
1886         }
1887 
1888         return names;
1889     }
1890 	catch(exception& e) {
1891 		m->errorOut(e, "CountTable", "getNamesOfSeqs");
1892 		exit(1);
1893 	}
1894 }
1895 /************************************************************/
1896 //returns the names of all unique sequences in file mapped to their seqCounts
getNameMap()1897 map<string, int> CountTable::getNameMap() {
1898     try {
1899         map<string, int> names;
1900         for (map<string, int>::iterator it = indexNameMap.begin(); it != indexNameMap.end(); it++) {
1901             names[it->first] = totals[it->second];
1902         }
1903 
1904         return names;
1905     }
1906 	catch(exception& e) {
1907 		m->errorOut(e, "CountTable", "getNameMap");
1908 		exit(1);
1909 	}
1910 }
1911 /************************************************************/
1912 //returns the names of all unique sequences in file mapped to their seqCounts
getNameMap(string group)1913 map<string, int> CountTable::getNameMap(string group) {
1914     try {
1915         map<string, int> names;
1916 
1917         if (hasGroups) {
1918             map<string, int>::iterator it = indexGroupMap.find(group);
1919             if (it == indexGroupMap.end()) {
1920                 m->mothurOut("[ERROR]: " + group + " is not in your count table. Please correct.\n"); m->setControl_pressed(true);
1921             }else {
1922                 for (map<string, int>::iterator it2 = indexNameMap.begin(); it2 != indexNameMap.end(); it2++) {
1923                     int abund = getAbund(it2->second, it->second);
1924                     if (abund != 0) {  names[it2->first] = abund; }
1925                 }
1926             }
1927         }else{  m->mothurOut("[ERROR]: Your count table does not have group info. Please correct.\n");  m->setControl_pressed(true); }
1928 
1929         return names;
1930     }
1931     catch(exception& e) {
1932         m->errorOut(e, "CountTable", "getNameMap");
1933         exit(1);
1934     }
1935 }
1936 /************************************************************/
1937 //returns the names of all unique sequences in file
getNamesOfSeqs(string group)1938 vector<string> CountTable::getNamesOfSeqs(string group) {
1939     try {
1940         vector<string> names;
1941         if (hasGroups) {
1942             map<string, int>::iterator it = indexGroupMap.find(group);
1943             if (it == indexGroupMap.end()) {
1944                 m->mothurOut("[ERROR]: " + group + " is not in your count table. Please correct.\n"); m->setControl_pressed(true);
1945             }else {
1946                 for (map<string, int>::iterator it2 = indexNameMap.begin(); it2 != indexNameMap.end(); it2++) {
1947                     if (getAbund(it2->second, it->second) != 0) {  names.push_back(it2->first); }
1948                 }
1949             }
1950         }else{  m->mothurOut("[ERROR]: Your count table does not have group info. Please correct.\n");  m->setControl_pressed(true); }
1951 
1952         return names;
1953     }
1954 	catch(exception& e) {
1955 		m->errorOut(e, "CountTable", "getNamesOfSeqs");
1956 		exit(1);
1957 	}
1958 }
1959 /************************************************************/
1960 //returns the names of all unique sequences in file
getNamesOfSeqs(vector<string> chosenGroups)1961 vector<string> CountTable::getNamesOfSeqs(vector<string> chosenGroups) {
1962     try {
1963         vector<string> names;
1964         if (hasGroups) {
1965             set<string> uniqueNames;
1966             for (int i = 0; i < chosenGroups.size(); i++) {
1967                 vector<string> namesFromThisGroup = getNamesOfSeqs(chosenGroups[i]);
1968                 for (int j = 0; j < namesFromThisGroup.size(); j++) { uniqueNames.insert(namesFromThisGroup[j]);  }
1969             }
1970 
1971             //only adds names once. seqs are likely present in more than one group, but we only want to enter them once
1972             for (set<string>::iterator it = uniqueNames.begin(); it != uniqueNames.end(); it++) { names.push_back(*it); }
1973 
1974         }else{  m->mothurOut("[ERROR]: Your count table does not have group info. Please correct.\n");  m->setControl_pressed(true); }
1975 
1976         return names;
1977     }
1978     catch(exception& e) {
1979         m->errorOut(e, "CountTable", "getNamesOfSeqs");
1980         exit(1);
1981     }
1982 }
1983 
1984 /************************************************************/
1985 //merges counts of seq1 and seq2, saving in seq1
mergeCounts(string seq1,string seq2)1986 int CountTable::mergeCounts(string seq1, string seq2) {
1987     try {
1988         map<string, int>::iterator it = indexNameMap.find(seq1);
1989         if (it == indexNameMap.end()) {
1990             if (hasGroupInfo()) {
1991                 //look for it in names of groups to see if the user accidently used the wrong file
1992                 if (util.inUsersGroups(seq1, groups)) {
1993                     m->mothurOut("[WARNING]: Your group or design file contains a group named " + seq1 + ".  Perhaps you are used a group file instead of a design file? A common cause of this is using a tree file that relates your groups (created by the tree.shared command) with a group file that assigns sequences to a group.\n");
1994                 }
1995             }
1996             m->mothurOut("[ERROR]: " + seq1 + " is not in your count table. Please correct.\n"); m->setControl_pressed(true);
1997         }else {
1998             map<string, int>::iterator it2 = indexNameMap.find(seq2);
1999             if (it2 == indexNameMap.end()) {
2000                 if (hasGroupInfo()) {
2001                     //look for it in names of groups to see if the user accidently used the wrong file
2002                     if (util.inUsersGroups(seq2, groups)) {
2003                         m->mothurOut("[WARNING]: Your group or design file contains a group named " + seq2 + ".  Perhaps you are used a group file instead of a design file? A common cause of this is using a tree file that relates your groups (created by the tree.shared command) with a group file that assigns sequences to a group.\n");
2004                     }
2005                 }
2006                 m->mothurOut("[ERROR]: " + seq2 + " is not in your count table. Please correct.\n"); m->setControl_pressed(true);
2007             }else {
2008                 if (hasGroupInfo()) { //if no group data then counts are empty
2009                     //merge data
2010                     vector<int> countsSeq1 = expandAbunds(it->second);
2011                     vector<int> countsSeq2 = expandAbunds(it2->second);
2012 
2013                     for (int i = 0; i < groups.size(); i++) { countsSeq1[i] += countsSeq2[i]; }
2014 
2015                     counts[it->second] = compressAbunds(countsSeq1);
2016                 }
2017                 totals[it->second] += totals[it2->second];
2018                 uniques--;
2019                 indexNameMap.erase(it2);
2020             }
2021         }
2022         return 0;
2023     }
2024 	catch(exception& e) {
2025 		m->errorOut(e, "CountTable", "getNamesOfSeqs");
2026 		exit(1);
2027 	}
2028 }
2029 /************************************************************/
copy(CountTable * ct)2030 int CountTable::copy(CountTable* ct) {
2031     try {
2032         vector<string> thisGroups = ct->getNamesOfGroups();
2033         for (int i = 0; i < thisGroups.size(); i++) { addGroup(thisGroups[i]); }
2034         vector<string> names = ct->getNamesOfSeqs();
2035 
2036         for (int i = 0; i < names.size(); i++) {
2037             vector<int> thisCounts = ct->getGroupCounts(names[i]);
2038             push_back(names[i], thisCounts, false);
2039         }
2040 
2041         isCompressed = ct->isTableCompressed();
2042 
2043         return 0;
2044     }
2045 	catch(exception& e) {
2046 		m->errorOut(e, "CountTable", "copy");
2047 		exit(1);
2048 	}
2049 }
2050 /***********************************************************************/
2051 
sortCountTable()2052 int CountTable::sortCountTable(){
2053     try {
2054 
2055         //sorts each rows abunds by group
2056         //counts[i] = (1,4),(1,2),(3,7) -> (1,2),(1,4),(3,7)
2057         for (int i = 0; i < counts.size(); i++) {  sort(counts[i].begin(), counts[i].end(), compareGroups); }
2058 
2059         return 0;
2060     }
2061     catch(exception& e) {
2062         m->errorOut(e, "CountTable", "sortCountTable");
2063         exit(1);
2064     }
2065 }
2066 /***********************************************************************/
2067 
sortRow(int index)2068 int CountTable::sortRow(int index){
2069     try {
2070 
2071         //saves time in getSmallestCell, by making it so you dont search the repeats
2072         sort(counts[index].begin(), counts[index].end(), compareGroups);
2073 
2074         return 0;
2075     }
2076     catch(exception& e) {
2077         m->errorOut(e, "CountTable", "sortRow");
2078         exit(1);
2079     }
2080 }
2081 
2082 /************************************************************/
2083