1 //
2 // counttable.cpp
3 // Mothur
4 //
5 // Created by Sarah Westcott on 6/26/12.
6 // Copyright (c) 2012 Schloss Lab. All rights reserved.
7 //
8
9 #include "counttable.h"
10
11 /************************************************************/
12 //used by tree commands
createTable(set<string> & n,map<string,string> & g,set<string> & gs)13 int CountTable::createTable(set<string>& n, map<string, string>& g, set<string>& gs) {
14 try {
15 hasGroups = false;
16 int numGroups = 0;
17 groups.clear();
18 totalGroups.clear();
19 indexGroupMap.clear();
20 indexNameMap.clear();
21 counts.clear();
22 for (set<string>::iterator it = gs.begin(); it != gs.end(); it++) { groups.push_back(*it); hasGroups = true; }
23 numGroups = groups.size();
24 totalGroups.resize(numGroups, 0);
25
26 //sort groups to keep consistent with how we store the groups in groupmap
27 sort(groups.begin(), groups.end());
28 for (int i = 0; i < groups.size(); i++) { indexGroupMap[groups[i]] = i; }
29
30 uniques = 0;
31 total = 0;
32 bool error = false;
33 //n contains treenames
34 for (set<string>::iterator it = n.begin(); it != n.end(); it++) {
35
36 if (m->getControl_pressed()) { break; }
37
38 string seqName = *it;
39
40 vector<countTableItem> groupCounts;
41 map<string, string>::iterator itGroup = g.find(seqName);
42
43 if (itGroup != g.end()) {
44 groupCounts.push_back(countTableItem(1, indexGroupMap[itGroup->second]));
45 totalGroups[indexGroupMap[itGroup->second]]++;
46 }else {
47 //look for it in names of groups to see if the user accidently used the wrong file
48 if (util.inUsersGroups(seqName, groups)) {
49 m->mothurOut("[WARNING]: Your group or design file contains a group named " + seqName + ". Perhaps you are used a group file instead of a design file? A common cause of this is using a tree file that relates your groups (created by the tree.shared command) with a group file that assigns sequences to a group.\n");
50 }
51 m->mothurOut("[ERROR]: Your group file does not contain " + seqName + ". Please correct.\n");
52 }
53
54 map<string, int>::iterator it2 = indexNameMap.find(seqName);
55 if (it2 == indexNameMap.end()) {
56 if (hasGroups) { counts.push_back(groupCounts); }
57 indexNameMap[seqName] = uniques;
58 totals.push_back(1);
59 total++;
60 uniques++;
61 }else {
62 error = true;
63 m->mothurOut("[ERROR]: Your count table contains more than 1 sequence named " + seqName + ", sequence names must be unique. Please correct.\n");
64 }
65
66 }
67 if (error) { m->setControl_pressed(true); }
68 else { //check for zero groups
69 if (hasGroups) {
70 for (int i = 0; i < totalGroups.size(); i++) {
71 if (totalGroups[i] == 0) { m->mothurOut("\nRemoving group: " + groups[i] + " because all sequences have been removed.\n"); removeGroup(groups[i]); i--; }
72 }
73 }
74 }
75 return 0;
76 }
77 catch(exception& e) {
78 m->errorOut(e, "CountTable", "createTable");
79 exit(1);
80 }
81 }
82 /************************************************************/
testGroups(string file)83 bool CountTable::testGroups(string file) {
84 try {
85 vector<string> nothing;
86 return testGroups(file, nothing);
87 }
88 catch(exception& e) {
89 m->errorOut(e, "CountTable", "testGroups");
90 exit(1);
91 }
92 }
93
94 /************************************************************/
testGroups(string file,vector<string> & groups)95 bool CountTable::testGroups(string file, vector<string>& groups) {
96 try {
97 m = MothurOut::getInstance(); hasGroups = false; total = 0;
98 ifstream in;
99 util.openInputFile(file, in);
100
101 string headers = util.getline(in); util.gobble(in);
102
103 if (headers[0] == '#') { //is this a count file in compressed form
104 isCompressed = true;
105
106 //read headers
107 headers = util.getline(in); util.gobble(in); //gets compressed group name map line
108 headers = util.getline(in); util.gobble(in);
109 }
110
111 vector<string> columnHeaders = util.splitWhiteSpace(headers);
112
113 if (columnHeaders.size() > 2) {
114 hasGroups = true;
115
116 for (int i = 2; i < columnHeaders.size(); i++) {
117 groups.push_back(columnHeaders[i]);
118 }
119 //sort groups to keep consistent with how we store the groups in groupmap
120 sort(groups.begin(), groups.end());
121 }
122
123 return hasGroups;
124 }
125 catch(exception& e) {
126 m->errorOut(e, "CountTable", "testGroups");
127 exit(1);
128 }
129 }
130
131 /************************************************************/
132
setNamesOfGroups(vector<string> mygroups)133 bool CountTable::setNamesOfGroups(vector<string> mygroups) {
134 try {
135 //remove groups from table not in new groups we are setting
136 for (int i = 0; i < groups.size();) {
137 if (util.inUsersGroups(groups[i], mygroups)) { ++i; }
138 else { removeGroup(groups[i]); }
139 }
140
141 //add any new groups in new groups list to table
142 for (int i = 0; i < mygroups.size(); i++) {
143 if (util.inUsersGroups(mygroups[i], groups)) {}
144 else { addGroup(mygroups[i]); }
145 }
146
147 //false if error
148 return (!m->getControl_pressed());
149 }
150 catch(exception& e) {
151 m->errorOut(e, "CountTable", "setNamesOfGroups");
152 exit(1);
153 }
154 }
155
156 /************************************************************/
157
createTable(string namefile,string groupfile,vector<string> selectedGroups,bool createGroup)158 int CountTable::createTable(string namefile, string groupfile, vector<string> selectedGroups, bool createGroup) {
159 try {
160
161 GroupMap* groupMap;
162 int numGroups = 0;
163 groups.clear();
164 totalGroups.clear();
165 indexGroupMap.clear();
166 indexNameMap.clear();
167 counts.clear();
168 map<int, string> originalGroupIndexes;
169 uniques = 0;
170 total = 0;
171 bool error = false;
172 bool pickedGroups = false;
173 if (selectedGroups.size() != 0) { pickedGroups = true; }
174
175 if (groupfile != "") {
176 hasGroups = true;
177 groupMap = new GroupMap(groupfile); groupMap->readMap(selectedGroups);
178 numGroups = groupMap->getNumGroups();
179 groups = groupMap->getNamesOfGroups();
180 totalGroups.resize(numGroups, 0);
181 }else if(createGroup) {
182 hasGroups = true;
183 numGroups = 1;
184 groups.push_back("Group1");
185 totalGroups.resize(numGroups, 0);
186 }
187
188 //sort groups to keep consistent with how we store the groups in groupmap
189 sort(groups.begin(), groups.end());
190 for (int i = 0; i < groups.size(); i++) { indexGroupMap[groups[i]] = i; }
191
192 if ((namefile == "") && (groupfile == "")) { m->mothurOut("[ERROR]: No name or group file given. You must provide a name or group file to create a count file, please correct.\n"); m->setControl_pressed(true); return 0; }
193
194 else if (namefile != "") {
195
196 ifstream in; util.openInputFile(namefile, in);
197
198 while (!in.eof()) {
199 if (m->getControl_pressed()) { break; }
200
201 string firstCol, secondCol;
202 in >> firstCol; util.gobble(in); in >> secondCol; util.gobble(in);
203
204 util.checkName(firstCol);
205 util.checkName(secondCol);
206
207 vector<string> names;
208 util.splitAtChar(secondCol, names, ',');
209
210 map<string, int> groupCounts;
211 for (int i = 0; i < groups.size(); i++) { groupCounts[groups[i]] = 0; } //initialize groupCounts
212
213 int thisTotal = 0;
214 if (groupfile != "") {
215
216 //get counts for each of the users groups
217 for (int i = 0; i < names.size(); i++) {
218 string group = groupMap->getGroup(names[i]);
219
220 if (group == "not found") {
221 if (!pickedGroups) { m->mothurOut("[ERROR]: " + names[i] + " is not in your groupfile, please correct.\n"); error=true; }
222 //else - ignore because we assume this read is from a group we are not interested in
223 }else { //this is a read from a group we want to save
224 map<string, int>::iterator it = groupCounts.find(group);
225
226 //if not found, then this sequence is not from a group we care about
227 if (it != groupCounts.end()) { it->second++; }
228 thisTotal++;
229 }
230 }
231 }else if (createGroup) {
232 thisTotal = names.size();
233 groupCounts["Group1"] = thisTotal;
234 }else { thisTotal = names.size(); }
235
236 //if group info, then read it
237 vector<countTableItem> thisGroupsCount;
238 for (map<string, int>::iterator it = groupCounts.begin(); it != groupCounts.end(); it++) {
239 int groupIndex = indexGroupMap[it->first];
240 int abund = it->second;
241 if (abund != 0) {
242 countTableItem thisAbund(it->second, groupIndex);
243 thisGroupsCount.push_back(thisAbund);
244 totalGroups[groupIndex] += abund;
245 }
246
247 }
248
249 map<string, int>::iterator it = indexNameMap.find(firstCol);
250 if (it == indexNameMap.end()) {
251
252 if (hasGroups) { counts.push_back(thisGroupsCount); }
253 indexNameMap[firstCol] = uniques;
254 totals.push_back(thisTotal);
255 total += thisTotal;
256 uniques++;
257
258 }else { error = true; m->mothurOut("[ERROR]: Your count table contains more than 1 sequence named " + firstCol + ", sequence names must be unique. Please correct.\n"); }
259 }
260 in.close();
261
262 }else if ((namefile == "") && (groupfile != "")) { //create count file from group only
263
264 vector<string> names = groupMap->getNamesSeqs(); //only contains names from selectedGroups or all groups if selectedGroups is empty
265
266 for (int i = 0; i < names.size(); i++) {
267 if (m->getControl_pressed()) { break; }
268
269 vector<countTableItem> abunds;
270 string group = groupMap->getGroup(names[i]);
271 int groupIndex = indexGroupMap[group];
272 totalGroups[groupIndex]++;
273 countTableItem thisAbund(1, groupIndex);
274 abunds.push_back(thisAbund);
275
276 map<string, int>::iterator it = indexNameMap.find(names[i]);
277 if (it == indexNameMap.end()) {
278
279 counts.push_back(abunds);
280 indexNameMap[names[i]] = uniques;
281 totals.push_back(1);
282 total++;
283 uniques++;
284
285 }else { error = true; m->mothurOut("[ERROR]: Your count table contains more than 1 sequence named " + names[i] + ", sequence names must be unique. Please correct.\n"); }
286 }
287 }
288
289 if (error) { m->setControl_pressed(true); }
290 else { //check for zero groups
291 if (hasGroups) {
292 for (int i = 0; i < totalGroups.size(); i++) {
293 if (totalGroups[i] == 0) { m->mothurOut("\nRemoving group: " + groups[i] + " because all sequences have been removed.\n"); removeGroup(groups[i]); i--; }
294 }
295 }
296 }
297 if (groupfile != "") { delete groupMap; }
298
299 return total;
300 }
301 catch(exception& e) {
302 m->errorOut(e, "CountTable", "createTable");
303 exit(1);
304 }
305 }
306 /************************************************************/
readTable(string file,string format)307 int CountTable::readTable(string file, string format) {
308 try {
309 if (format == "fasta") {
310 filename = file;
311 ifstream in;
312 util.openInputFile(filename, in);
313
314 hasGroups = false;
315 groups.clear();
316 totalGroups.clear();
317 indexGroupMap.clear();
318 indexNameMap.clear();
319 counts.clear();
320 bool error = false;
321 uniques = 0;
322 total = 0;
323 while (!in.eof()) {
324
325 if (m->getControl_pressed()) { break; }
326
327 Sequence seq(in); util.gobble(in);
328 string name = seq.getName();
329 if (m->getDebug()) { m->mothurOut("[DEBUG]: " + name + '\t' + toString(1) + "\n"); }
330
331 map<string, int>::iterator it = indexNameMap.find(name);
332 if (it == indexNameMap.end()) {
333 indexNameMap[name] = uniques;
334 totals.push_back(1);
335 total ++;
336 uniques++;
337 }else {
338 error = true;
339 m->mothurOut("[ERROR]: Your count table contains more than 1 sequence named " + name + ", sequence names must be unique. Please correct.\n");
340 }
341 }
342 in.close();
343
344 if (error) { m->setControl_pressed(true); }
345 }else { m->mothurOut("[ERROR]: Unsupported format: " + format + ", please correct.\n"); m->setControl_pressed(true); }
346
347 return total;
348 }
349 catch(exception& e) {
350 m->errorOut(e, "CountTable", "readTable");
351 exit(1);
352 }
353 }
354 /************************************************************/
readTable(string file,bool readGroups,bool mothurRunning)355 int CountTable::readTable(string file, bool readGroups, bool mothurRunning) {
356 try {
357
358 readTable(file, readGroups, mothurRunning, nullVector);
359
360 return total;
361 }
362 catch(exception& e) {
363 m->errorOut(e, "CountTable", "readTable");
364 exit(1);
365 }
366 }
367 /************************************************************/
readTable(ifstream & in,bool readGroups,bool mothurRunning)368 int CountTable::readTable(ifstream& in, bool readGroups, bool mothurRunning) {
369 try {
370 readTable(in, readGroups, mothurRunning, nullVector);
371 return total;
372 }
373 catch(exception& e) {
374 m->errorOut(e, "CountTable", "readTable");
375 exit(1);
376 }
377 }
378 /************************************************************/
isCountTable(string file)379 bool CountTable::isCountTable(string file) {
380 try {
381
382 filename = file;
383 ifstream in;
384 util.openInputFile(filename, in);
385
386 string headers = util.getline(in); util.gobble(in);
387
388 if (headers[0] == '#') { //is this a count file in compressed form
389 isCompressed = true;
390
391 //read headers
392 headers = util.getline(in); util.gobble(in); //gets compressed group name map line
393 headers = util.getline(in); util.gobble(in);
394 }
395 vector<string> columnHeaders = util.splitWhiteSpace(headers);
396 in.close();
397
398 bool isCount = true;
399 if (columnHeaders.size() >= 2) {
400 vector<string> defaultHeaders = getHardCodedHeaders();
401 if (defaultHeaders.size() >= 2) {
402 if ((columnHeaders[0] != defaultHeaders[0]) && (columnHeaders[0] != "OTU_Label")) { isCount = false; }
403 if (columnHeaders[1] != defaultHeaders[1]) { isCount = false; }
404 }else { isCount = false; }
405 }else { isCount = false; }
406
407 return isCount;
408
409 }
410 catch(exception& e) {
411 m->errorOut(e, "CountTable", "isCountTable");
412 exit(1);
413 }
414 }
415 /************************************************************/
readTable(string file,bool readGroups,bool mothurRunning,vector<string> selectedGroups)416 int CountTable::readTable(string file, bool readGroups, bool mothurRunning, vector<string> selectedGroups) {
417 try {
418 filename = file;
419 ifstream in;
420 util.openInputFile(filename, in);
421
422 readTable(in, readGroups, mothurRunning, selectedGroups);
423
424 in.close();
425
426 return 0;
427 }
428 catch(exception& e) {
429 m->errorOut(e, "CountTable", "readTable");
430 exit(1);
431 }
432 }
433 /************************************************************/
readTable(ifstream & in,bool readGroups,bool mothurRunning,vector<string> selectedGroups)434 int CountTable::readTable(ifstream& in, bool readGroups, bool mothurRunning, vector<string> selectedGroups) {
435 try {
436 if (!readGroups) { selectedGroups.clear(); }
437
438 string headers = util.getline(in); util.gobble(in);
439
440 map<string, int> headerIndex2Group;
441 //#1,F003D000 2,F003D002 3,F003D004 4,F003D006 5,F003D008 6,F003D142 7,F003D144 8,F003D146 9,F003D148 10,F003D150
442 if (headers[0] == '#') { //is this a count file in compressed form
443 isCompressed = true;
444
445 //read headers
446 headers = util.getline(in); util.gobble(in); //gets compressed group name map line
447 headers = headers.substr(1);
448
449 vector<string> groupNameHeaders = util.splitWhiteSpace(headers);
450
451 for (int i = 0; i < groupNameHeaders.size(); i++) {
452 string groupIndex = ""; string groupName = groupNameHeaders[i];
453 util.splitAtComma(groupIndex, groupName);
454 int a; util.mothurConvert(groupIndex, a);
455 headerIndex2Group[groupName] = a-1;
456 }
457
458 headers = util.getline(in); util.gobble(in);
459 }
460
461 vector<string> columnHeaders = util.splitWhiteSpace(headers);
462
463 int numGroupsInFile = 0;
464 groups.clear();
465 totalGroups.clear();
466 indexGroupMap.clear();
467 indexNameMap.clear();
468 counts.clear();
469 map<int, string> originalGroupIndexes;
470 if ((columnHeaders.size() > 2) && readGroups) { hasGroups = true; numGroupsInFile = columnHeaders.size() - 2; }
471
472 set<string> setOfSelectedGroups;
473 if (readGroups) {
474 for (int i = 2; i < columnHeaders.size(); i++) {
475 bool saveGroup = true;
476 if (selectedGroups.size() != 0) {
477 if (!(util.inUsersGroups(columnHeaders[i], selectedGroups))) { saveGroup = false; }
478 } //is this group in selected groups
479
480 if (saveGroup) {
481 groups.push_back(columnHeaders[i]);
482 if (isCompressed) {
483 map<string, int>::iterator it = headerIndex2Group.find(columnHeaders[i]);
484 if (it != headerIndex2Group.end()) {
485 originalGroupIndexes[it->second] = columnHeaders[i];
486 }
487 }
488 else { originalGroupIndexes[i-2] = columnHeaders[i]; }
489 totalGroups.push_back(0);
490 setOfSelectedGroups.insert(columnHeaders[i]);
491 }
492 }
493 }
494
495 //sort groups to keep consistent with how we store the groups in groupmap
496 sort(groups.begin(), groups.end());
497 for (int i = 0; i < groups.size(); i++) { indexGroupMap[groups[i]] = i; }
498 int numGroupsSelected = groups.size();
499
500 bool error = false;
501 string name;
502 int thisTotal = 0;
503 uniques = 0;
504 total = 0;
505 while (!in.eof()) {
506
507 if (m->getControl_pressed()) { break; }
508
509 in >> name; util.gobble(in); in >> thisTotal; util.gobble(in);
510 if (m->getDebug()) { m->mothurOut("[DEBUG]: " + name + '\t' + toString(thisTotal) + "\n"); }
511
512 if ((thisTotal == 0) && !mothurRunning) { error=true; m->mothurOut("[ERROR]: Your count table contains a sequence named " + name + " with a total=0. Please correct.\n");
513 }
514
515 //if group info, then read it
516 vector<int> groupCounts; groupCounts.resize(numGroupsSelected, 0);
517 if (columnHeaders.size() > 2) { //file contains groups
518 if (readGroups) { //user wants to save them
519 if (selectedGroups.size() != 0) {
520 //read this seqs groups abundances
521 thisTotal = 0;
522 if (isCompressed) {
523 string groupInfo = util.getline(in); util.gobble(in);
524 vector<string> groupNodes = util.splitWhiteSpace(groupInfo);
525
526 vector<countTableItem> abunds;
527 for (int i = 0; i < groupNodes.size(); i++) { //for each non zero group count
528 string abund = groupNodes[i]; string thisgroup = "";
529 util.splitAtComma(thisgroup, abund);
530 int a; util.mothurConvert(abund, a);
531 int g; util.mothurConvert(thisgroup, g); g--;
532 string groupName = originalGroupIndexes[g]; //order of groups in file may not be sorted
533
534 if (setOfSelectedGroups.count(groupName) != 0) { //we selected this group
535 int thisIndex = indexGroupMap[groupName];
536 countTableItem item(a, thisIndex);
537 abunds.push_back(item);
538 totalGroups[thisIndex] += a;
539 thisTotal += a;
540 }
541 }
542
543 groupCounts = expandAbunds(abunds);
544 }else {
545 for (int i = 0; i < numGroupsInFile; i++) {
546 int thisGroupAbund = 0;
547 in >> thisGroupAbund; util.gobble(in);
548 string groupName = originalGroupIndexes[i]; //order of groups in file may not be sorted
549
550 if (setOfSelectedGroups.count(groupName) != 0) { //we selected this group
551 int thisIndex = indexGroupMap[groupName];
552 groupCounts[thisIndex] = thisGroupAbund;
553 totalGroups[thisIndex] += thisGroupAbund;
554 thisTotal += thisGroupAbund;
555 }
556 }
557 }
558 }else {
559
560 if (isCompressed) {
561 string groupInfo = util.getline(in); util.gobble(in);
562 vector<string> groupNodes = util.splitWhiteSpace(groupInfo);
563
564 vector<countTableItem> abunds;
565 for (int i = 0; i < groupNodes.size(); i++) { //for each non zero group count
566 string abund = groupNodes[i]; string thisgroup = "";
567 util.splitAtComma(thisgroup, abund);
568 int a; util.mothurConvert(abund, a);
569 int g; util.mothurConvert(thisgroup, g); g--;
570 string groupName = originalGroupIndexes[g]; //order of groups in file may not be sorted
571 int thisIndex = indexGroupMap[groupName];
572 countTableItem item(a, thisIndex);
573
574 abunds.push_back(item);
575 totalGroups[thisIndex] += a;
576 }
577
578 groupCounts = expandAbunds(abunds);
579 }
580 else {
581 for (int i = 0; i < numGroupsInFile; i++) {
582 int thisIndex = indexGroupMap[originalGroupIndexes[i]];
583 in >> groupCounts[thisIndex]; util.gobble(in);
584 totalGroups[thisIndex] += groupCounts[thisIndex];
585 }
586 }
587
588 }
589 }else { //read and discard
590 util.getline(in); util.gobble(in);
591 }
592 }
593
594 map<string, int>::iterator it = indexNameMap.find(name);
595 if (it == indexNameMap.end()) {
596 bool saveSeq = true;
597 if (hasGroups && readGroups) {
598 vector<countTableItem> thisGroupsCount = compressAbunds(groupCounts);
599 if (thisGroupsCount.size() == 0) { saveSeq = false; }
600 else { counts.push_back(thisGroupsCount); }
601 }
602 if (saveSeq) {
603 indexNameMap[name] = uniques;
604 totals.push_back(thisTotal);
605 total += thisTotal;
606 uniques++;
607 }
608 }else {
609 error = true;
610 m->mothurOut("[ERROR]: Your count table contains more than 1 sequence named " + name + ", sequence names must be unique. Please correct.\n");
611 }
612 }
613
614 if (error) { m->setControl_pressed(true); }
615 else { //check for zero groups
616 if (hasGroups && readGroups) {
617 for (int i = 0; i < totalGroups.size(); i++) {
618 if (totalGroups[i] == 0) { m->mothurOut("\nRemoving group: " + groups[i] + " because all sequences have been removed.\n");
619 removeGroup(groups[i]);
620 i--;
621 }
622 }
623 }
624 }
625
626 //if the file has groups, but we didn't read them
627 if (!readGroups) { hasGroups = false; }
628
629 return 0;
630 }
631 catch(exception& e) {
632 m->errorOut(e, "CountTable", "readTable");
633 exit(1);
634 }
635 }
636 /************************************************************/
readTable(string file,bool readGroups,bool mothurRunning,set<string> selectedSeqs)637 int CountTable::readTable(string file, bool readGroups, bool mothurRunning, set<string> selectedSeqs) {
638 try {
639 filename = file;
640 ifstream in;
641 util.openInputFile(filename, in);
642
643 string headers = util.getline(in); util.gobble(in);
644
645 map<string, int> headerIndex2Group;
646 //#1,F003D000 2,F003D002 3,F003D004 4,F003D006 5,F003D008 6,F003D142 7,F003D144 8,F003D146 9,F003D148 10,F003D150
647 if (headers[0] == '#') { //is this a count file in compressed form
648 isCompressed = true;
649
650 //read headers
651 headers = util.getline(in); util.gobble(in); //gets compressed group name map line
652 headers = headers.substr(1);
653
654 vector<string> groupNameHeaders = util.splitWhiteSpace(headers);
655
656 for (int i = 0; i < groupNameHeaders.size(); i++) {
657 string groupIndex = ""; string groupName = groupNameHeaders[i];
658 util.splitAtComma(groupIndex, groupName);
659 int a; util.mothurConvert(groupIndex, a);
660 headerIndex2Group[groupName] = a-1;
661 }
662
663 headers = util.getline(in); util.gobble(in);
664 }
665
666 vector<string> columnHeaders = util.splitWhiteSpace(headers);
667
668 int numGroupsInFile = 0;
669 groups.clear();
670 totalGroups.clear();
671 indexGroupMap.clear();
672 indexNameMap.clear();
673 counts.clear();
674 map<int, string> originalGroupIndexes;
675 if ((columnHeaders.size() > 2) && readGroups) { hasGroups = true; numGroupsInFile = columnHeaders.size() - 2; }
676
677
678 if (readGroups) {
679 for (int i = 2; i < columnHeaders.size(); i++) {
680 groups.push_back(columnHeaders[i]);
681
682 if (isCompressed) {
683 map<string, int>::iterator it = headerIndex2Group.find(columnHeaders[i]);
684 if (it != headerIndex2Group.end()) {
685 originalGroupIndexes[it->second] = columnHeaders[i];
686 }
687 }else { originalGroupIndexes[i-2] = columnHeaders[i]; }
688 totalGroups.push_back(0);
689 }
690 }
691
692 //sort groups to keep consistent with how we store the groups in groupmap
693 sort(groups.begin(), groups.end());
694 for (int i = 0; i < groups.size(); i++) { indexGroupMap[groups[i]] = i; }
695 int numGroups = groups.size();
696
697 bool error = false;
698 string name;
699 int thisTotal;
700 uniques = 0;
701 total = 0;
702 while (!in.eof()) {
703
704 if (m->getControl_pressed()) { break; }
705
706 in >> name; util.gobble(in); in >> thisTotal; util.gobble(in);
707 if (m->getDebug()) { m->mothurOut("[DEBUG]: " + name + '\t' + toString(thisTotal) + "\n"); }
708
709 if ((thisTotal == 0) && !mothurRunning) { error=true; m->mothurOut("[ERROR]: Your count table contains a sequence named " + name + " with a total=0. Please correct.\n");
710 }
711
712 vector<int> groupCounts; groupCounts.resize(numGroups, 0);
713 if (columnHeaders.size() > 2) { //file contains groups
714 if (readGroups) { //user wants to save them
715 if (isCompressed) {
716 string groupInfo = util.getline(in); util.gobble(in);
717 vector<string> groupNodes = util.splitWhiteSpace(groupInfo);
718
719 vector<countTableItem> abunds;
720 for (int i = 0; i < groupNodes.size(); i++) { //for each non zero group count
721 string abund = groupNodes[i]; string thisgroup = "";
722 util.splitAtComma(thisgroup, abund);
723 int a; util.mothurConvert(abund, a);
724 int g; util.mothurConvert(thisgroup, g); g--;
725 string groupName = originalGroupIndexes[g]; //order of groups in file may not be sorted
726 int thisIndex = indexGroupMap[groupName];
727 countTableItem item(a, thisIndex);
728
729 abunds.push_back(item);
730 totalGroups[thisIndex] += a;
731 }
732
733 groupCounts = expandAbunds(abunds);
734 }
735 else {
736 for (int i = 0; i < numGroupsInFile; i++) { int thisIndex = indexGroupMap[originalGroupIndexes[i]]; in >> groupCounts[thisIndex]; util.gobble(in); totalGroups[thisIndex] += groupCounts[thisIndex]; }
737 }
738 }else { util.getline(in); util.gobble(in); }//read and discard
739 }
740
741 map<string, int>::iterator it = indexNameMap.find(name);
742 if (it == indexNameMap.end()) {
743 bool saveSeq = true;
744 if (selectedSeqs.count(name) == 0) { //don't save
745 saveSeq = false;
746 }
747 if (saveSeq) {
748 if (hasGroups && readGroups) {
749 vector<countTableItem> thisGroupsCount = compressAbunds(groupCounts);
750 counts.push_back(thisGroupsCount);
751 }
752 indexNameMap[name] = uniques;
753 totals.push_back(thisTotal);
754 total += thisTotal;
755 uniques++;
756 }
757 }else {
758 error = true;
759 m->mothurOut("[ERROR]: Your count table contains more than 1 sequence named " + name + ", sequence names must be unique. Please correct.\n");
760 }
761 }
762 in.close();
763
764 if (error) { m->setControl_pressed(true); }
765 else { //check for zero groups
766 if (hasGroups && readGroups) {
767 for (int i = 0; i < totalGroups.size(); i++) {
768 if (totalGroups[i] == 0) { m->mothurOut("\nRemoving group: " + groups[i] + " because all sequences have been removed.\n"); removeGroup(groups[i]); i--; }
769 }
770 }
771 }
772
773 //if the file has groups, but we didn't read them
774 if (!readGroups) { hasGroups = false; }
775
776 return 0;
777 }
778 catch(exception& e) {
779 m->errorOut(e, "CountTable", "readTable");
780 exit(1);
781 }
782 }
783 /************************************************************/
784
zeroOutTable()785 int CountTable::zeroOutTable() {
786 try {
787
788 for(int i=0;i<counts.size();i++){
789 for(int j=0;j<counts[0].size();j++){
790 counts[j].clear();
791 }
792 }
793
794 totals.assign(totals.size(), 0);
795
796 return 0;
797 }
798 catch(exception& e) {
799 m->errorOut(e, "CountTable", "zeroOutTable");
800 exit(1);
801 }
802 }
803 /************************************************************/
804
clearTable()805 int CountTable::clearTable() {
806 try {
807 hasGroups = false;
808 total = 0;
809 uniques = 0;
810 groups.clear();
811 counts.clear();
812 totals.clear();
813 totalGroups.clear();
814 indexNameMap.clear();
815 indexGroupMap.clear();
816
817 return 0;
818 }
819 catch(exception& e) {
820 m->errorOut(e, "CountTable", "clearTable");
821 exit(1);
822 }
823 }
824 /************************************************************/
825 //zeroed reads are not printed
printTable(string file)826 vector<string> CountTable::printTable(string file) {
827 try {
828
829 //remove group if all reads are removed
830 for (int i = 0; i < totalGroups.size(); i++) {
831 if (totalGroups[i] == 0) { m->mothurOut("\nRemoving group: " + groups[i] + " because all sequences have been removed.\n"); removeGroup(groups[i]); i--; }
832 }
833
834 if (isCompressed) { return printCompressedTable(file); }
835
836 ofstream out;
837 util.openOutputFile(file, out);
838
839 vector<string> namesInTable;
840
841 if (total != 0) {
842 printHeaders(out);
843
844 map<int, string> reverse; //use this to preserve order
845 for (map<string, int>::iterator it = indexNameMap.begin(); it !=indexNameMap.end(); it++) { reverse[it->second] = it->first; }
846
847 for (int i = 0; i < totals.size(); i++) {
848 if (totals[i] != 0) {
849
850 map<int, string>::iterator itR = reverse.find(i);
851
852 if (itR != reverse.end()) {
853
854 namesInTable.push_back(itR->second);
855
856 out << itR->second << '\t' << totals[i];
857
858 if (hasGroups) { printGroupAbunds(out, i); }
859
860 out << endl;
861 }
862 }
863 }
864 }
865 out.close();
866 return namesInTable;
867 }
868 catch(exception& e) {
869 m->errorOut(e, "CountTable", "printTable");
870 exit(1);
871 }
872 }
873 /************************************************************/
874 //zeroed reads are not printed
printNoGroupsTable(string file)875 vector<string> CountTable::printNoGroupsTable(string file) {
876 try {
877
878 ofstream out;
879 util.openOutputFile(file, out);
880
881 vector<string> namesInTable;
882
883 if (total != 0) {
884 vector<string> headers = getHardCodedHeaders();
885 out << headers[0] << '\t' << headers[1] << endl;
886
887 map<int, string> reverse; //use this to preserve order
888 for (map<string, int>::iterator it = indexNameMap.begin(); it !=indexNameMap.end(); it++) { reverse[it->second] = it->first; }
889
890 for (int i = 0; i < totals.size(); i++) {
891 if (totals[i] != 0) {
892
893 map<int, string>::iterator itR = reverse.find(i);
894
895 if (itR != reverse.end()) {
896
897 namesInTable.push_back(itR->second);
898
899 out << itR->second << '\t' << totals[i] << endl;
900 }
901 }
902 }
903 }
904 out.close();
905 return namesInTable;
906 }
907 catch(exception& e) {
908 m->errorOut(e, "CountTable", "printTable");
909 exit(1);
910 }
911 }
912 /************************************************************/
913 //zeroed reads are not printed
printTable(string file,bool compressedFormat)914 vector<string> CountTable::printTable(string file, bool compressedFormat) {
915 try {
916
917 //remove group if all reads are removed
918 for (int i = 0; i < totalGroups.size(); i++) {
919 if (totalGroups[i] == 0) { m->mothurOut("\nRemoving group: " + groups[i] + " because all sequences have been removed.\n"); removeGroup(groups[i]); i--; }
920 }
921
922 if (compressedFormat) { return printCompressedTable(file); }
923
924 ofstream out;
925 util.openOutputFile(file, out);
926
927 vector<string> namesInTable;
928
929 if (total != 0) {
930 printHeaders(out);
931
932 map<int, string> reverse; //use this to preserve order
933 for (map<string, int>::iterator it = indexNameMap.begin(); it !=indexNameMap.end(); it++) { reverse[it->second] = it->first; }
934
935 for (int i = 0; i < totals.size(); i++) {
936
937 if (totals[i] != 0) {
938
939 map<int, string>::iterator itR = reverse.find(i);
940
941 if (itR != reverse.end()) {
942 namesInTable.push_back(itR->second);
943
944 out << itR->second << '\t' << totals[i];
945
946 if (hasGroups) { printGroupAbunds(out, i); }
947
948 out << endl;
949 }
950 }
951 }
952 }
953 out.close();
954 return namesInTable;
955 }
956 catch(exception& e) {
957 m->errorOut(e, "CountTable", "printTable");
958 exit(1);
959 }
960 }
961 /************************************************************/
962 //zeroed seqs are not printed
printCompressedTable(string file,vector<string> groupsToPrint)963 vector<string> CountTable::printCompressedTable(string file, vector<string> groupsToPrint) {
964 try {
965 ofstream out;
966 util.openOutputFile(file, out);
967
968 vector<string> namesInTable;
969
970 bool pickedGroups = false;
971 set<int> selectedGroupsIndicies;
972 if (groupsToPrint.size() != 0) { if (hasGroups) { pickedGroups = true; } } //if no groups selected, print all groups
973
974 if (total != 0) {
975 if (hasGroups) {
976
977 map<int, string> reverse;
978 for (map<string, int>::iterator it = indexGroupMap.begin(); it !=indexGroupMap.end(); it++) { reverse[it->second] = it->first; }
979
980 map<int, string>::iterator it = reverse.begin();
981 string group1Name = it->second;
982 if (pickedGroups) { //find selected groups indicies
983 for (map<int, string>::iterator it = reverse.begin(); it != reverse.end(); it++) {
984 if (util.inUsersGroups(it->second, groupsToPrint)) { group1Name = it->second; break; }
985 }
986 }
987
988 out << "#Compressed Format: groupIndex,abundance. For example 1,6 would mean the read has an abundance of 6 for group " + group1Name + "." << endl;
989 out << "#";
990
991 for (map<int, string>::iterator it = reverse.begin(); it != reverse.end(); it++) {
992 if (pickedGroups) { //find selected groups indicies
993 if (util.inUsersGroups(it->second, groupsToPrint)) {
994 selectedGroupsIndicies.insert(it->first);
995
996 out << it->first+1 << "," << it->second << "\t";
997 }
998 }else { out << it->first+1 << "," << it->second << "\t"; }
999 }
1000 out << endl;
1001 }
1002
1003 printHeaders(out, groupsToPrint);
1004
1005 map<int, string> reverse; //use this to preserve order
1006 for (map<string, int>::iterator it = indexNameMap.begin(); it !=indexNameMap.end(); it++) { reverse[it->second] = it->first; }
1007
1008 for (int i = 0; i < totals.size(); i++) {
1009 if (totals[i] != 0) {
1010 if (pickedGroups) {
1011 string groupOutput = "";
1012 long long thisTotal = 0;
1013 for (int j = 0; j < counts[i].size(); j++) {
1014 if (selectedGroupsIndicies.count(counts[i][j].group) != 0) { //this is a group we want
1015 groupOutput += '\t' + toString(counts[i][j].group+1) + ',' + toString(counts[i][j].abund);
1016 thisTotal += counts[i][j].abund;
1017 }
1018 }
1019
1020 if (thisTotal != 0) {
1021 map<int, string>::iterator itR = reverse.find(i);
1022
1023 if (itR != reverse.end()) {
1024 namesInTable.push_back(itR->second);
1025
1026 out << itR->second << '\t' << thisTotal << groupOutput << endl;
1027 }
1028 }
1029 }
1030 else {
1031 map<int, string>::iterator itR = reverse.find(i);
1032
1033 if (itR != reverse.end()) {
1034 namesInTable.push_back(itR->second);
1035
1036 out << itR->second << '\t' << totals[i];
1037 if (hasGroups) {
1038 for (int j = 0; j < counts[i].size(); j++) {
1039 out << '\t' << counts[i][j].group+1 << ',' << counts[i][j].abund;
1040 }
1041 }
1042 out << endl;
1043 }
1044 }
1045 }
1046 }
1047 }
1048 out.close();
1049
1050 return namesInTable;
1051 }
1052 catch(exception& e) {
1053 m->errorOut(e, "CountTable", "printCompressedTable");
1054 exit(1);
1055 }
1056 }
1057 /************************************************************/
1058 //returns index of countTableItem for group passed in. If group is not present in seq, returns index of next group or -1
find(int seq,int group,bool returnNext)1059 int CountTable::find(int seq, int group, bool returnNext) {
1060 try {
1061
1062 //if (!returnNext) { return find(seq, group); }
1063 int index = -1;
1064
1065 for (int i = 0; i < counts[seq].size(); i++) {
1066 if (counts[seq][i].group >= group) { //found it or done looking
1067
1068 if (counts[seq][i].group == group) { index = i; }
1069 break;
1070 }
1071 }
1072
1073 return index;
1074 }
1075 catch(exception& e) {
1076 m->errorOut(e, "CountTable", "find");
1077 exit(1);
1078 }
1079 }/************************************************************/
1080 //returns abundance of countTableItem for seq and group passed in. If group is not present in seq, returns 0
getAbund(int seq,int group)1081 int CountTable::getAbund(int seq, int group) {
1082 try {
1083 int index = find(seq, group, false);
1084
1085 if (index != -1) { //this seq has a non zero abundance for this group
1086 return counts[seq][index].abund;
1087 }
1088
1089 return 0;
1090 }
1091 catch(exception& e) {
1092 m->errorOut(e, "CountTable", "getAbund");
1093 exit(1);
1094 }
1095 }
1096 /************************************************************/
expandAbunds(vector<countTableItem> & items)1097 vector<int> CountTable::expandAbunds(vector<countTableItem>& items) {
1098 try {
1099 vector<int> abunds; abunds.resize(groups.size(), 0); //prefill with 0's
1100
1101 for (int i = 0; i < items.size(); i++) { //for each non zero entry
1102 abunds[items[i].group] = items[i].abund; //set abund for group
1103 }
1104
1105 return abunds;
1106 }
1107 catch(exception& e) {
1108 m->errorOut(e, "CountTable", "expandAbunds");
1109 exit(1);
1110 }
1111 }
1112 /************************************************************/
expandAbunds(int index)1113 vector<int> CountTable::expandAbunds(int index) {
1114 try {
1115 vector<int> abunds; abunds.resize(groups.size(), 0); //prefill with 0's
1116
1117
1118 for (int i = 0; i < counts[index].size(); i++) { //for each non zero entry
1119 abunds[counts[index][i].group] = counts[index][i].abund; //set abund for group
1120 }
1121
1122 return abunds;
1123 }
1124 catch(exception& e) {
1125 m->errorOut(e, "CountTable", "expandAbunds");
1126 exit(1);
1127 }
1128 }
1129 /************************************************************/
1130 //assumes same order as groups
compressAbunds(vector<int> abunds)1131 vector<countTableItem> CountTable::compressAbunds(vector<int> abunds) {
1132 try {
1133 vector<countTableItem> row;
1134
1135 for (int i = 0; i < abunds.size(); i++) {
1136 if (abunds[i] != 0) {
1137 countTableItem thisAbund(abunds[i], i);
1138 row.push_back(thisAbund);
1139 }
1140 }
1141
1142 return row;
1143 }
1144 catch(exception& e) {
1145 m->errorOut(e, "CountTable", "compressAbunds");
1146 exit(1);
1147 }
1148 }
1149 /************************************************************/
printGroupAbunds(ofstream & out,int index)1150 void CountTable::printGroupAbunds(ofstream& out, int index) {
1151 try {
1152
1153 vector<int> abunds = expandAbunds(index);
1154
1155 for (int i = 0; i < abunds.size(); i++) { out << '\t' << abunds[i]; }
1156 }
1157 catch(exception& e) {
1158 m->errorOut(e, "CountTable", "printGroupAbunds");
1159 exit(1);
1160 }
1161 }
1162 /************************************************************/
printSortedTable(string file)1163 vector<string> CountTable::printSortedTable(string file) {
1164 try {
1165 //remove group if all reads are removed
1166 for (int i = 0; i < totalGroups.size(); i++) {
1167 if (totalGroups[i] == 0) { m->mothurOut("\nRemoving group: " + groups[i] + " because all sequences have been removed.\n"); removeGroup(groups[i]); i--; }
1168 }
1169
1170 ofstream out;
1171 util.openOutputFile(file, out);
1172 printHeaders(out);
1173
1174 vector<string> namesInTable;
1175
1176 for (map<string, int>::iterator it = indexNameMap.begin(); it !=indexNameMap.end(); it++) {
1177 string seqName = it->first;
1178 int index = it->second;
1179
1180 if (totals[index] != 0) {
1181 namesInTable.push_back(seqName);
1182
1183 out << seqName << '\t' << totals[index];
1184 if (hasGroups) {
1185 printGroupAbunds(out, index);
1186 }
1187 out << endl;
1188 }
1189 }
1190 out.close();
1191
1192 return namesInTable;
1193 }
1194 catch(exception& e) {
1195 m->errorOut(e, "CountTable", "printSortedTable");
1196 exit(1);
1197 }
1198 }
1199
1200 /************************************************************/
getHardCodedHeaders()1201 vector<string> CountTable::getHardCodedHeaders() {
1202 try {
1203 vector<string> headers; headers.push_back("Representative_Sequence"); headers.push_back("total");
1204 return headers;
1205 }
1206 catch(exception& e) {
1207 m->errorOut(e, "CountTable", "printHeaders");
1208 exit(1);
1209 }
1210 }
1211 /************************************************************/
printHeaders(ofstream & out,vector<string> selectedGroups)1212 int CountTable::printHeaders(ofstream& out, vector<string> selectedGroups) {
1213 try {
1214 //remove group if all reads are removed
1215 for (int i = 0; i < totalGroups.size(); i++) {
1216 if (totalGroups[i] == 0) { m->mothurOut("\nRemoving group: " + groups[i] + " because all sequences have been removed.\n"); removeGroup(groups[i]); i--; }
1217 }
1218
1219 bool pickedGroups = false;
1220 if (selectedGroups.size() != 0) { pickedGroups = true; }
1221
1222 out << "Representative_Sequence\ttotal";
1223 if (hasGroups) {
1224 for (int i = 0; i < groups.size(); i++) {
1225 if (pickedGroups) {
1226 if (util.inUsersGroups(groups[i], selectedGroups)) { out << '\t' << groups[i]; }
1227 }
1228 else { out << '\t' << groups[i]; }
1229 }
1230 }
1231 out << endl;
1232 return 0;
1233 }
1234 catch(exception& e) {
1235 m->errorOut(e, "CountTable", "printHeaders");
1236 exit(1);
1237 }
1238 }
1239 /************************************************************/
printSeq(ofstream & out,string seqName)1240 int CountTable::printSeq(ofstream& out, string seqName) {
1241 try {
1242 map<string, int>::iterator it = indexNameMap.find(seqName);
1243 if (it == indexNameMap.end()) {
1244 m->mothurOut("[ERROR]: " + seqName + " is not in your count table. Please correct.\n"); m->setControl_pressed(true);
1245 }else {
1246 if (totals[it->second] != 0) {
1247 out << it->first << '\t' << totals[it->second];
1248
1249 if (hasGroups) { printGroupAbunds(out, it->second); }
1250
1251 out << endl;
1252 }
1253 }
1254 return 0;
1255 }
1256 catch(exception& e) {
1257 m->errorOut(e, "CountTable", "printSeq");
1258 exit(1);
1259 }
1260 }
1261
1262 /************************************************************/
1263 //group counts for a seq
getGroupCounts(string seqName)1264 vector<int> CountTable::getGroupCounts(string seqName) {
1265 try {
1266 vector<countTableItem> temp = getItems(seqName);
1267 return (expandAbunds(temp));
1268
1269 }
1270 catch(exception& e) {
1271 m->errorOut(e, "CountTable", "getGroupCounts");
1272 exit(1);
1273 }
1274 }
1275 /************************************************************/
1276 //group counts for a seq
getItems(string seqName)1277 vector<countTableItem> CountTable::getItems(string seqName) {
1278 try {
1279 vector<countTableItem> temp;
1280 if (hasGroups) {
1281 map<string, int>::iterator it = indexNameMap.find(seqName);
1282 if (it == indexNameMap.end()) {
1283 //look for it in names of groups to see if the user accidently used the wrong file
1284 if (util.inUsersGroups(seqName, groups)) {
1285 m->mothurOut("[WARNING]: Your group or design file contains a group named " + seqName + ". Perhaps you are used a group file instead of a design file? A common cause of this is using a tree file that relates your groups (created by the tree.shared command) with a group file that assigns sequences to a group.\n");
1286 }
1287 m->mothurOut("[ERROR]: " + seqName + " is not in your count table. Please correct.\n"); m->setControl_pressed(true);
1288 }else {
1289 temp = counts[it->second];
1290 }
1291 }else{ m->mothurOut("[ERROR]: Your count table does not have group info. Please correct.\n"); m->setControl_pressed(true); }
1292
1293 return temp;
1294 }
1295 catch(exception& e) {
1296 m->errorOut(e, "CountTable", "getGroupCounts");
1297 exit(1);
1298 }
1299 }
1300 /************************************************************/
1301 //total number of sequences for the group
getGroupCount(string groupName)1302 int CountTable::getGroupCount(string groupName) {
1303 try {
1304 if (hasGroups) {
1305 map<string, int>::iterator it = indexGroupMap.find(groupName);
1306 if (it == indexGroupMap.end()) {
1307 m->mothurOut("[ERROR]: group " + groupName + " is not in your count table. Please correct.\n"); m->setControl_pressed(true);
1308 }else {
1309 return totalGroups[it->second];
1310 }
1311 }else{ m->mothurOut("[ERROR]: Your count table does not have group info. Please correct.\n"); m->setControl_pressed(true); }
1312
1313 return 0;
1314 }
1315 catch(exception& e) {
1316 m->errorOut(e, "CountTable", "getGroupCount");
1317 exit(1);
1318 }
1319 }
1320 /************************************************************/
1321 //total number of sequences for the seq for the group
getGroupCount(string seqName,string groupName)1322 int CountTable::getGroupCount(string seqName, string groupName) {
1323 try {
1324 if (hasGroups) {
1325 map<string, int>::iterator it = indexGroupMap.find(groupName);
1326 if (it == indexGroupMap.end()) {
1327 m->mothurOut("[ERROR]: group " + groupName + " is not in your count table. Please correct.\n"); m->setControl_pressed(true);
1328 }else {
1329 map<string, int>::iterator it2 = indexNameMap.find(seqName);
1330 if (it2 == indexNameMap.end()) {
1331 //look for it in names of groups to see if the user accidently used the wrong file
1332 if (util.inUsersGroups(seqName, groups)) {
1333 m->mothurOut("[WARNING]: Your group or design file contains a group named " + seqName + ". Perhaps you are used a group file instead of a design file? A common cause of this is using a tree file that relates your groups (created by the tree.shared command) with a group file that assigns sequences to a group.\n");
1334 }
1335 m->mothurOut("[ERROR]: seq " + seqName + " is not in your count table. Please correct.\n"); m->setControl_pressed(true);
1336 }else {
1337 return expandAbunds(it2->second)[it->second];
1338 }
1339 }
1340 }else{ m->mothurOut("[ERROR]: Your count table does not have group info. Please correct.\n"); m->setControl_pressed(true); }
1341
1342 return 0;
1343 }
1344 catch(exception& e) {
1345 m->errorOut(e, "CountTable", "getGroupCount");
1346 exit(1);
1347 }
1348 }
1349 /************************************************************/
1350 //set the number of sequences for the seq for the group
setAbund(string seqName,string groupName,int num)1351 int CountTable::setAbund(string seqName, string groupName, int num) {
1352 try {
1353 if (hasGroups) {
1354 map<string, int>::iterator it = indexGroupMap.find(groupName);
1355 if (it == indexGroupMap.end()) {
1356 m->mothurOut("[ERROR]: " + groupName + " is not in your count table. Please correct.\n"); m->setControl_pressed(true);
1357 }else {
1358 map<string, int>::iterator it2 = indexNameMap.find(seqName);
1359 if (it2 == indexNameMap.end()) {
1360 //look for it in names of groups to see if the user accidently used the wrong file
1361 if (util.inUsersGroups(seqName, groups)) {
1362 m->mothurOut("[WARNING]: Your group or design file contains a group named " + seqName + ". Perhaps you are used a group file instead of a design file? A common cause of this is using a tree file that relates your groups (created by the tree.shared command) with a group file that assigns sequences to a group.\n");
1363 }
1364 m->mothurOut("[ERROR]: " + seqName + " is not in your count table. Please correct.\n"); m->setControl_pressed(true);
1365 }else {
1366 int indexOfGroup = find(it2->second, it->second, false);
1367 int oldCount = 0;
1368
1369 if (indexOfGroup == -1) { //create item for this group
1370 countTableItem newItem(num, it->second);
1371 counts[it2->second].push_back(newItem);
1372 sortRow(it2->second);
1373 }else { //update total for group
1374 oldCount = counts[it2->second][indexOfGroup].abund;
1375 counts[it2->second][indexOfGroup].abund = num;
1376 }
1377
1378 totalGroups[it->second] += (num - oldCount);
1379 total += (num - oldCount);
1380 totals[it2->second] += (num - oldCount);
1381 }
1382 }
1383 }else{ m->mothurOut("[ERROR]: Your count table does not have group info. Please correct.\n"); m->setControl_pressed(true); }
1384
1385 return 0;
1386 }
1387 catch(exception& e) {
1388 m->errorOut(e, "CountTable", "set");
1389 exit(1);
1390 }
1391 }
1392 /************************************************************/
1393 //add group
addGroup(string groupName)1394 int CountTable::addGroup(string groupName) {
1395 try {
1396 bool sanity = util.inUsersGroups(groupName, groups);
1397 if (sanity) { m->mothurOut("[ERROR]: " + groupName + " is already in the count table, cannot add again.\n"); m->setControl_pressed(true); return 0; }
1398
1399 groups.push_back(groupName);
1400 if (!hasGroups) { counts.resize(uniques); }
1401
1402 totalGroups.push_back(0);
1403 indexGroupMap[groupName] = groups.size()-1;
1404 map<string, int> originalGroupMap = indexGroupMap;
1405
1406 //important to play well with others, :)
1407 sort(groups.begin(), groups.end());
1408
1409 //fix indexGroupMap && totalGroups
1410 vector<int> newTotals; newTotals.resize(groups.size(), 0);
1411 for (int i = 0; i < groups.size(); i++) {
1412 indexGroupMap[groups[i]] = i;
1413 //find original spot of group[i]
1414 int index = originalGroupMap[groups[i]];
1415 newTotals[i] = totalGroups[index];
1416 }
1417 totalGroups = newTotals;
1418
1419 hasGroups = true;
1420
1421 return 0;
1422 }
1423 catch(exception& e) {
1424 m->errorOut(e, "CountTable", "addGroup");
1425 exit(1);
1426 }
1427 }
1428 /************************************************************/
1429 //remove group
removeGroup(string groupName)1430 int CountTable::removeGroup(string groupName) {
1431 try {
1432 if (hasGroups) {
1433 //save for later in case removing a group means we need to remove a seq.
1434 map<int, string> reverse;
1435 map<string, int>::iterator it;
1436 for (it = indexNameMap.begin(); it !=indexNameMap.end(); it++) { reverse[it->second] = it->first; }
1437
1438 it = indexGroupMap.find(groupName);
1439 if (it == indexGroupMap.end()) {
1440 m->mothurOut("[ERROR]: " + groupName + " is not in your count table. Please correct.\n"); m->setControl_pressed(true);
1441 }else {
1442 int indexOfGroupToRemove = it->second;
1443 map<string, int> currentGroupIndex = indexGroupMap;
1444 vector<string> newGroups;
1445 for (int i = 0; i < groups.size(); i++) {
1446 if (groups[i] != groupName) {
1447 newGroups.push_back(groups[i]);
1448 indexGroupMap[groups[i]] = newGroups.size()-1;
1449 }
1450 }
1451 indexGroupMap.erase(groupName);
1452 groups = newGroups;
1453 totalGroups.erase(totalGroups.begin()+indexOfGroupToRemove);
1454
1455 int thisIndex = 0;
1456 map<string, int> newIndexNameMap;
1457 for (int i = 0; i < counts.size(); i++) {
1458
1459 if (m->getControl_pressed()) { break; }
1460
1461 int indexOfGroup = -1; bool found = false;
1462 for (int j = 0; j < counts[i].size(); j++) {
1463 if (counts[i][j].group >= indexOfGroupToRemove) { //found it or done looking
1464
1465 indexOfGroup = j;
1466 if (counts[i][j].group == indexOfGroupToRemove) { found = true; }
1467 break;
1468 }
1469 }
1470
1471 if (found) { //you have an abundance for this group
1472 int num = counts[i][indexOfGroup].abund;
1473 counts[i].erase(counts[i].begin()+indexOfGroup);
1474 totals[i] -= num;
1475 total -= num;
1476
1477 if (totals[i] == 0) { //your sequences are only from the group we want to remove, then remove you.
1478 counts.erase(counts.begin()+i);
1479 totals.erase(totals.begin()+i);
1480 uniques--;
1481 i--;
1482 if (i == -1) { i = 0; }
1483 indexOfGroup = counts[i].size(); //don't adjust the the group indexes because we removed the read
1484 }else { newIndexNameMap[reverse[thisIndex]] = i; }
1485 }else { //you don't have this group, nothing to remove
1486
1487 if (indexOfGroup == -1) { indexOfGroup = counts[i].size(); }
1488 newIndexNameMap[reverse[thisIndex]] = i;
1489 }
1490
1491 for (int j = indexOfGroup; j < counts[i].size(); j++) { counts[i][j].group -= 1; }
1492
1493 thisIndex++;
1494 }
1495 indexNameMap = newIndexNameMap;
1496
1497 if (groups.size() == 0) { hasGroups = false; }
1498 }
1499 }else { m->mothurOut("[ERROR]: your count table does not contain group information, can not remove group " + groupName + ".\n"); m->setControl_pressed(true); }
1500
1501 return 0;
1502 }
1503 catch(exception& e) {
1504 m->errorOut(e, "CountTable", "removeGroup");
1505 exit(1);
1506 }
1507 }
1508 /***********************************************************************/
removeGroup(int minSize)1509 int CountTable::removeGroup(int minSize){
1510 try {
1511
1512 if (hasGroups) {
1513 for (int i = 0; i < totalGroups.size(); i++) {
1514 if (totalGroups[i] < minSize) { removeGroup(groups[i]); }
1515 }
1516 }else { m->mothurOut("[ERROR]: your count table does not contain group information, can not remove groups.\n"); m->setControl_pressed(true); }
1517
1518 return groups.size();
1519 }
1520 catch(exception& e) {
1521 m->errorOut(e, "SharedRAbundVector", "removeGroup - minSize");
1522 exit(1);
1523 }
1524 }
1525 /************************************************************/
1526 //vector of groups for the seq
getGroups(string seqName)1527 vector<string> CountTable::getGroups(string seqName) {
1528 try {
1529 vector<string> thisGroups;
1530 map<string, int>::iterator it = indexNameMap.find(seqName);
1531 if (it == indexNameMap.end()) {
1532 m->mothurOut("[ERROR]: " + seqName + " is not in your count table. Please correct.\n"); m->setControl_pressed(true);
1533 }else {
1534 if (hasGroups) {
1535 int index = it->second;
1536 for (int i = 0; i < counts[index].size(); i++) {
1537 thisGroups.push_back(groups[counts[index][i].group]);
1538 }
1539 }else{ m->mothurOut("[ERROR]: Your count table does not have group info. Please correct.\n"); m->setControl_pressed(true); }
1540 }
1541
1542 return thisGroups;
1543 }
1544 catch(exception& e) {
1545 m->errorOut(e, "CountTable", "getGroups");
1546 exit(1);
1547 }
1548 }
1549 /************************************************************/
1550 //total number of seqs represented by seq
renameSeq(string oldSeqName,string newSeqName)1551 int CountTable::renameSeq(string oldSeqName, string newSeqName) {
1552 try {
1553
1554 map<string, int>::iterator it = indexNameMap.find(oldSeqName);
1555 if (it == indexNameMap.end()) {
1556 if (hasGroupInfo()) {
1557 //look for it in names of groups to see if the user accidently used the wrong file
1558 if (util.inUsersGroups(oldSeqName, groups)) {
1559 m->mothurOut("[WARNING]: Your group or design file contains a group named " + oldSeqName + ". Perhaps you are used a group file instead of a design file? A common cause of this is using a tree file that relates your groups (created by the tree.shared command) with a group file that assigns sequences to a group.\n");
1560 }
1561 }
1562 m->mothurOut("[ERROR]: " + oldSeqName + " is not in your count table. Please correct.\n"); m->setControl_pressed(true);
1563 }else {
1564 int index = it->second;
1565 indexNameMap.erase(it);
1566 indexNameMap[newSeqName] = index;
1567 }
1568
1569 return 0;
1570 }
1571 catch(exception& e) {
1572 m->errorOut(e, "CountTable", "renameSeq");
1573 exit(1);
1574 }
1575 }
1576
1577 /************************************************************/
1578 //total number of seqs represented by seq
getNumSeqs(string seqName)1579 int CountTable::getNumSeqs(string seqName) {
1580 try {
1581
1582 map<string, int>::iterator it = indexNameMap.find(seqName);
1583 if (it == indexNameMap.end()) {
1584 if (hasGroupInfo()) {
1585 //look for it in names of groups to see if the user accidently used the wrong file
1586 if (util.inUsersGroups(seqName, groups)) {
1587 m->mothurOut("[WARNING]: Your group or design file contains a group named " + seqName + ". Perhaps you are used a group file instead of a design file? A common cause of this is using a tree file that relates your groups (created by the tree.shared command) with a group file that assigns sequences to a group.\n");
1588 }
1589 }
1590 m->mothurOut("[ERROR]: " + seqName + " is not in your count table. Please correct.\n"); m->setControl_pressed(true);
1591 }else {
1592 return totals[it->second];
1593 }
1594
1595 return 0;
1596 }
1597 catch(exception& e) {
1598 m->errorOut(e, "CountTable", "getNumSeqs");
1599 exit(1);
1600 }
1601 }
1602 /************************************************************/
1603 //set total number of seqs represented by seq
setNumSeqs(string seqName,int abund)1604 int CountTable::setNumSeqs(string seqName, int abund) {
1605 try {
1606
1607 map<string, int>::iterator it = indexNameMap.find(seqName);
1608 if (it == indexNameMap.end()) {
1609 m->mothurOut("[ERROR]: " + seqName + " is not in your count table. Please correct.\n"); m->setControl_pressed(true); return -1;
1610 }else {
1611 int diff = totals[it->second] - abund;
1612 totals[it->second] = abund;
1613 total-=diff;
1614 }
1615
1616 return 0;
1617 }
1618 catch(exception& e) {
1619 m->errorOut(e, "CountTable", "setNumSeqs");
1620 exit(1);
1621 }
1622 }
1623 /************************************************************/
zeroOutSeq(string seqName)1624 int CountTable::zeroOutSeq(string seqName) {
1625 try {
1626
1627 map<string, int>::iterator it = indexNameMap.find(seqName);
1628 if (it == indexNameMap.end()) {
1629 m->mothurOut("[ERROR]: " + seqName + " is not in your count table. Please correct.\n"); m->setControl_pressed(true); return -1;
1630 }else {
1631 int abund = totals[it->second];
1632 totals[it->second] = 0;
1633 total-=abund;
1634
1635 if (hasGroups) {
1636 int seqIndexIntoCounts = it->second;
1637 for (int i = 0; i < counts[seqIndexIntoCounts].size(); i++) {
1638 totalGroups[counts[seqIndexIntoCounts][i].group] -= counts[seqIndexIntoCounts][i].abund;
1639 }
1640 counts[seqIndexIntoCounts].clear();
1641 }
1642 }
1643
1644 return 0;
1645 }
1646 catch(exception& e) {
1647 m->errorOut(e, "CountTable", "zeroOutSeq");
1648 exit(1);
1649 }
1650 }
1651 /************************************************************/
1652 //returns unique index for sequence like get in NameAssignment
get(string seqName)1653 int CountTable::get(string seqName) {
1654 try {
1655
1656 map<string, int>::iterator it = indexNameMap.find(seqName);
1657 if (it == indexNameMap.end()) {
1658 if (hasGroupInfo()) {
1659 //look for it in names of groups to see if the user accidently used the wrong file
1660 if (util.inUsersGroups(seqName, groups)) {
1661 m->mothurOut("[WARNING]: Your group or design file contains a group named " + seqName + ". Perhaps you are used a group file instead of a design file? A common cause of this is using a tree file that relates your groups (created by the tree.shared command) with a group file that assigns sequences to a group.\n");
1662 }
1663 }
1664 m->mothurOut("[ERROR]: " + seqName + " is not in your count table. Please correct.\n"); m->setControl_pressed(true);
1665 }else { return it->second; }
1666
1667 return -1;
1668 }
1669 catch(exception& e) {
1670 m->errorOut(e, "CountTable", "get");
1671 exit(1);
1672 }
1673 }
1674 /************************************************************/
1675 //add seqeunce without group info
push_back(string seqName)1676 int CountTable::push_back(string seqName) {
1677 try {
1678 map<string, int>::iterator it = indexNameMap.find(seqName);
1679 if (it == indexNameMap.end()) {
1680 if (hasGroups) { m->mothurOut("[ERROR]: Your count table has groups and I have no group information for " + seqName + ".\n"); m->setControl_pressed(true); }
1681 indexNameMap[seqName] = uniques;
1682 totals.push_back(1);
1683 total++;
1684 uniques++;
1685 }else {
1686 m->mothurOut("[ERROR]: Your count table contains more than 1 sequence named " + seqName + ", sequence names must be unique. Please correct.\n"); m->setControl_pressed(true);
1687 }
1688
1689 return 1;
1690 }
1691 catch(exception& e) {
1692 m->errorOut(e, "CountTable", "push_back");
1693 exit(1);
1694 }
1695 }
1696 /************************************************************/
1697 //
inTable(string seqName)1698 bool CountTable::inTable(string seqName) {
1699 try {
1700 map<string, int>::iterator it = indexNameMap.find(seqName);
1701 if (it != indexNameMap.end()) { return true; }
1702 return false;
1703
1704 }
1705 catch(exception& e) {
1706 m->errorOut(e, "CountTable", "inTable");
1707 exit(1);
1708 }
1709 }
1710
1711 /************************************************************/
1712 //remove sequence
remove(string seqName)1713 int CountTable::remove(string seqName) {
1714 try {
1715 map<string, int>::iterator it = indexNameMap.find(seqName);
1716 if (it != indexNameMap.end()) {
1717 int seqIndexIntoCounts = it->second;
1718 uniques--;
1719 if (hasGroups){ //remove this sequences counts from group totals
1720 for (int i = 0; i < counts[seqIndexIntoCounts].size(); i++) {
1721 totalGroups[counts[seqIndexIntoCounts][i].group] -= counts[seqIndexIntoCounts][i].abund;
1722 }
1723 }
1724
1725 //save for later in case removing a group means we need to remove a seq.
1726 map<int, string> reverse;
1727 for (map<string, int>::iterator it2 = indexNameMap.begin(); it2 !=indexNameMap.end(); it2++) { reverse[it2->second] = it2->first; }
1728
1729 int newIndex = 0;
1730 map<string, int> newIndexNameMap;
1731 for (int i = 0; i < counts.size(); i++) {
1732 if (i == seqIndexIntoCounts) { }//you are the seq we are trying to remove
1733 else { newIndexNameMap[reverse[i]] = newIndex; newIndex++; }
1734 }
1735 indexNameMap = newIndexNameMap;
1736
1737 counts.erase(counts.begin()+seqIndexIntoCounts);
1738 int thisTotal = totals[seqIndexIntoCounts];
1739 totals.erase(totals.begin()+seqIndexIntoCounts);
1740 total -= thisTotal;
1741
1742 //remove group if all reads are removed
1743 for (int i = 0; i < totalGroups.size(); i++) {
1744 if (totalGroups[i] == 0) { m->mothurOut("\nRemoving group: " + groups[i] + " because all sequences have been removed.\n"); removeGroup(groups[i]); i--; }
1745 }
1746
1747 }else {
1748 if (hasGroupInfo()) {
1749 //look for it in names of groups to see if the user accidently used the wrong file
1750 if (util.inUsersGroups(seqName, groups)) {
1751 m->mothurOut("[WARNING]: Your group or design file contains a group named " + seqName + ". Perhaps you are used a group file instead of a design file? A common cause of this is using a tree file that relates your groups (created by the tree.shared command) with a group file that assigns sequences to a group.\n");
1752 }
1753 }
1754 m->mothurOut("[ERROR]: Your count table contains does not include " + seqName + ", cannot remove.\n"); m->setControl_pressed(true);
1755 }
1756
1757 return 0;
1758 }
1759 catch(exception& e) {
1760 m->errorOut(e, "CountTable", "remove");
1761 exit(1);
1762 }
1763 }
1764 /************************************************************/
1765 //add seqeunce without group info
push_back(string seqName,int thisTotal)1766 int CountTable::push_back(string seqName, int thisTotal) {
1767 try {
1768 map<string, int>::iterator it = indexNameMap.find(seqName);
1769 if (it == indexNameMap.end()) {
1770 if (hasGroups) { m->mothurOut("[ERROR]: Your count table has groups and I have no group information for " + seqName + ".\n"); m->setControl_pressed(true); }
1771 indexNameMap[seqName] = uniques;
1772 totals.push_back(thisTotal);
1773 total+=thisTotal;
1774 uniques++;
1775 }else {
1776 m->mothurOut("[ERROR]: Your count table contains more than 1 sequence named " + seqName + ", sequence names must be unique. Please correct.\n"); m->setControl_pressed(true);
1777 }
1778
1779 return thisTotal;
1780 }
1781 catch(exception& e) {
1782 m->errorOut(e, "CountTable", "push_back");
1783 exit(1);
1784 }
1785 }
1786 /************************************************************/
1787 //add sequence with group info
push_back(string seqName,vector<int> groupCounts,bool ignoreDup=false)1788 int CountTable::push_back(string seqName, vector<int> groupCounts, bool ignoreDup=false) {
1789 try {
1790 int thisTotal = 0;
1791 map<string, int>::iterator it = indexNameMap.find(seqName);
1792 if (it == indexNameMap.end()) {
1793 if ((hasGroups) && (groupCounts.size() != getNumGroups())) { m->mothurOut("[ERROR]: Your count table has a " + toString(getNumGroups()) + " groups and " + seqName + " has " + toString(groupCounts.size()) + ", please correct.\n"); m->setControl_pressed(true); }
1794
1795 for (int i = 0; i < getNumGroups(); i++) { totalGroups[i] += groupCounts[i]; thisTotal += groupCounts[i]; }
1796 if (hasGroups) { counts.push_back(compressAbunds(groupCounts)); }
1797 indexNameMap[seqName] = uniques;
1798 totals.push_back(thisTotal);
1799 total+= thisTotal;
1800 uniques++;
1801 }else {
1802 if (ignoreDup) {
1803 m->mothurOut("[WARNING]: Your count table contains more than 1 sequence named " + seqName + ". Mothur requires sequence names to be unique. I will only add it once.\n");
1804 }else { m->mothurOut("[ERROR]: Your count table contains more than 1 sequence named " + seqName + ", sequence names must be unique. Please correct.\n"); m->setControl_pressed(true); }
1805 }
1806
1807 return thisTotal;
1808 }
1809 catch(exception& e) {
1810 m->errorOut(e, "CountTable", "push_back");
1811 exit(1);
1812 }
1813 }
1814
1815 /************************************************************/
1816 //add sequence with group info
push_back(string seqName,vector<int> groupCounts)1817 int CountTable::push_back(string seqName, vector<int> groupCounts) {
1818 try {
1819 int thisTotal = 0;
1820 map<string, int>::iterator it = indexNameMap.find(seqName);
1821 if (it == indexNameMap.end()) {
1822 if ((hasGroups) && (groupCounts.size() != getNumGroups())) { m->mothurOut("[ERROR]: Your count table has a " + toString(getNumGroups()) + " groups and " + seqName + " has " + toString(groupCounts.size()) + ", please correct.\n"); m->setControl_pressed(true); }
1823
1824 for (int i = 0; i < getNumGroups(); i++) { totalGroups[i] += groupCounts[i]; thisTotal += groupCounts[i]; }
1825 if (hasGroups) { counts.push_back(compressAbunds(groupCounts)); }
1826 indexNameMap[seqName] = uniques;
1827 totals.push_back(thisTotal);
1828 total+= thisTotal;
1829 uniques++;
1830 }else {
1831 m->mothurOut("[ERROR]: Your count table contains more than 1 sequence named " + seqName + ", sequence names must be unique. Please correct.\n"); m->setControl_pressed(true);
1832 }
1833
1834 return thisTotal;
1835 }
1836 catch(exception& e) {
1837 m->errorOut(e, "CountTable", "push_back");
1838 exit(1);
1839 }
1840 }
1841 /************************************************************/
1842 //returns size of smallest group. If no groups, returns total num seqs (includes non uniques)
getNumSeqsSmallestGroup()1843 int CountTable::getNumSeqsSmallestGroup() {
1844 try {
1845 int smallestGroupSize = MOTHURMAX;
1846
1847 if (hasGroups) {
1848 for (int i = 0; i < totalGroups.size(); i++) {
1849 if (totalGroups[i] < smallestGroupSize) { smallestGroupSize = totalGroups[i]; }
1850 }
1851 }
1852 else { return total; }
1853
1854 return smallestGroupSize;
1855 }
1856 catch(exception& e) {
1857 m->errorOut(e, "CountTable", "getNumSeqsSmallestGroup");
1858 exit(1);
1859 }
1860 }
1861
1862 /************************************************************/
1863 //create ListVector from uniques
getListVector()1864 ListVector CountTable::getListVector() {
1865 try {
1866 ListVector list(indexNameMap.size(), "ASV");
1867 for (map<string, int>::iterator it = indexNameMap.begin(); it != indexNameMap.end(); it++) {
1868 if (m->getControl_pressed()) { break; }
1869 list.set(it->second, it->first);
1870 }
1871 return list;
1872 }
1873 catch(exception& e) {
1874 m->errorOut(e, "CountTable", "getListVector");
1875 exit(1);
1876 }
1877 }
1878
1879 /************************************************************/
1880 //returns the names of all unique sequences in file
getNamesOfSeqs()1881 vector<string> CountTable::getNamesOfSeqs() {
1882 try {
1883 vector<string> names;
1884 for (map<string, int>::iterator it = indexNameMap.begin(); it != indexNameMap.end(); it++) {
1885 names.push_back(it->first);
1886 }
1887
1888 return names;
1889 }
1890 catch(exception& e) {
1891 m->errorOut(e, "CountTable", "getNamesOfSeqs");
1892 exit(1);
1893 }
1894 }
1895 /************************************************************/
1896 //returns the names of all unique sequences in file mapped to their seqCounts
getNameMap()1897 map<string, int> CountTable::getNameMap() {
1898 try {
1899 map<string, int> names;
1900 for (map<string, int>::iterator it = indexNameMap.begin(); it != indexNameMap.end(); it++) {
1901 names[it->first] = totals[it->second];
1902 }
1903
1904 return names;
1905 }
1906 catch(exception& e) {
1907 m->errorOut(e, "CountTable", "getNameMap");
1908 exit(1);
1909 }
1910 }
1911 /************************************************************/
1912 //returns the names of all unique sequences in file mapped to their seqCounts
getNameMap(string group)1913 map<string, int> CountTable::getNameMap(string group) {
1914 try {
1915 map<string, int> names;
1916
1917 if (hasGroups) {
1918 map<string, int>::iterator it = indexGroupMap.find(group);
1919 if (it == indexGroupMap.end()) {
1920 m->mothurOut("[ERROR]: " + group + " is not in your count table. Please correct.\n"); m->setControl_pressed(true);
1921 }else {
1922 for (map<string, int>::iterator it2 = indexNameMap.begin(); it2 != indexNameMap.end(); it2++) {
1923 int abund = getAbund(it2->second, it->second);
1924 if (abund != 0) { names[it2->first] = abund; }
1925 }
1926 }
1927 }else{ m->mothurOut("[ERROR]: Your count table does not have group info. Please correct.\n"); m->setControl_pressed(true); }
1928
1929 return names;
1930 }
1931 catch(exception& e) {
1932 m->errorOut(e, "CountTable", "getNameMap");
1933 exit(1);
1934 }
1935 }
1936 /************************************************************/
1937 //returns the names of all unique sequences in file
getNamesOfSeqs(string group)1938 vector<string> CountTable::getNamesOfSeqs(string group) {
1939 try {
1940 vector<string> names;
1941 if (hasGroups) {
1942 map<string, int>::iterator it = indexGroupMap.find(group);
1943 if (it == indexGroupMap.end()) {
1944 m->mothurOut("[ERROR]: " + group + " is not in your count table. Please correct.\n"); m->setControl_pressed(true);
1945 }else {
1946 for (map<string, int>::iterator it2 = indexNameMap.begin(); it2 != indexNameMap.end(); it2++) {
1947 if (getAbund(it2->second, it->second) != 0) { names.push_back(it2->first); }
1948 }
1949 }
1950 }else{ m->mothurOut("[ERROR]: Your count table does not have group info. Please correct.\n"); m->setControl_pressed(true); }
1951
1952 return names;
1953 }
1954 catch(exception& e) {
1955 m->errorOut(e, "CountTable", "getNamesOfSeqs");
1956 exit(1);
1957 }
1958 }
1959 /************************************************************/
1960 //returns the names of all unique sequences in file
getNamesOfSeqs(vector<string> chosenGroups)1961 vector<string> CountTable::getNamesOfSeqs(vector<string> chosenGroups) {
1962 try {
1963 vector<string> names;
1964 if (hasGroups) {
1965 set<string> uniqueNames;
1966 for (int i = 0; i < chosenGroups.size(); i++) {
1967 vector<string> namesFromThisGroup = getNamesOfSeqs(chosenGroups[i]);
1968 for (int j = 0; j < namesFromThisGroup.size(); j++) { uniqueNames.insert(namesFromThisGroup[j]); }
1969 }
1970
1971 //only adds names once. seqs are likely present in more than one group, but we only want to enter them once
1972 for (set<string>::iterator it = uniqueNames.begin(); it != uniqueNames.end(); it++) { names.push_back(*it); }
1973
1974 }else{ m->mothurOut("[ERROR]: Your count table does not have group info. Please correct.\n"); m->setControl_pressed(true); }
1975
1976 return names;
1977 }
1978 catch(exception& e) {
1979 m->errorOut(e, "CountTable", "getNamesOfSeqs");
1980 exit(1);
1981 }
1982 }
1983
1984 /************************************************************/
1985 //merges counts of seq1 and seq2, saving in seq1
mergeCounts(string seq1,string seq2)1986 int CountTable::mergeCounts(string seq1, string seq2) {
1987 try {
1988 map<string, int>::iterator it = indexNameMap.find(seq1);
1989 if (it == indexNameMap.end()) {
1990 if (hasGroupInfo()) {
1991 //look for it in names of groups to see if the user accidently used the wrong file
1992 if (util.inUsersGroups(seq1, groups)) {
1993 m->mothurOut("[WARNING]: Your group or design file contains a group named " + seq1 + ". Perhaps you are used a group file instead of a design file? A common cause of this is using a tree file that relates your groups (created by the tree.shared command) with a group file that assigns sequences to a group.\n");
1994 }
1995 }
1996 m->mothurOut("[ERROR]: " + seq1 + " is not in your count table. Please correct.\n"); m->setControl_pressed(true);
1997 }else {
1998 map<string, int>::iterator it2 = indexNameMap.find(seq2);
1999 if (it2 == indexNameMap.end()) {
2000 if (hasGroupInfo()) {
2001 //look for it in names of groups to see if the user accidently used the wrong file
2002 if (util.inUsersGroups(seq2, groups)) {
2003 m->mothurOut("[WARNING]: Your group or design file contains a group named " + seq2 + ". Perhaps you are used a group file instead of a design file? A common cause of this is using a tree file that relates your groups (created by the tree.shared command) with a group file that assigns sequences to a group.\n");
2004 }
2005 }
2006 m->mothurOut("[ERROR]: " + seq2 + " is not in your count table. Please correct.\n"); m->setControl_pressed(true);
2007 }else {
2008 if (hasGroupInfo()) { //if no group data then counts are empty
2009 //merge data
2010 vector<int> countsSeq1 = expandAbunds(it->second);
2011 vector<int> countsSeq2 = expandAbunds(it2->second);
2012
2013 for (int i = 0; i < groups.size(); i++) { countsSeq1[i] += countsSeq2[i]; }
2014
2015 counts[it->second] = compressAbunds(countsSeq1);
2016 }
2017 totals[it->second] += totals[it2->second];
2018 uniques--;
2019 indexNameMap.erase(it2);
2020 }
2021 }
2022 return 0;
2023 }
2024 catch(exception& e) {
2025 m->errorOut(e, "CountTable", "getNamesOfSeqs");
2026 exit(1);
2027 }
2028 }
2029 /************************************************************/
copy(CountTable * ct)2030 int CountTable::copy(CountTable* ct) {
2031 try {
2032 vector<string> thisGroups = ct->getNamesOfGroups();
2033 for (int i = 0; i < thisGroups.size(); i++) { addGroup(thisGroups[i]); }
2034 vector<string> names = ct->getNamesOfSeqs();
2035
2036 for (int i = 0; i < names.size(); i++) {
2037 vector<int> thisCounts = ct->getGroupCounts(names[i]);
2038 push_back(names[i], thisCounts, false);
2039 }
2040
2041 isCompressed = ct->isTableCompressed();
2042
2043 return 0;
2044 }
2045 catch(exception& e) {
2046 m->errorOut(e, "CountTable", "copy");
2047 exit(1);
2048 }
2049 }
2050 /***********************************************************************/
2051
sortCountTable()2052 int CountTable::sortCountTable(){
2053 try {
2054
2055 //sorts each rows abunds by group
2056 //counts[i] = (1,4),(1,2),(3,7) -> (1,2),(1,4),(3,7)
2057 for (int i = 0; i < counts.size(); i++) { sort(counts[i].begin(), counts[i].end(), compareGroups); }
2058
2059 return 0;
2060 }
2061 catch(exception& e) {
2062 m->errorOut(e, "CountTable", "sortCountTable");
2063 exit(1);
2064 }
2065 }
2066 /***********************************************************************/
2067
sortRow(int index)2068 int CountTable::sortRow(int index){
2069 try {
2070
2071 //saves time in getSmallestCell, by making it so you dont search the repeats
2072 sort(counts[index].begin(), counts[index].end(), compareGroups);
2073
2074 return 0;
2075 }
2076 catch(exception& e) {
2077 m->errorOut(e, "CountTable", "sortRow");
2078 exit(1);
2079 }
2080 }
2081
2082 /************************************************************/
2083