1 //
2 // removeotulabels.cpp
3 // Mothur
4 //
5 // Created by Sarah Westcott on 5/21/12.
6 // Copyright (c) 2012 Schloss Lab. All rights reserved.
7 //
8
9 #include "removeotuscommand.h"
10
11 //**********************************************************************************************************************
setParameters()12 vector<string> RemoveOtusCommand::setParameters(){
13 try {
14 CommandParameter paccnos("accnos", "InputTypes", "", "", "none", "none", "none","",false,true,true); parameters.push_back(paccnos);
15 CommandParameter pconstaxonomy("constaxonomy", "InputTypes", "", "", "none", "FNGLT", "none","constaxonomy",false,false); parameters.push_back(pconstaxonomy);
16 CommandParameter potucorr("otucorr", "InputTypes", "", "", "none", "FNGLT", "none","otucorr",false,false); parameters.push_back(potucorr);
17 CommandParameter pcorraxes("corraxes", "InputTypes", "", "", "none", "FNGLT", "none","corraxes",false,false); parameters.push_back(pcorraxes);
18 CommandParameter plist("list", "InputTypes", "", "", "none", "FNGLT", "none","list",false,false, true); parameters.push_back(plist);
19 CommandParameter pshared("shared", "InputTypes", "", "", "none", "FNGLT", "none","shared",false,false, true); parameters.push_back(pshared);
20 CommandParameter plabel("label", "String", "", "", "", "", "","",false,false); parameters.push_back(plabel);
21 CommandParameter pseed("seed", "Number", "", "0", "", "", "","",false,false); parameters.push_back(pseed);
22 CommandParameter pinputdir("inputdir", "String", "", "", "", "", "","",false,false); parameters.push_back(pinputdir);
23 CommandParameter poutputdir("outputdir", "String", "", "", "", "", "","",false,false); parameters.push_back(poutputdir);
24
25 abort = false; calledHelp = false;
26
27 vector<string> tempOutNames;
28 outputTypes["constaxonomy"] = tempOutNames;
29 outputTypes["otucorr"] = tempOutNames;
30 outputTypes["corraxes"] = tempOutNames;
31 outputTypes["shared"] = tempOutNames;
32 outputTypes["list"] = tempOutNames;
33
34 vector<string> myArray;
35 for (int i = 0; i < parameters.size(); i++) { myArray.push_back(parameters[i].name); }
36 return myArray;
37 }
38 catch(exception& e) {
39 m->errorOut(e, "RemoveOtusCommand", "setParameters");
40 exit(1);
41 }
42 }
43 //**********************************************************************************************************************
getHelpString()44 string RemoveOtusCommand::getHelpString(){
45 try {
46 string helpString = "";
47 helpString += "The remove.otus command can be used to remove specific otus with the output from classify.otu, otu.association, or corr.axes. It can also be used to select a set of otus from a shared or list file.\n";
48 helpString += "The remove.otus parameters are: constaxonomy, otucorr, corraxes, shared, list, label and accnos.\n";
49 helpString += "The constaxonomy parameter is input the results of the classify.otu command.\n";
50 helpString += "The otucorr parameter is input the results of the otu.association command.\n";
51 helpString += "The corraxes parameter is input the results of the corr.axes command.\n";
52 helpString += "The label parameter is used to analyze specific labels in your input. \n";
53 helpString += "The remove.otus commmand should be in the following format: \n";
54 helpString += "remove.otus(accnos=yourListOfOTULabels, corraxes=yourCorrAxesFile)\n";
55 return helpString;
56 }
57 catch(exception& e) {
58 m->errorOut(e, "RemoveOtusCommand", "getHelpString");
59 exit(1);
60 }
61 }
62 //**********************************************************************************************************************
getOutputPattern(string type)63 string RemoveOtusCommand::getOutputPattern(string type) {
64 try {
65 string pattern = "";
66
67 if (type == "constaxonomy") { pattern = "[filename],pick,[extension]"; }
68 else if (type == "otucorr") { pattern = "[filename],pick,[extension]"; }
69 else if (type == "corraxes") { pattern = "[filename],pick,[extension]"; }
70 else if (type == "list") { pattern = "[filename],[distance],pick,[extension]"; }
71 else if (type == "shared") { pattern = "[filename],[distance],pick,[extension]"; }
72 else { m->mothurOut("[ERROR]: No definition for type " + type + " output pattern.\n"); m->setControl_pressed(true); }
73
74 return pattern;
75 }
76 catch(exception& e) {
77 m->errorOut(e, "RemoveOtusCommand", "getOutputPattern");
78 exit(1);
79 }
80 }
81 //**********************************************************************************************************************
RemoveOtusCommand(string option)82 RemoveOtusCommand::RemoveOtusCommand(string option) : Command() {
83 try {
84 if(option == "help") { help(); abort = true; calledHelp = true; }
85 else if(option == "citation") { citation(); abort = true; calledHelp = true;}
86 else if(option == "category") { abort = true; calledHelp = true; }
87
88 else {
89 OptionParser parser(option, setParameters());
90 map<string,string> parameters = parser.getParameters();
91
92 ValidParameters validParameter;
93 accnosfile = validParameter.validFile(parameters, "accnos");
94 if (accnosfile == "not open") { abort = true; }
95 else if (accnosfile == "not found") {
96 accnosfile = current->getAccnosFile();
97 if (accnosfile != "") { m->mothurOut("Using " + accnosfile + " as input file for the accnos parameter.\n"); }
98 else {
99 m->mothurOut("You have no valid accnos file and accnos is required.\n");
100 abort = true;
101 }
102 }else { current->setAccnosFile(accnosfile); }
103
104 constaxonomyfile = validParameter.validFile(parameters, "constaxonomy");
105 if (constaxonomyfile == "not open") { constaxonomyfile = ""; abort = true; }
106 else if (constaxonomyfile == "not found") { constaxonomyfile = ""; }
107
108 corraxesfile = validParameter.validFile(parameters, "corraxes");
109 if (corraxesfile == "not open") { corraxesfile = ""; abort = true; }
110 else if (corraxesfile == "not found") { corraxesfile = ""; }
111
112 otucorrfile = validParameter.validFile(parameters, "otucorr");
113 if (otucorrfile == "not open") { otucorrfile = ""; abort = true; }
114 else if (otucorrfile == "not found") { otucorrfile = ""; }
115
116 listfile = validParameter.validFile(parameters, "list");
117 if (listfile == "not open") { listfile = ""; abort = true; }
118 else if (listfile == "not found") { listfile = ""; }
119 else { current->setListFile(listfile); }
120
121 sharedfile = validParameter.validFile(parameters, "shared");
122 if (sharedfile == "not open") { sharedfile = ""; abort = true; }
123 else if (sharedfile == "not found") { sharedfile = ""; }
124 else { current->setSharedFile(sharedfile); }
125
126
127 if ((constaxonomyfile == "") && (corraxesfile == "") && (otucorrfile == "") && (sharedfile == "") && (listfile == "")) { m->mothurOut("You must provide one of the following: constaxonomy, corraxes, otucorr, shared or list.\n"); abort = true; }
128
129 if ((sharedfile != "") || (listfile != "")) {
130 label = validParameter.valid(parameters, "label");
131 if (label == "not found") { label = ""; m->mothurOut("You did not provide a label, I will use the first label in your inputfile.\n"); label=""; }
132 }
133 }
134
135 }
136 catch(exception& e) {
137 m->errorOut(e, "RemoveOtusCommand", "RemoveOtusCommand");
138 exit(1);
139 }
140 }
141 //**********************************************************************************************************************
142
execute()143 int RemoveOtusCommand::execute(){
144 try {
145
146 if (abort) { if (calledHelp) { return 0; } return 2; }
147
148 //get labels you want to keep
149 otulabels = util.readAccnos(accnosfile);
150 //simplfy labels
151 set<string> newLabels;
152 for (set<string>::iterator it = otulabels.begin(); it != otulabels.end(); it++) { newLabels.insert(util.getSimpleLabel(*it)); }
153 otulabels = newLabels;
154
155 if (m->getDebug()) { m->mothurOut("[DEBUG]: numlabels = " + toString(otulabels.size()) + "\n"); }
156
157 if (m->getControl_pressed()) { return 0; }
158
159 //read through the correct file and output lines you want to keep
160 if (constaxonomyfile != "") { readClassifyOtu(); }
161 if (corraxesfile != "") { readCorrAxes(); }
162 if (otucorrfile != "") { readOtuAssociation(); }
163 if (listfile != "") { readList(); }
164 if (sharedfile != "") { readShared(); }
165
166 if (m->getControl_pressed()) { for (int i = 0; i < outputNames.size(); i++) { util.mothurRemove(outputNames[i]); } return 0; }
167
168 //output files created by command
169 m->mothurOut("\nOutput File Names: \n");
170 for (int i = 0; i < outputNames.size(); i++) { m->mothurOut(outputNames[i] +"\n"); } m->mothurOutEndLine();
171
172 string currentName = "";
173 itTypes = outputTypes.find("list");
174 if (itTypes != outputTypes.end()) {
175 if ((itTypes->second).size() != 0) { currentName = (itTypes->second)[0]; current->setListFile(currentName); }
176 }
177
178 itTypes = outputTypes.find("shared");
179 if (itTypes != outputTypes.end()) {
180 if ((itTypes->second).size() != 0) { currentName = (itTypes->second)[0]; current->setSharedFile(currentName); }
181 }
182
183 //set constaxonomy file as new current constaxonomyfile
184 itTypes = outputTypes.find("constaxonomy");
185 if (itTypes != outputTypes.end()) {
186 if ((itTypes->second).size() != 0) { currentName = (itTypes->second)[0]; current->setConsTaxonomyFile(currentName); }
187 }
188
189 return 0;
190 }
191 catch(exception& e) {
192 m->errorOut(e, "GetOtusCommand", "execute");
193 exit(1);
194 }
195 }
196 //**********************************************************************************************************************
readClassifyOtu()197 int RemoveOtusCommand::readClassifyOtu(){
198 try {
199 string thisOutputDir = outputdir;
200 if (outputdir == "") { thisOutputDir += util.hasPath(constaxonomyfile); }
201 map<string, string> variables;
202 variables["[filename]"] = thisOutputDir + util.getRootName(util.getSimpleName(constaxonomyfile));
203 variables["[extension]"] = util.getExtension(constaxonomyfile);
204 string outputFileName = getOutputFileName("constaxonomy", variables);
205 ofstream out;
206 util.openOutputFile(outputFileName, out);
207
208 ifstream in;
209 util.openInputFile(constaxonomyfile, in);
210
211 bool wroteSomething = false;
212 int removedCount = 0;
213
214 //read headers
215 string headers = util.getline(in); util.gobble(in);
216 out << headers << endl;
217
218 while (!in.eof()) {
219
220 if (m->getControl_pressed()) { break; }
221
222 string otu = ""; string tax = "unknown";
223 int size = 0;
224
225 in >> otu >> size; util.gobble(in);
226 tax = util.getline(in); util.gobble(in);
227
228 if (m->getDebug()) { m->mothurOut("[DEBUG]: " + otu + toString(size) + tax + "\n"); }
229
230 if (otulabels.count(util.getSimpleLabel(otu)) == 0) {
231 wroteSomething = true;
232 out << otu << '\t' << size << '\t' << tax << endl;
233 }else { removedCount++; }
234 }
235 in.close();
236 out.close();
237
238 if (wroteSomething == false) { m->mothurOut("Your file only contains labels from the .accnos file.\n"); }
239 outputNames.push_back(outputFileName); outputTypes["constaxonomy"].push_back(outputFileName);
240
241 m->mothurOut("Removed " + toString(removedCount) + " otus from your constaxonomy file.\n");
242
243 return 0;
244
245 }
246 catch(exception& e) {
247 m->errorOut(e, "RemoveOtusCommand", "readClassifyOtu");
248 exit(1);
249 }
250 }
251 //**********************************************************************************************************************
readOtuAssociation()252 int RemoveOtusCommand::readOtuAssociation(){
253 try {
254 string thisOutputDir = outputdir;
255 if (outputdir == "") { thisOutputDir += util.hasPath(otucorrfile); }
256 map<string, string> variables;
257 variables["[filename]"] = thisOutputDir + util.getRootName(util.getSimpleName(otucorrfile));
258 variables["[extension]"] = util.getExtension(otucorrfile);
259 string outputFileName = getOutputFileName("otucorr", variables);
260 ofstream out;
261 util.openOutputFile(outputFileName, out);
262
263 ifstream in;
264 util.openInputFile(otucorrfile, in);
265
266 bool wroteSomething = false;
267 int removedCount = 0;
268
269 //read headers
270 string headers = util.getline(in); util.gobble(in);
271 out << headers << endl;
272
273 while (!in.eof()) {
274
275 if (m->getControl_pressed()) { break; }
276
277 string otu1 = "";
278 string otu2 = "";
279 in >> otu1 >> otu2;
280 string line = util.getline(in); util.gobble(in);
281
282 if ((otulabels.count(util.getSimpleLabel(otu1)) == 0) && (otulabels.count(util.getSimpleLabel(otu2)) == 0)){
283 wroteSomething = true;
284
285 out << otu1 << '\t' << otu2 << '\t' << line << endl;
286 }else { removedCount++; }
287 }
288 in.close();
289 out.close();
290
291 if (wroteSomething == false) { m->mothurOut("Your file only contains labels from the .accnos file.\n"); }
292 outputNames.push_back(outputFileName); outputTypes["otucorr"].push_back(outputFileName);
293
294 m->mothurOut("Removed " + toString(removedCount) + " lines from your otu.corr file.\n");
295
296 return 0;
297
298 }
299 catch(exception& e) {
300 m->errorOut(e, "RemoveOtusCommand", "readOtuAssociation");
301 exit(1);
302 }
303 }
304 //**********************************************************************************************************************
readCorrAxes()305 int RemoveOtusCommand::readCorrAxes(){
306 try {
307 string thisOutputDir = outputdir;
308 if (outputdir == "") { thisOutputDir += util.hasPath(corraxesfile); }
309 map<string, string> variables;
310 variables["[filename]"] = thisOutputDir + util.getRootName(util.getSimpleName(corraxesfile));
311 variables["[extension]"] = util.getExtension(corraxesfile);
312 string outputFileName = getOutputFileName("corraxes", variables);
313 ofstream out;
314 util.openOutputFile(outputFileName, out);
315
316
317 ifstream in;
318 util.openInputFile(corraxesfile, in);
319
320 bool wroteSomething = false;
321 int removedCount = 0;
322
323 //read headers
324 string headers = util.getline(in); util.gobble(in);
325 out << headers << endl;
326
327 while (!in.eof()) {
328
329 if (m->getControl_pressed()) { break; }
330
331 string otu = "";
332 in >> otu;
333 string line = util.getline(in); util.gobble(in);
334
335 if (otulabels.count(util.getSimpleLabel(otu)) == 0) {
336 wroteSomething = true;
337
338 out << otu << '\t' << line << endl;
339 }else { removedCount++; }
340 }
341 in.close();
342 out.close();
343
344 if (wroteSomething == false) { m->mothurOut("Your file only contains labels from the .accnos file.\n"); }
345 outputNames.push_back(outputFileName); outputTypes["corraxes"].push_back(outputFileName);
346
347 m->mothurOut("Removed " + toString(removedCount) + " lines from your corr.axes file.\n");
348
349 return 0;
350
351 }
352 catch(exception& e) {
353 m->errorOut(e, "RemoveOtusCommand", "readCorrAxes");
354 exit(1);
355 }
356 }
357 //**********************************************************************************************************************
readShared()358 int RemoveOtusCommand::readShared(){
359 try {
360
361 SharedRAbundVectors* lookup = getShared();
362
363 if (m->getControl_pressed()) { delete lookup; return 0; }
364
365 vector<string> newLabels;
366
367 bool wroteSomething = false;
368 int numRemoved = 0;
369 vector<int> binsToRemove;
370 for (int i = 0; i < lookup->getNumBins(); i++) {
371
372 if (m->getControl_pressed()) { delete lookup; return 0; }
373
374 //is this otu on the list
375 if (otulabels.count(util.getSimpleLabel(lookup->getOTUNames()[i])) == 0) {
376 wroteSomething = true;
377 }else { numRemoved++; binsToRemove.push_back(i); }
378 }
379
380 lookup->removeOTUs(binsToRemove, true);
381
382 string thisOutputDir = outputdir;
383 if (outputdir == "") { thisOutputDir += util.hasPath(sharedfile); }
384 map<string, string> variables;
385 variables["[filename]"] = thisOutputDir + util.getRootName(util.getSimpleName(sharedfile));
386 variables["[extension]"] = util.getExtension(sharedfile);
387 variables["[distance]"] = lookup->getLabel();
388 string outputFileName = getOutputFileName("shared", variables);
389 outputTypes["shared"].push_back(outputFileName); outputNames.push_back(outputFileName);
390 ofstream out; util.openOutputFile(outputFileName, out); bool printHeaders = true;
391 lookup->print(out, printHeaders);
392 out.close();
393
394 delete lookup;
395
396 if (wroteSomething == false) { m->mothurOut("Your file contains only OTUs from the .accnos file.\n"); }
397 m->mothurOut("Removed " + toString(numRemoved) + " OTUs from your shared file.\n");
398
399 return 0;
400 }
401 catch(exception& e) {
402 m->errorOut(e, "RemoveOtusCommand", "readShared");
403 exit(1);
404 }
405 }
406 //**********************************************************************************************************************
readList()407 int RemoveOtusCommand::readList(){
408 try {
409 getListVector();
410
411 if (m->getControl_pressed()) { delete list; return 0;}
412
413 ListVector newList;
414 newList.setLabel(list->getLabel());
415 int removedCount = 0;
416 bool wroteSomething = false;
417
418 vector<string> binLabels = list->getLabels();
419 vector<string> newLabels;
420 for (int i = 0; i < list->getNumBins(); i++) {
421
422 if (m->getControl_pressed()) { delete list; return 0;}
423
424 if (otulabels.count(util.getSimpleLabel(binLabels[i])) == 0) {
425 newList.push_back(list->get(i));
426 newLabels.push_back(binLabels[i]);
427 }else { removedCount++; }
428 }
429
430 string thisOutputDir = outputdir;
431 if (outputdir == "") { thisOutputDir += util.hasPath(listfile); }
432 map<string, string> variables;
433 variables["[filename]"] = thisOutputDir + util.getRootName(util.getSimpleName(listfile));
434 variables["[extension]"] = util.getExtension(listfile);
435 variables["[distance]"] = list->getLabel();
436 string outputFileName = getOutputFileName("list", variables);
437 ofstream out;
438 util.openOutputFile(outputFileName, out);
439
440 delete list;
441 //print new listvector
442 if (newList.getNumBins() != 0) {
443 wroteSomething = true;
444 newList.setLabels(newLabels);
445 newList.print(out, false);
446 }
447 out.close();
448
449 if (wroteSomething == false) { m->mothurOut("Your file contains only OTUs from the .accnos file.\n"); }
450 outputNames.push_back(outputFileName); outputTypes["list"].push_back(outputFileName);
451
452 m->mothurOut("Removed " + toString(removedCount) + " OTUs from your list file.\n");
453
454 return 0;
455 }
456 catch(exception& e) {
457 m->errorOut(e, "RemoveOtusCommand", "readList");
458 exit(1);
459 }
460 }
461 //**********************************************************************************************************************
getListVector()462 int RemoveOtusCommand::getListVector(){
463 try {
464 InputData input(listfile, "list", nullVector);
465 list = input.getListVector();
466 string lastLabel = list->getLabel();
467
468 if (label == "") { label = lastLabel; return 0; }
469
470 //if the users enters label "0.06" and there is no "0.06" in their file use the next lowest label.
471 set<string> labels; labels.insert(label);
472 set<string> processedLabels;
473 set<string> userLabels = labels;
474
475 //as long as you are not at the end of the file or done wih the lines you want
476 while((list != NULL) && (userLabels.size() != 0)) {
477 if (m->getControl_pressed()) { return 0; }
478
479 if(labels.count(list->getLabel()) == 1){
480 processedLabels.insert(list->getLabel());
481 userLabels.erase(list->getLabel());
482 break;
483 }
484
485 if ((util.anyLabelsToProcess(list->getLabel(), userLabels, "") ) && (processedLabels.count(lastLabel) != 1)) {
486 string saveLabel = list->getLabel();
487
488 delete list;
489 list = input.getListVector(lastLabel);
490
491 processedLabels.insert(list->getLabel());
492 userLabels.erase(list->getLabel());
493
494 //restore real lastlabel to save below
495 list->setLabel(saveLabel);
496 break;
497 }
498
499 lastLabel = list->getLabel();
500
501 //get next line to process
502 //prevent memory leak
503 delete list;
504 list = input.getListVector();
505 }
506
507
508 if (m->getControl_pressed()) { return 0; }
509
510 //output error messages about any remaining user labels
511 bool needToRun = false;
512 for (set<string>::iterator it = userLabels.begin(); it != userLabels.end(); it++) {
513 m->mothurOut("Your file does not include the label " + *it);
514 if (processedLabels.count(lastLabel) != 1) { m->mothurOut(". I will use " + lastLabel + ".\n"); needToRun = true; }
515 else { m->mothurOut(". Please refer to " + lastLabel + ".\n"); }
516 }
517
518 //run last label if you need to
519 if (needToRun ) {
520 delete list;
521 list = input.getListVector(lastLabel);
522 }
523
524 return 0;
525 }
526 catch(exception& e) {
527 m->errorOut(e, "RemoveOtusCommand", "getListVector");
528 exit(1);
529 }
530 }
531 //**********************************************************************************************************************
getShared()532 SharedRAbundVectors* RemoveOtusCommand::getShared(){
533 try {
534 InputData input(sharedfile, "sharedfile",nullVector);
535 SharedRAbundVectors* lookup = input.getSharedRAbundVectors();
536 string lastLabel = lookup->getLabel();
537
538 if (label == "") { label = lastLabel; return lookup; }
539
540 //if the users enters label "0.06" and there is no "0.06" in their file use the next lowest label.
541 set<string> labels; labels.insert(label);
542 set<string> processedLabels;
543 set<string> userLabels = labels;
544
545 //as long as you are not at the end of the file or done wih the lines you want
546 while((lookup != NULL) && (userLabels.size() != 0)) {
547 if (m->getControl_pressed()) { delete lookup; return NULL; }
548
549 if(labels.count(lookup->getLabel()) == 1){
550 processedLabels.insert(lookup->getLabel());
551 userLabels.erase(lookup->getLabel());
552 break;
553 }
554
555 if ((util.anyLabelsToProcess(lookup->getLabel(), userLabels, "") ) && (processedLabels.count(lastLabel) != 1)) {
556 string saveLabel = lookup->getLabel();
557
558 delete lookup;
559 lookup = input.getSharedRAbundVectors(lastLabel);
560
561 processedLabels.insert(lookup->getLabel());
562 userLabels.erase(lookup->getLabel());
563
564 //restore real lastlabel to save below
565 lookup->setLabels(saveLabel);
566 break;
567 }
568
569 lastLabel = lookup->getLabel();
570
571 //get next line to process
572 //prevent memory leak
573 delete lookup;
574 lookup = input.getSharedRAbundVectors();
575 }
576
577
578 if (m->getControl_pressed()) { return 0; }
579
580 //output error messages about any remaining user labels
581 set<string>::iterator it;
582 bool needToRun = false;
583 for (it = userLabels.begin(); it != userLabels.end(); it++) {
584 m->mothurOut("Your file does not include the label " + *it);
585 if (processedLabels.count(lastLabel) != 1) {
586 m->mothurOut(". I will use " + lastLabel + ".\n");
587 needToRun = true;
588 }else {
589 m->mothurOut(". Please refer to " + lastLabel + ".\n");
590 }
591 }
592
593 //run last label if you need to
594 if (needToRun ) {
595 delete lookup;
596 lookup = input.getSharedRAbundVectors(lastLabel);
597 }
598
599 return lookup;
600 }
601 catch(exception& e) {
602 m->errorOut(e, "RemoveOtusCommand", "getShared");
603 exit(1);
604 }
605 }
606 //**********************************************************************************************************************
607
608
609
610