1 /*
2  *  getseqscommand.cpp
3  *  Mothur
4  *
5  *  Created by Sarah Westcott on 7/8/09.
6  *  Copyright 2009 Schloss Lab UMASS Amherst. All rights reserved.
7  *
8  */
9 
10 #include "getseqscommand.h"
11 #include "sequence.hpp"
12 #include "listvector.hpp"
13 #include "counttable.h"
14 #include "fastqread.h"
15 #include "inputdata.h"
16 #include "contigsreport.hpp"
17 #include "alignreport.hpp"
18 
19 //**********************************************************************************************************************
setParameters()20 vector<string> GetSeqsCommand::setParameters(){
21 	try {
22 		CommandParameter pfasta("fasta", "InputTypes", "", "", "none", "FNGLT", "none","fasta",false,false,true); parameters.push_back(pfasta);
23         CommandParameter pfastq("fastq", "InputTypes", "", "", "none", "FNGLT", "none","fastq",false,false,true); parameters.push_back(pfastq);
24         CommandParameter pname("name", "InputTypes", "", "", "NameCount", "FNGLT", "none","name",false,false,true); parameters.push_back(pname);
25         CommandParameter pcount("count", "InputTypes", "", "", "NameCount-CountGroup", "FNGLT", "none","count",false,false,true); parameters.push_back(pcount);
26 		CommandParameter pgroup("group", "InputTypes", "", "", "CountGroup", "FNGLT", "none","group",false,false,true); parameters.push_back(pgroup);
27 		CommandParameter plist("list", "InputTypes", "", "", "none", "FNGLT", "none","list",false,false,true); parameters.push_back(plist);
28 		CommandParameter ptaxonomy("taxonomy", "InputTypes", "", "", "none", "FNGLT", "none","taxonomy",false,false,true); parameters.push_back(ptaxonomy);
29 		CommandParameter palignreport("alignreport", "InputTypes", "", "", "none", "FNGLT", "none","alignreport",false,false); parameters.push_back(palignreport);
30         CommandParameter pcontigsreport("contigsreport", "InputTypes", "", "", "FNGLT", "FNGLT", "none","contigsreport",false,false); parameters.push_back(pcontigsreport);
31 		CommandParameter pqfile("qfile", "InputTypes", "", "", "none", "FNGLT", "none","qfile",false,false); parameters.push_back(pqfile);
32 		CommandParameter paccnos("accnos", "InputTypes", "", "", "none", "none", "none","",false,true,true); parameters.push_back(paccnos);
33 		CommandParameter pdups("dups", "Boolean", "", "T", "", "", "","",false,false); parameters.push_back(pdups);
34 		CommandParameter pseed("seed", "Number", "", "0", "", "", "","",false,false); parameters.push_back(pseed);
35         CommandParameter pinputdir("inputdir", "String", "", "", "", "", "","",false,false); parameters.push_back(pinputdir);
36         CommandParameter pformat("format", "Multiple", "sanger-illumina-solexa-illumina1.8+", "illumina1.8+", "", "", "","",false,false,true); parameters.push_back(pformat);
37 		CommandParameter poutputdir("outputdir", "String", "", "", "", "", "","",false,false); parameters.push_back(poutputdir);
38 		CommandParameter paccnos2("accnos2", "InputTypes", "", "", "none", "none", "none","",false,false); parameters.push_back(paccnos2);
39 
40         abort = false; calledHelp = false;
41 
42         vector<string> tempOutNames;
43         outputTypes["fasta"] = tempOutNames;
44         outputTypes["fastq"] = tempOutNames;
45         outputTypes["taxonomy"] = tempOutNames;
46         outputTypes["name"] = tempOutNames;
47         outputTypes["group"] = tempOutNames;
48         outputTypes["alignreport"] = tempOutNames;
49         outputTypes["contigsreport"] = tempOutNames;
50         outputTypes["list"] = tempOutNames;
51         outputTypes["qfile"] = tempOutNames;
52         outputTypes["count"] = tempOutNames;
53         outputTypes["accnosreport"] = tempOutNames;
54 
55 		vector<string> myArray;
56 		for (int i = 0; i < parameters.size(); i++) {	myArray.push_back(parameters[i].name);		}
57 		return myArray;
58 	}
59 	catch(exception& e) {
60 		m->errorOut(e, "GetSeqsCommand", "setParameters");
61 		exit(1);
62 	}
63 }
64 //**********************************************************************************************************************
getHelpString()65 string GetSeqsCommand::getHelpString(){
66 	try {
67 		string helpString = "";
68 		helpString += "The get.seqs command reads an .accnos file and any of the following file types: fasta, name, group, count, list, taxonomy, quality, fastq, contigsreport or alignreport file.\n";
69 		helpString += "It outputs a file containing only the sequences in the .accnos file.\n";
70 		helpString += "The get.seqs command parameters are accnos, fasta, name, group, list, taxonomy, qfile, alignreport, contigsreport, fastq and dups.  You must provide accnos unless you have a valid current accnos file, and at least one of the other parameters.\n";
71 		helpString += "The dups parameter allows you to add the entire line from a name file if you add any name from the line. default=true. \n";
72         helpString += "The format parameter is used to indicate whether your sequences are sanger, solexa, illumina1.8+ or illumina, default=illumina1.8+.\n";
73 		helpString += "The get.seqs command should be in the following format: get.seqs(accnos=yourAccnos, fasta=yourFasta).\n";
74 		helpString += "Example get.seqs(accnos=amazon.accnos, fasta=amazon.fasta).\n";
75 		;
76 		return helpString;
77 	}
78 	catch(exception& e) {
79 		m->errorOut(e, "GetSeqsCommand", "getHelpString");
80 		exit(1);
81 	}
82 }
83 //**********************************************************************************************************************
getOutputPattern(string type)84 string GetSeqsCommand::getOutputPattern(string type) {
85     try {
86         string pattern = "";
87 
88         if (type == "fasta")            {   pattern = "[filename],pick,[extension]";    }
89         else if (type == "fastq")       {   pattern = "[filename],pick,[extension]";    }
90         else if (type == "taxonomy")    {   pattern = "[filename],pick,[extension]";    }
91         else if (type == "name")        {   pattern = "[filename],pick,[extension]";    }
92         else if (type == "group")       {   pattern = "[filename],pick,[extension]";    }
93         else if (type == "count")       {   pattern = "[filename],pick,[extension]";    }
94         else if (type == "list")        {   pattern = "[filename],[distance],pick,[extension]";    }
95         else if (type == "qfile")       {   pattern = "[filename],pick,[extension]";    }
96         else if (type == "accnosreport")        {   pattern = "[filename],pick.accnos.report";      }
97         else if (type == "alignreport")         {   pattern = "[filename],pick.align.report";       }
98         else if (type == "contigsreport")       {   pattern = "[filename],pick.contigs.report";       }
99         else { m->mothurOut("[ERROR]: No definition for type " + type + " output pattern.\n"); m->setControl_pressed(true);  }
100 
101         return pattern;
102     }
103     catch(exception& e) {
104         m->errorOut(e, "GetSeqsCommand", "getOutputPattern");
105         exit(1);
106     }
107 }
108 //**********************************************************************************************************************
109 
GetSeqsCommand(set<string> n,string ffile,string lfile,string dupsFile,string dupsFileType,string output)110 GetSeqsCommand::GetSeqsCommand(set<string> n, string ffile, string lfile, string dupsFile, string dupsFileType, string output) : Command() {
111     try {
112         names = n;
113         dups = true;
114         outputdir = output;
115         fastafile = ffile;
116         listfile = lfile;
117 
118         abort = false; calledHelp = false;
119         vector<string> tempOutNames;
120         outputTypes["name"] = tempOutNames;
121         outputTypes["count"] = tempOutNames;
122         outputTypes["fasta"] = tempOutNames;
123 
124         if (dupsFile != "") {
125             if (dupsFileType == "count") {
126                 countfile = dupsFile;
127                 readCount();
128             }else { //names
129                 namefile = dupsFile;
130                 readName();
131             }
132         }
133 
134         if (fastafile != "")            {        readFasta();        }
135         if (listfile != "")             {        readList();         }
136 
137     }
138     catch(exception& e) {
139         m->errorOut(e, "GetSeqsCommand", "GetSeqsCommand - mothurRun");
140         exit(1);
141     }
142 }
143 
144 //**********************************************************************************************************************
GetSeqsCommand(string option)145 GetSeqsCommand::GetSeqsCommand(string option) : Command() {
146 	try {
147 		//allow user to run help
148 		if(option == "help") { help(); abort = true; calledHelp = true; }
149 		else if(option == "citation") { citation(); abort = true; calledHelp = true;}
150         else if(option == "category") {  abort = true; calledHelp = true;  }
151 
152 		else {
153 			OptionParser parser(option, setParameters());
154 			map<string,string> parameters = parser.getParameters();
155 
156 			ValidParameters validParameter;
157 
158 			//check for required parameters
159 			accnosfile = validParameter.validFile(parameters, "accnos");
160 			if (accnosfile == "not open") { abort = true; }
161 			else if (accnosfile == "not found") {
162 				accnosfile = current->getAccnosFile();
163 				if (accnosfile != "") {  m->mothurOut("Using " + accnosfile + " as input file for the accnos parameter.\n");  }
164 				else {
165 					m->mothurOut("You have no valid accnos file and accnos is required.\n");  abort = true;
166 				}
167 			}else { current->setAccnosFile(accnosfile); }
168 
169 			if (accnosfile2 == "not found") { accnosfile2 = ""; }
170 
171 			fastafile = validParameter.validFile(parameters, "fasta");
172 			if (fastafile == "not open") { fastafile = ""; abort = true; }
173 			else if (fastafile == "not found") {  fastafile = "";  }
174 			else { current->setFastaFile(fastafile); }
175 
176 			namefile = validParameter.validFile(parameters, "name");
177 			if (namefile == "not open") { namefile = ""; abort = true; }
178 			else if (namefile == "not found") {  namefile = "";  }
179 			else { current->setNameFile(namefile); }
180 
181 			groupfile = validParameter.validFile(parameters, "group");
182 			if (groupfile == "not open") { abort = true; }
183 			else if (groupfile == "not found") {  groupfile = "";  }
184 			else { current->setGroupFile(groupfile); }
185 
186 			alignfile = validParameter.validFile(parameters, "alignreport");
187 			if (alignfile == "not open") { abort = true; }
188 			else if (alignfile == "not found") {  alignfile = "";  }
189 
190             contigsreportfile = validParameter.validFile(parameters, "contigsreport");
191             if (contigsreportfile == "not open") { abort = true; }
192             else if (contigsreportfile == "not found") {  contigsreportfile = "";  }
193             else { current->setContigsReportFile(contigsreportfile); }
194 
195 			listfile = validParameter.validFile(parameters, "list");
196 			if (listfile == "not open") { abort = true; }
197 			else if (listfile == "not found") {  listfile = "";  }
198 			else { current->setListFile(listfile); }
199 
200 			taxfile = validParameter.validFile(parameters, "taxonomy");
201 			if (taxfile == "not open") { taxfile = ""; abort = true; }
202 			else if (taxfile == "not found") {  taxfile = "";  }
203 			else { current->setTaxonomyFile(taxfile); }
204 
205 			qualfile = validParameter.validFile(parameters, "qfile");
206 			if (qualfile == "not open") { abort = true; }
207 			else if (qualfile == "not found") {  qualfile = "";  }
208 			else { current->setQualFile(qualfile); }
209 
210             fastqfile = validParameter.validFile(parameters, "fastq");
211 			if (fastqfile == "not open") { abort = true; }
212 			else if (fastqfile == "not found") {  fastqfile = "";  }
213 
214 			accnosfile2 = validParameter.validFile(parameters, "accnos2");
215 			if (accnosfile2 == "not open") { abort = true; }
216 			else if (accnosfile2 == "not found") {  accnosfile2 = "";  }
217 
218             countfile = validParameter.validFile(parameters, "count");
219             if (countfile == "not open") { countfile = ""; abort = true; }
220             else if (countfile == "not found") { countfile = "";  }
221             else { current->setCountFile(countfile); }
222 
223             if ((namefile != "") && (countfile != "")) {
224                 m->mothurOut("[ERROR]: you may only use one of the following: name or count.\n");  abort = true;
225             }
226 
227             if ((groupfile != "") && (countfile != "")) {
228                 m->mothurOut("[ERROR]: you may only use one of the following: group or count.\n");  abort=true;
229             }
230 
231 
232 			string usedDups = "true";
233 			string temp = validParameter.valid(parameters, "dups");	if (temp == "not found") { temp = "true"; usedDups = ""; }
234 			dups = util.isTrue(temp);
235 
236             format = validParameter.valid(parameters, "format");		if (format == "not found"){	format = "illumina1.8+";	}
237 
238             if ((format != "sanger") && (format != "illumina") && (format != "illumina1.8+") && (format != "solexa"))  {
239                 m->mothurOut(format + " is not a valid format. Your format choices are sanger, solexa, illumina1.8+ and illumina, aborting.\n" );
240                 abort=true;
241             }
242 
243 			if ((fastqfile == "") && (fastafile == "") && (namefile == "") && (groupfile == "") && (alignfile == "") && (listfile == "") && (taxfile == "") && (qualfile == "") && (accnosfile2 == "") && (countfile == "") && (contigsreportfile == ""))  { m->mothurOut("You must provide one of the following: fasta, name, group, count, alignreport, contigsreport, taxonomy, quality, fastq or listfile.\n");  abort = true; }
244 
245             //read accnos file
246             if (!abort) { names = util.readAccnos(accnosfile);  }
247 		}
248 	}
249 	catch(exception& e) {
250 		m->errorOut(e, "GetSeqsCommand", "GetSeqsCommand");
251 		exit(1);
252 	}
253 }
254 //**********************************************************************************************************************
255 
execute()256 int GetSeqsCommand::execute(){
257 	try {
258 
259 		if (abort) { if (calledHelp) { return 0; }  return 2;	}
260 
261 		//read through the correct file and output lines you want to keep
262 		if (namefile != "")			    {		readName();			}
263 		if (fastafile != "")		    {		readFasta();		}
264         if (fastqfile != "")		    {		readFastq();		}
265 		if (groupfile != "")		    {		readGroup();		}
266         if (countfile != "")		    {		readCount();		}
267 		if (alignfile != "")		    {		readAlign();		}
268         if (contigsreportfile != "")    {       readContigs();      }
269 		if (listfile != "")			    {		readList();			}
270 		if (taxfile != "")			    {		readTax();			}
271 		if (qualfile != "")			    {		readQual();			}
272 		if (accnosfile2 != "")		    {		compareAccnos();	}
273 
274         if (m->getDebug()) { runSanityCheck(); }
275 
276 		if (m->getControl_pressed()) { outputTypes.clear(); for (int i = 0; i < outputNames.size(); i++) {	util.mothurRemove(outputNames[i]);  } return 0; }
277 
278 
279 		if (outputNames.size() != 0) {
280 			m->mothurOut("\nOutput File Names:\n");
281 			for (int i = 0; i < outputNames.size(); i++) {	m->mothurOut(outputNames[i]+"\n"); 	}
282 			m->mothurOutEndLine();
283 
284 			//set fasta file as new current fastafile
285 			string currentName = "";
286 			itTypes = outputTypes.find("fasta");
287 			if (itTypes != outputTypes.end()) {
288 				if ((itTypes->second).size() != 0) { currentName = (itTypes->second)[0]; current->setFastaFile(currentName); }
289 			}
290 
291 			itTypes = outputTypes.find("name");
292 			if (itTypes != outputTypes.end()) {
293 				if ((itTypes->second).size() != 0) { currentName = (itTypes->second)[0]; current->setNameFile(currentName); }
294 			}
295 
296 			itTypes = outputTypes.find("group");
297 			if (itTypes != outputTypes.end()) {
298 				if ((itTypes->second).size() != 0) { currentName = (itTypes->second)[0]; current->setGroupFile(currentName); }
299 			}
300 
301 			itTypes = outputTypes.find("list");
302 			if (itTypes != outputTypes.end()) {
303 				if ((itTypes->second).size() != 0) { currentName = (itTypes->second)[0]; current->setListFile(currentName); }
304 			}
305 
306 			itTypes = outputTypes.find("taxonomy");
307 			if (itTypes != outputTypes.end()) {
308 				if ((itTypes->second).size() != 0) { currentName = (itTypes->second)[0]; current->setTaxonomyFile(currentName); }
309 			}
310 
311 			itTypes = outputTypes.find("qfile");
312 			if (itTypes != outputTypes.end()) {
313 				if ((itTypes->second).size() != 0) { currentName = (itTypes->second)[0]; current->setQualFile(currentName); }
314 			}
315 
316             itTypes = outputTypes.find("count");
317 			if (itTypes != outputTypes.end()) {
318 				if ((itTypes->second).size() != 0) { currentName = (itTypes->second)[0]; current->setCountFile(currentName); }
319 			}
320 		}
321 
322 		return 0;
323 	}
324 
325 	catch(exception& e) {
326 		m->errorOut(e, "GetSeqsCommand", "execute");
327 		exit(1);
328 	}
329 }
330 //**********************************************************************************************************************
readFastq()331 void GetSeqsCommand::readFastq(){
332 	try {
333 		bool wroteSomething = false;
334 		int selectedCount = 0;
335 
336 		ifstream in; util.openInputFile(fastqfile, in);
337 
338 		string thisOutputDir = outputdir;
339 		if (outputdir == "") {  thisOutputDir += util.hasPath(fastqfile);  }
340 		map<string, string> variables;
341         variables["[filename]"] = thisOutputDir + util.getRootName(util.getSimpleName(fastqfile));
342         variables["[extension]"] = util.getExtension(fastqfile);
343 		string outputFileName = getOutputFileName("fastq", variables);
344 		ofstream out;
345 		util.openOutputFile(outputFileName, out);
346 
347         set<string> uniqueNames;
348 		while(!in.eof()){
349 
350 			if (m->getControl_pressed()) { in.close(); out.close(); util.mothurRemove(outputFileName); return; }
351 
352 			//read sequence name
353             bool ignore;
354             FastqRead fread(in, ignore, format); util.gobble(in);
355 
356 			if (!ignore) {
357                 string name = fread.getName();
358 
359                 if (names.count(name) != 0) {
360                     if (uniqueNames.count(name) == 0) { //this name hasn't been seen yet
361                         wroteSomething = true;
362                         selectedCount++;
363                         fread.printFastq(out);
364                         uniqueNames.insert(name);
365                     }else {
366                         m->mothurOut("[WARNING]: " + name + " is in your fastq file more than once.  Mothur requires sequence names to be unique. I will only add it once.\n");
367                     }
368                 }
369             }
370 
371 			util.gobble(in);
372 		}
373 		in.close();
374 		out.close();
375 
376 		if (wroteSomething == false) { m->mothurOut("Your file does not contain any sequence from the .accnos file.\n");   }
377 		outputNames.push_back(outputFileName);  outputTypes["fastq"].push_back(outputFileName);
378 
379 		m->mothurOut("Selected " + toString(selectedCount) + " sequences from your fastq file.\n");
380 
381 		return;
382 	}
383 	catch(exception& e) {
384 		m->errorOut(e, "GetSeqsCommand", "readFastq");
385 		exit(1);
386 	}
387 }
388 //**********************************************************************************************************************
readFasta()389 void GetSeqsCommand::readFasta(){
390 	try {
391 		string thisOutputDir = outputdir;
392 		if (outputdir == "") {  thisOutputDir += util.hasPath(fastafile);  }
393 		map<string, string> variables;
394         variables["[filename]"] = thisOutputDir + util.getRootName(util.getSimpleName(fastafile));
395         variables["[extension]"] = util.getExtension(fastafile);
396 		string outputFileName = getOutputFileName("fasta", variables);
397 		ofstream out; util.openOutputFile(outputFileName, out);
398 
399 		ifstream in; util.openInputFile(fastafile, in);
400 		string name;
401 
402 		bool wroteSomething = false;
403 		int selectedCount = 0;
404 
405         if (m->getDebug()) { set<string> temp; sanity["fasta"] = temp; }
406 
407         set<string> uniqueNames; int line = 0; int redundNum = 0;
408 		while(!in.eof()){
409 
410 			if (m->getControl_pressed()) { in.close(); out.close(); util.mothurRemove(outputFileName);  return; }
411 
412 			Sequence currSeq(in);
413 			name = currSeq.getName();
414 
415             if (!dups) {//adjust name if needed
416                 map<string, string>::iterator it = uniqueMap.find(name);
417                 if (it != uniqueMap.end()) { currSeq.setName(it->second); }
418             }
419 
420             name = currSeq.getName();
421 
422 			if (name != "") {
423                 line++;
424 				//if this name is in the accnos file
425 				if (names.count(name) != 0) {
426                     if (uniqueNames.count(name) == 0) { //this name hasn't been seen yet
427                         wroteSomething = true;
428 
429                         currSeq.printSequence(out);
430                         selectedCount++;
431                         uniqueNames.insert(name);
432 
433                         if (m->getDebug()) { sanity["fasta"].insert(name); }
434                     }else {
435                         m->mothurOut("[WARNING]: " + name + " is in your fasta file more than once.  Mothur requires sequence names to be unique. I will only add it once.\n");
436                         redundNum++;
437                     }
438                 }
439 			}
440 			util.gobble(in);
441 		}
442 		in.close();
443 		out.close();
444 
445 		if (wroteSomething == false) { m->mothurOut("Your file does not contain any sequence from the .accnos file.\n");  }
446 		outputNames.push_back(outputFileName);  outputTypes["fasta"].push_back(outputFileName);
447 
448 		m->mothurOut("Selected " + toString(selectedCount) + " sequences from your fasta file.\n");
449 
450 		return;
451 	}
452 	catch(exception& e) {
453 		m->errorOut(e, "GetSeqsCommand", "readFasta");
454 		exit(1);
455 	}
456 }
457 //**********************************************************************************************************************
readQual()458 void GetSeqsCommand::readQual(){
459 	try {
460 		string thisOutputDir = outputdir;
461 		if (outputdir == "") {  thisOutputDir += util.hasPath(qualfile);  }
462 		map<string, string> variables;
463         variables["[filename]"] = thisOutputDir + util.getRootName(util.getSimpleName(qualfile));
464         variables["[extension]"] = util.getExtension(qualfile);
465 		string outputFileName = getOutputFileName("qfile", variables);
466 		ofstream out;
467 		util.openOutputFile(outputFileName, out);
468 
469 		ifstream in;
470 		util.openInputFile(qualfile, in);
471 		string name;
472 
473 		bool wroteSomething = false;
474 		int selectedCount = 0;
475 
476         if (m->getDebug()) { set<string> temp; sanity["qual"] = temp; }
477 
478         set<string> uniqueNames;
479 		while(!in.eof()){
480 
481 			QualityScores qual(in); util.gobble(in);
482 
483             if (!dups) {//adjust name if needed
484                 map<string, string>::iterator it = uniqueMap.find(qual.getName());
485                 if (it != uniqueMap.end()) { qual.setName(it->second); }
486             }
487 
488             string name = qual.getName();
489 
490             if (names.count(name) != 0) {
491                 if (uniqueNames.count(name) == 0) { //this name hasn't been seen yet
492                     uniqueNames.insert(name);
493                     wroteSomething = true;
494 
495                     qual.printQScores(out);
496                     selectedCount++;
497                     if (m->getDebug()) { sanity["qual"].insert(name); }
498                 }else {
499                     m->mothurOut("[WARNING]: " + name + " is in your qfile more than once.  Mothur requires sequence names to be unique. I will only add it once.\n");
500                 }
501             }
502 
503 			util.gobble(in);
504 		}
505 		in.close();
506 		out.close();
507 
508 		if (wroteSomething == false) { m->mothurOut("Your file does not contain any sequence from the .accnos file.\n");   }
509 		outputNames.push_back(outputFileName);  outputTypes["qfile"].push_back(outputFileName);
510 
511 		m->mothurOut("Selected " + toString(selectedCount) + " sequences from your quality file.\n");
512 
513 		return;
514 	}
515 	catch(exception& e) {
516 		m->errorOut(e, "GetSeqsCommand", "readQual");
517 		exit(1);
518 	}
519 }
520 //**********************************************************************************************************************
readCount()521 void GetSeqsCommand::readCount(){
522 	try {
523 		string thisOutputDir = outputdir;
524 		if (outputdir == "") {  thisOutputDir += util.hasPath(countfile);  }
525 		map<string, string> variables;
526 		variables["[filename]"] = thisOutputDir + util.getRootName(util.getSimpleName(countfile));
527         variables["[extension]"] = util.getExtension(countfile);
528 		string outputFileName = getOutputFileName("count", variables);
529 
530         CountTable ct; ct.readTable(countfile, true, false, names);
531 
532         bool wroteSomething = false;
533         int selectedCount = ct.getNumSeqs();
534         if (selectedCount != 0) { wroteSomething = true; }
535 
536         ct.printTable(outputFileName);
537 
538 		if (wroteSomething == false) {  m->mothurOut("Your file does not contain any sequence from the .accnos file.\n");   }
539 		outputTypes["count"].push_back(outputFileName); outputNames.push_back(outputFileName);
540 
541 		m->mothurOut("Selected " + toString(selectedCount) + " sequences from your count file.\n");
542 
543 		return;
544 	}
545 	catch(exception& e) {
546 		m->errorOut(e, "GetSeqsCommand", "readCount");
547 		exit(1);
548 	}
549 }
550 //**********************************************************************************************************************
readList()551 void GetSeqsCommand::readList(){
552 	try {
553 		string thisOutputDir = outputdir;
554 		if (outputdir == "") {  thisOutputDir += util.hasPath(listfile);  }
555         map<string, string> variables;
556 		variables["[filename]"] = thisOutputDir + util.getRootName(util.getSimpleName(listfile));
557         variables["[extension]"] = util.getExtension(listfile);
558         InputData input(listfile, "list", nullVector);
559         ListVector* list = input.getListVector();
560 
561 		bool wroteSomething = false;
562 		int selectedCount = 0;
563 
564         if (m->getDebug()) { set<string> temp; sanity["list"] = temp; }
565 
566 		while(list != NULL) {
567 
568             set<string> uniqueNames;
569 			selectedCount = 0;
570 
571             //make a new list vector
572 			ListVector newList;
573 			newList.setLabel(list->getLabel());
574 
575             variables["[distance]"] = list->getLabel();
576             string outputFileName = getOutputFileName("list", variables);
577 
578 			ofstream out;
579 			util.openOutputFile(outputFileName, out);
580 			outputTypes["list"].push_back(outputFileName);  outputNames.push_back(outputFileName);
581 
582             vector<string> binLabels = list->getLabels();
583             vector<string> newBinLabels;
584 
585             if (m->getControl_pressed()) { out.close();  return; }
586 
587 			//for each bin
588 			for (int i = 0; i < list->getNumBins(); i++) {
589 
590 				//parse out names that are in accnos file
591 				string binnames = list->get(i);
592                 vector<string> bnames;
593                 util.splitAtComma(binnames, bnames);
594 
595 				string newNames = "";
596                 for (int j = 0; j < bnames.size(); j++) {
597                     string name = bnames[j];
598 
599                     //if that name is in the .accnos file, add it
600                     if (names.count(name) != 0) {
601                         if (uniqueNames.count(name) == 0) { //this name hasn't been seen yet
602                             uniqueNames.insert(name);
603                             newNames += name + ",";
604                             selectedCount++;
605                             if (m->getDebug()) { sanity["list"].insert(name); }
606                         }else {
607                             m->mothurOut("[WARNING]: " + name + " is in your list file more than once.  Mothur requires sequence names to be unique. I will only add it once.\n");
608                         }
609                     }
610                 }
611 
612 				//if there are names in this bin add to new list
613 				if (newNames != "") {
614 					newNames = newNames.substr(0, newNames.length()-1); //rip off extra comma
615 					newList.push_back(newNames);
616                     newBinLabels.push_back(binLabels[i]);
617 				}
618 			}
619 
620 			//print new listvector
621 			if (newList.getNumBins() != 0) {
622 				wroteSomething = true;
623 				newList.setLabels(newBinLabels);
624 				newList.print(out, false);
625 			}
626 
627 
628             out.close();
629 
630             delete list;
631             list = input.getListVector();
632 		}
633 
634 
635 		if (wroteSomething == false) { m->mothurOut("Your file does not contain any sequence from the .accnos file.\n");  }
636 
637 		m->mothurOut("Selected " + toString(selectedCount) + " sequences from your list file.\n");
638 
639 		return;
640 	}
641 	catch(exception& e) {
642 		m->errorOut(e, "GetSeqsCommand", "readList");
643 		exit(1);
644 	}
645 }
646 //**********************************************************************************************************************
readName()647 void GetSeqsCommand::readName(){
648 	try {
649 		string thisOutputDir = outputdir;
650 		if (outputdir == "") {  thisOutputDir += util.hasPath(namefile);  }
651         map<string, string> variables;
652 		variables["[filename]"] = thisOutputDir + util.getRootName(util.getSimpleName(namefile));
653         variables["[extension]"] = util.getExtension(namefile);
654 		string outputFileName = getOutputFileName("name", variables);
655 		ofstream out; util.openOutputFile(outputFileName, out);
656 
657 		ifstream in; util.openInputFile(namefile, in);
658 		string name, firstCol, secondCol;
659 
660 		bool wroteSomething = false;
661 		int selectedCount = 0;
662 
663         if (m->getDebug()) { set<string> temp; sanity["name"] = temp; }
664         if (m->getDebug()) { set<string> temp; sanity["dupname"] = temp; }
665 
666         set<string> uniqueNames;
667 		while(!in.eof()){
668 
669 			if (m->getControl_pressed()) { in.close(); out.close(); util.mothurRemove(outputFileName);  return; }
670 
671 			in >> firstCol;			util.gobble(in);
672 			in >> secondCol;        util.gobble(in);
673 
674 			string hold = "";
675 			if (dups) { hold = secondCol; }
676 
677 			vector<string> parsedNames;
678 			util.splitAtComma(secondCol, parsedNames);
679 
680 			vector<string> validSecond; vector<string> parsedNames2;
681             bool parsedError = false;
682 			for (int i = 0; i < parsedNames.size(); i++) {
683                 if (names.count(parsedNames[i]) != 0) {
684                     if (uniqueNames.count(parsedNames[i]) == 0) { //this name hasn't been seen yet
685                         uniqueNames.insert(parsedNames[i]);
686                         validSecond.push_back(parsedNames[i]);
687                         parsedNames2.push_back(parsedNames[i]);
688                         if (m->getDebug()) { sanity["dupname"].insert(parsedNames[i]); }
689                     }else {
690                         m->mothurOut("[WARNING]: " + parsedNames[i] + " is in your name file more than once.  Mothur requires sequence names to be unique. I will only add it once.\n");
691                         parsedError = true;
692                     }
693 				}
694 			}
695             if (parsedError) {
696                 parsedNames = parsedNames2;
697                 hold = "";
698                 if (parsedNames.size() != 0) {
699                     for (int i = 0; i < parsedNames.size()-1; i++) {  hold += parsedNames[i] + ',';  }
700                     hold += parsedNames[parsedNames.size()-1] + '\n';
701                 }
702             }
703 
704 			if (dups && (validSecond.size() != 0)) { //dups = true and we want to add someone, then add everyone
705 				for (int i = 0; i < parsedNames.size(); i++) {  names.insert(parsedNames[i]); if (m->getDebug()) { sanity["dupname"].insert(parsedNames[i]); } }
706 				out << firstCol << '\t' << hold << endl;
707 				wroteSomething = true;
708 				selectedCount += parsedNames.size();
709                 if (m->getDebug()) { sanity["name"].insert(firstCol); }
710 			}else {
711 
712                 if (validSecond.size() != 0) {
713                     selectedCount += validSecond.size();
714 
715                     //if the name in the first column is in the set then print it and any other names in second column also in set
716                     if (names.count(firstCol) != 0) {
717 
718                         wroteSomething = true;
719 
720                         out << firstCol << '\t';
721 
722                         //you know you have at least one valid second since first column is valid
723                         for (int i = 0; i < validSecond.size()-1; i++) {  out << validSecond[i] << ',';  }
724                         out << validSecond[validSecond.size()-1] << endl;
725 
726                         if (m->getDebug()) { sanity["name"].insert(firstCol); }
727 
728 
729                         //make first name in set you come to first column and then add the remaining names to second column
730                     }else {
731 
732                         //you want part of this row
733                         if (validSecond.size() != 0) {
734 
735                             wroteSomething = true;
736 
737                             out << validSecond[0] << '\t';
738                             //we are changing the unique name in the fasta file
739                             uniqueMap[firstCol] = validSecond[0];
740 
741                             //you know you have at least one valid second since first column is valid
742                             for (int i = 0; i < validSecond.size()-1; i++) {  out << validSecond[i] << ',';  }
743                             out << validSecond[validSecond.size()-1] << endl;
744 
745                             if (m->getDebug()) { sanity["name"].insert(validSecond[0]); }
746                         }
747                     }
748                 }
749 			}
750 		}
751 		in.close(); out.close();
752 
753 		if (wroteSomething == false) { m->mothurOut("Your file does not contain any sequence from the .accnos file.\n");  }
754 		outputNames.push_back(outputFileName); outputTypes["name"].push_back(outputFileName);
755 
756 		m->mothurOut("Selected " + toString(selectedCount) + " sequences from your name file.\n");
757 
758 		return;
759 	}
760 	catch(exception& e) {
761 		m->errorOut(e, "GetSeqsCommand", "readName");
762 		exit(1);
763 	}
764 }
765 //**********************************************************************************************************************
readGroup()766 void GetSeqsCommand::readGroup(){
767 	try {
768 		string thisOutputDir = outputdir;
769 		if (outputdir == "") {  thisOutputDir += util.hasPath(groupfile);  }
770 		map<string, string> variables;
771 		variables["[filename]"] = thisOutputDir + util.getRootName(util.getSimpleName(groupfile));
772         variables["[extension]"] = util.getExtension(groupfile);
773 		string outputFileName = getOutputFileName("group", variables);
774 		ofstream out;
775 		util.openOutputFile(outputFileName, out);
776 
777 
778 		ifstream in;
779 		util.openInputFile(groupfile, in);
780 		string name, group;
781 
782 		bool wroteSomething = false;
783 		int selectedCount = 0;
784 
785         if (m->getDebug()) { set<string> temp; sanity["group"] = temp; }
786 
787         set<string> uniqueNames;
788 		while(!in.eof()){
789 
790 			if (m->getControl_pressed()) { in.close(); out.close(); util.mothurRemove(outputFileName);  return; }
791 
792 
793 			in >> name;		util.gobble(in);		//read from first column
794 			in >> group;			//read from second column
795 
796 
797             if (names.count(name) != 0) {
798                 if (uniqueNames.count(name) == 0) { //this name hasn't been seen yet
799                     uniqueNames.insert(name);
800                     wroteSomething = true;
801 
802                     out << name  << '\t' << group << endl;
803                     selectedCount++;
804 
805                     if (m->getDebug()) {  sanity["group"].insert(name); }
806                 }else {
807                     m->mothurOut("[WARNING]: " + name + " is in your group file more than once.  Mothur requires sequence names to be unique. I will only add it once.\n");
808                 }
809             }
810 
811 			util.gobble(in);
812 		}
813 		in.close();
814 		out.close();
815 
816 		if (wroteSomething == false) { m->mothurOut("Your file does not contain any sequence from the .accnos file.\n");  }
817 		outputNames.push_back(outputFileName);  outputTypes["group"].push_back(outputFileName);
818 
819 		m->mothurOut("Selected " + toString(selectedCount) + " sequences from your group file.\n");
820 
821 		return;
822 	}
823 	catch(exception& e) {
824 		m->errorOut(e, "GetSeqsCommand", "readGroup");
825 		exit(1);
826 	}
827 }
828 //**********************************************************************************************************************
readTax()829 void GetSeqsCommand::readTax(){
830 	try {
831 		string thisOutputDir = outputdir;
832 		if (outputdir == "") {  thisOutputDir += util.hasPath(taxfile);  }
833 		map<string, string> variables;
834 		variables["[filename]"] = thisOutputDir + util.getRootName(util.getSimpleName(taxfile));
835         variables["[extension]"] = util.getExtension(taxfile);
836 		string outputFileName = getOutputFileName("taxonomy", variables);
837 		ofstream out;
838 		util.openOutputFile(outputFileName, out);
839 
840 		ifstream in;
841 		util.openInputFile(taxfile, in);
842 		string name, tax;
843 
844 		bool wroteSomething = false;
845 		int selectedCount = 0;
846 
847         if (m->getDebug()) { set<string> temp; sanity["tax"] = temp; }
848 
849         set<string> uniqueNames;
850 		while(!in.eof()){
851 
852 			if (m->getControl_pressed()) { in.close(); out.close(); util.mothurRemove(outputFileName);  return; }
853 
854             in >> name; util.gobble(in);
855             tax = util.getline(in); util.gobble(in);
856 
857             if (!dups) {//adjust name if needed
858                 map<string, string>::iterator it = uniqueMap.find(name);
859                 if (it != uniqueMap.end()) { name = it->second; }
860             }
861 
862             if (names.count(name) != 0) {
863                 if (uniqueNames.count(name) == 0) { //this name hasn't been seen yet
864                     uniqueNames.insert(name);
865 
866                     wroteSomething = true;
867 
868                     out << name << '\t' << tax << endl;
869                     selectedCount++;
870 
871                     if (m->getDebug()) { sanity["tax"].insert(name); }
872                 }else {
873                     m->mothurOut("[WARNING]: " + name + " is in your taxonomy file more than once.  Mothur requires sequence names to be unique. I will only add it once.\n");
874                 }
875 			}
876 		}
877 		in.close();
878 		out.close();
879 
880 		if (wroteSomething == false) { m->mothurOut("Your file does not contain any sequence from the .accnos file.\n");  }
881 		outputNames.push_back(outputFileName);  outputTypes["taxonomy"].push_back(outputFileName);
882 
883 		m->mothurOut("Selected " + toString(selectedCount) + " sequences from your taxonomy file.\n");
884 
885 		return;
886 
887 	}
888 	catch(exception& e) {
889 		m->errorOut(e, "GetSeqsCommand", "readTax");
890 		exit(1);
891 	}
892 }
893 //**********************************************************************************************************************
894 //alignreport file has a column header line then all other lines contain 16 columns.  we just want the first column since that contains the name
readAlign()895 void GetSeqsCommand::readAlign(){
896 	try {
897 		string thisOutputDir = outputdir;
898 		if (outputdir == "") {  thisOutputDir += util.hasPath(alignfile);  }
899         map<string, string> variables;
900 		variables["[filename]"] = thisOutputDir + util.getRootName(util.getSimpleName(alignfile));
901 		string outputFileName = getOutputFileName("alignreport", variables);
902 
903         ofstream out; util.openOutputFile(outputFileName, out);
904         ifstream in; util.openInputFile(alignfile, in);
905 
906         AlignReport report;
907         report.readHeaders(in); util.gobble(in);
908         report.printHeaders(out);
909 
910         bool wroteSomething = false; int selectedCount = 0;
911 
912         set<string> uniqueNames;
913 		while(!in.eof()){
914 
915 			if (m->getControl_pressed()) { in.close(); out.close(); util.mothurRemove(outputFileName);  return; }
916 
917 			report.read(in); util.gobble(in);
918             string name = report.getQueryName();
919 
920             if (!dups) {//adjust name if needed
921                 map<string, string>::iterator it = uniqueMap.find(name);
922                 if (it != uniqueMap.end()) { name = it->second; }
923             }
924 
925 			//if this name is in the accnos file
926             if (names.count(name) != 0) {
927                 if (uniqueNames.count(name) == 0) { //this name hasn't been seen yet
928                     uniqueNames.insert(name);
929                     wroteSomething = true;
930                     selectedCount++;
931 
932                     report.print(out);
933                 }else {
934                     m->mothurOut("[WARNING]: " + name + " is in your alignreport file more than once.  Mothur requires sequence names to be unique. I will only add it once.\n");
935                 }
936 			}
937 		}
938 		in.close(); out.close();
939 
940 		if (wroteSomething == false) { m->mothurOut("Your file does not contain any sequence from the .accnos file.\n");  }
941 		outputNames.push_back(outputFileName);  outputTypes["alignreport"].push_back(outputFileName);
942 
943 		m->mothurOut("Selected " + toString(selectedCount) + " sequences from your alignreport file.\n");
944 
945 		return;
946 	}
947 	catch(exception& e) {
948 		m->errorOut(e, "GetSeqsCommand", "readAlign");
949 		exit(1);
950 	}
951 }
952 //**********************************************************************************************************************
953 //contigsreport file has a column header line then all other lines contain 8 columns.  we just want the first column since that contains the name
readContigs()954 void GetSeqsCommand::readContigs(){
955     try {
956         string thisOutputDir = outputdir;
957         if (outputdir == "") {  thisOutputDir += util.hasPath(contigsreportfile);  }
958         map<string, string> variables;
959         variables["[filename]"] = thisOutputDir + util.getRootName(util.getSimpleName(contigsreportfile));
960         string outputFileName = getOutputFileName("contigsreport", variables);
961         ofstream out; util.openOutputFile(outputFileName, out);
962 
963         bool wroteSomething = false;
964         int selectedCount = 0;
965 
966         set<string> uniqueNames;
967         ifstream in; util.openInputFile(contigsreportfile, in);
968 
969         ContigsReport report;
970         report.readHeaders(in); util.gobble(in);
971         report.printHeaders(out);
972 
973         while(!in.eof()){
974 
975             if (m->getControl_pressed()) { break; }
976 
977             report.read(in); util.gobble(in);
978             string name = report.getName();
979 
980             if (!dups) {//adjust name if needed
981                 map<string, string>::iterator it = uniqueMap.find(name);
982                 if (it != uniqueMap.end()) { name = it->second; }
983             }
984 
985             if (names.count(name) != 0) {
986                 if (uniqueNames.count(name) == 0) { //this name hasn't been seen yet
987                     uniqueNames.insert(name);
988                     wroteSomething = true;  selectedCount++;
989 
990                     report.print(out);
991                 }else {
992                     m->mothurOut("[WARNING]: " + name + " is in your contigsreport file more than once.  Mothur requires sequence names to be unique. I will only add it once.\n");
993                 }
994             }
995         }
996         in.close();
997         out.close();
998 
999         if (wroteSomething == false) { m->mothurOut("Your file does not contain any sequence from the .accnos file.\n");  ofstream out1; util.openOutputFile(outputFileName, out1); out1.close(); } //reopening file clears header line
1000         outputNames.push_back(outputFileName);  outputTypes["contigsreport"].push_back(outputFileName);
1001 
1002         m->mothurOut("Selected " + toString(selectedCount) + " sequences from your contigsreport file.\n");
1003 
1004         return;
1005     }
1006     catch(exception& e) {
1007         m->errorOut(e, "ListSeqsCommand", "readContigs");
1008         exit(1);
1009     }
1010 }
1011 //**********************************************************************************************************************
1012 //just looking at common mistakes.
runSanityCheck()1013 int GetSeqsCommand::runSanityCheck(){
1014 	try {
1015         string thisOutputDir = outputdir;
1016         if (outputdir == "") {  thisOutputDir += util.hasPath(fastafile);  }
1017         string filename = outputdir + "get.seqs.debug.report";
1018 
1019         ofstream out;
1020 		util.openOutputFile(filename, out);
1021 
1022 
1023         //compare fasta, name, qual and taxonomy if given to make sure they contain the same seqs
1024         if (fastafile != "") {
1025             if (namefile != "") { //compare with fasta
1026                 if (sanity["fasta"] != sanity["name"]) { //create mismatch file
1027                     createMisMatchFile(out, fastafile, namefile, sanity["fasta"], sanity["name"]);
1028                 }
1029             }
1030             if (qualfile != "") {
1031                 if (sanity["fasta"] != sanity["qual"]) { //create mismatch file
1032                     createMisMatchFile(out, fastafile, qualfile, sanity["fasta"], sanity["qual"]);
1033                 }
1034             }
1035             if (taxfile != "") {
1036                 if (sanity["fasta"] != sanity["tax"]) { //create mismatch file
1037                     createMisMatchFile(out, fastafile, taxfile, sanity["fasta"], sanity["tax"]);
1038                 }
1039             }
1040         }
1041 
1042         //compare dupnames, groups and list if given to make sure they match
1043         if (namefile != "") {
1044             if (groupfile != "") {
1045                 if (sanity["dupname"] != sanity["group"]) { //create mismatch file
1046                     createMisMatchFile(out, namefile, groupfile, sanity["dupname"], sanity["group"]);
1047                 }
1048             }
1049             if (listfile != "") {
1050                 if (sanity["dupname"] != sanity["list"]) { //create mismatch file
1051                     createMisMatchFile(out, namefile, listfile, sanity["dupname"], sanity["list"]);
1052                 }
1053             }
1054         }else{
1055 
1056             if ((groupfile != "") && (fastafile != "")) {
1057                 if (sanity["fasta"] != sanity["group"]) { //create mismatch file
1058                     createMisMatchFile(out, fastafile, groupfile, sanity["fasta"], sanity["group"]);
1059                 }
1060             }
1061         }
1062 
1063         out.close();
1064 
1065         if (util.isBlank(filename)) { util.mothurRemove(filename); }
1066         else { m->mothurOut("\n[DEBUG]: " + filename + " contains the file mismatches.\n");outputNames.push_back(filename); outputTypes["debug"].push_back(filename); }
1067 
1068         return 0;
1069     }
1070 	catch(exception& e) {
1071 		m->errorOut(e, "GetSeqsCommand", "runSanityCheck");
1072 		exit(1);
1073 	}
1074 }
1075 //**********************************************************************************************************************
1076 //just looking at common mistakes.
createMisMatchFile(ofstream & out,string filename1,string filename2,set<string> set1,set<string> set2)1077 int GetSeqsCommand::createMisMatchFile(ofstream& out, string filename1, string filename2, set<string> set1, set<string> set2){
1078 	try {
1079         out << "****************************************" << endl << endl;
1080         out << "Names unique to " << filename1 << ":\n";
1081 
1082         //remove names in set1 that are also in set2
1083         for (set<string>::iterator it = set1.begin(); it != set1.end();) {
1084             string name = *it;
1085 
1086             if (set2.count(name) == 0)  { out << name << endl;  } //name unique to set1
1087             else                        { set2.erase(name);     } //you are in both so erase
1088             set1.erase(it++);
1089         }
1090 
1091         out << "\nNames unique to " << filename2 << ":\n";
1092         //output results
1093         for (set<string>::iterator it = set2.begin(); it != set2.end(); it++) {  out << *it << endl;  }
1094 
1095         out << "****************************************" << endl << endl;
1096 
1097         return 0;
1098     }
1099 	catch(exception& e) {
1100 		m->errorOut(e, "GetSeqsCommand", "runSanityCheck");
1101 		exit(1);
1102 	}
1103 }
1104 //**********************************************************************************************************************
1105 
compareAccnos()1106 int GetSeqsCommand::compareAccnos(){
1107 	try {
1108 
1109 		string thisOutputDir = outputdir;
1110 		if (outputdir == "") {  thisOutputDir += util.hasPath(accnosfile);  }
1111         map<string, string> variables;
1112 		variables["[filename]"] = thisOutputDir + util.getRootName(util.getSimpleName(accnosfile));
1113 		string outputFileName = getOutputFileName("accnosreport", variables);
1114 
1115 		ofstream out;
1116 		util.openOutputFile(outputFileName, out);
1117 
1118 		ifstream in;
1119 		util.openInputFile(accnosfile2, in);
1120 		string name;
1121 
1122 		set<string> namesAccnos2;
1123 		set<string> namesDups;
1124 		set<string> namesAccnos = names;
1125 
1126 		map<string, int> nameCount;
1127 
1128 		if (namefile != "") {
1129 			ifstream inName;
1130 			util.openInputFile(namefile, inName);
1131 
1132 
1133 			while(!inName.eof()){
1134 
1135 				if (m->getControl_pressed()) { inName.close(); return 0; }
1136 
1137 				string thisname, repnames;
1138 
1139 				inName >> thisname;		util.gobble(inName);		//read from first column
1140 				inName >> repnames;			//read from second column
1141 
1142 				int num = util.getNumNames(repnames);
1143 				nameCount[thisname] = num;
1144 
1145 				util.gobble(inName);
1146 			}
1147 			inName.close();
1148 		}
1149 
1150 		while(!in.eof()){
1151 			in >> name;
1152 
1153 			if (namesAccnos.count(name) == 0){ //name unique to accnos2
1154 				int pos = name.find_last_of('_');
1155 				string tempName = name;
1156 				if (pos != string::npos) {  tempName = tempName.substr(pos+1);  }
1157 				if (namesAccnos.count(tempName) == 0){
1158 					namesAccnos2.insert(name);
1159 				}else { //you are in both so erase
1160 					namesAccnos.erase(name);
1161 					namesDups.insert(name);
1162 				}
1163 			}else { //you are in both so erase
1164 				namesAccnos.erase(name);
1165 				namesDups.insert(name);
1166 			}
1167 
1168 			util.gobble(in);
1169 		}
1170 		in.close();
1171 
1172 		out << "Names in both files : " + toString(namesDups.size()) << endl;
1173 		m->mothurOut("Names in both files : " + toString(namesDups.size())); m->mothurOutEndLine();
1174 
1175 		for (set<string>::iterator it = namesDups.begin(); it != namesDups.end(); it++) {
1176 			out << (*it);
1177 			if (namefile != "") { out << '\t' << nameCount[(*it)]; }
1178 			out << endl;
1179 		}
1180 
1181 		out << "Names unique to " + accnosfile + " : " + toString(namesAccnos.size()) << endl;
1182 		m->mothurOut("Names unique to " + accnosfile + " : " + toString(namesAccnos.size())); m->mothurOutEndLine();
1183 
1184 		for (set<string>::iterator it = namesAccnos.begin(); it != namesAccnos.end(); it++) {
1185 			out << (*it);
1186 			if (namefile != "") { out << '\t' << nameCount[(*it)]; }
1187 			out << endl;
1188 		}
1189 
1190 		out << "Names unique to " + accnosfile2 + " : " + toString(namesAccnos2.size()) << endl;
1191 		m->mothurOut("Names unique to " + accnosfile2 + " : " + toString(namesAccnos2.size())); m->mothurOutEndLine();
1192 
1193 		for (set<string>::iterator it = namesAccnos2.begin(); it != namesAccnos2.end(); it++) {
1194 			out << (*it);
1195 			if (namefile != "") { out << '\t' << nameCount[(*it)]; }
1196 			out << endl;
1197 		}
1198 
1199 		out.close();
1200 
1201 		outputNames.push_back(outputFileName);  outputTypes["accnosreport"].push_back(outputFileName);
1202 
1203 		return 0;
1204 
1205 	}
1206 	catch(exception& e) {
1207 		m->errorOut(e, "GetSeqsCommand", "compareAccnos");
1208 		exit(1);
1209 	}
1210 }
1211 
1212 
1213 //**********************************************************************************************************************
1214 
1215