1 /*
2  *  listseqscommand.cpp
3  *  Mothur
4  *
5  *  Created by Sarah Westcott on 7/8/09.
6  *  Copyright 2009 Schloss Lab UMASS Amherst. All rights reserved.
7  *
8  */
9 
10 #include "listseqscommand.h"
11 #include "sequence.hpp"
12 #include "listvector.hpp"
13 #include "counttable.h"
14 #include "fastqread.h"
15 
16 //**********************************************************************************************************************
setParameters()17 vector<string> ListSeqsCommand::setParameters(){
18 	try {
19         CommandParameter pfastq("fastq", "InputTypes", "", "", "FNGLT", "FNGLT", "none","accnos",false,false,true); parameters.push_back(pfastq);
20 		CommandParameter pfasta("fasta", "InputTypes", "", "", "FNGLT", "FNGLT", "none","accnos",false,false,true); parameters.push_back(pfasta);
21 		CommandParameter pname("name", "InputTypes", "", "", "FNGLT", "FNGLT", "none","accnos",false,false,true); parameters.push_back(pname);
22         CommandParameter pcount("count", "InputTypes", "", "", "FNGLT", "FNGLT", "none","accnos",false,false,true); parameters.push_back(pcount);
23 		CommandParameter pgroup("group", "InputTypes", "", "", "FNGLT", "FNGLT", "none","accnos",false,false,true); parameters.push_back(pgroup);
24 		CommandParameter plist("list", "InputTypes", "", "", "FNGLT", "FNGLT", "none","accnos",false,false,true); parameters.push_back(plist);
25 		CommandParameter ptaxonomy("taxonomy", "InputTypes", "", "", "FNGLT", "FNGLT", "none","accnos",false,false,true); parameters.push_back(ptaxonomy);
26 		CommandParameter palignreport("alignreport", "InputTypes", "", "", "FNGLT", "FNGLT", "none","accnos",false,false); parameters.push_back(palignreport);
27         CommandParameter pcontigsreport("contigsreport", "InputTypes", "", "", "FNGLT", "FNGLT", "none","accnos",false,false); parameters.push_back(pcontigsreport);
28 		CommandParameter pseed("seed", "Number", "", "0", "", "", "","",false,false); parameters.push_back(pseed);
29         CommandParameter pinputdir("inputdir", "String", "", "", "", "", "","",false,false); parameters.push_back(pinputdir);
30 		CommandParameter poutputdir("outputdir", "String", "", "", "", "", "","",false,false); parameters.push_back(poutputdir);
31 
32         abort = false; calledHelp = false;
33 
34         vector<string> tempOutNames;
35         outputTypes["accnos"] = tempOutNames;
36 
37 		vector<string> myArray;
38 		for (int i = 0; i < parameters.size(); i++) {	myArray.push_back(parameters[i].name);		}
39 		return myArray;
40 	}
41 	catch(exception& e) {
42 		m->errorOut(e, "ListSeqsCommand", "setParameters");
43 		exit(1);
44 	}
45 }
46 //**********************************************************************************************************************
getHelpString()47 string ListSeqsCommand::getHelpString(){
48 	try {
49 		string helpString = "";
50 		helpString += "The list.seqs command reads a fasta, name, group, count, list, taxonomy, fastq, alignreport or contigsreport file and outputs a .accnos file containing sequence names.\n";
51 		helpString += "The list.seqs command parameters are fasta, name, group, count, list, taxonomy, fastq, contigsreport and alignreport.  You must provide one of these parameters.\n";
52 		helpString += "The list.seqs command should be in the following format: list.seqs(fasta=yourFasta).\n";
53 		helpString += "Example list.seqs(fasta=amazon.fasta).\n";
54 		;
55 		return helpString;
56 	}
57 	catch(exception& e) {
58 		m->errorOut(e, "ListSeqsCommand", "getHelpString");
59 		exit(1);
60 	}
61 }
62 //**********************************************************************************************************************
getOutputPattern(string type)63 string ListSeqsCommand::getOutputPattern(string type) {
64     try {
65         string pattern = "";
66 
67         if (type == "accnos") {  pattern = "[filename],accnos"; }
68         else { m->mothurOut("[ERROR]: No definition for type " + type + " output pattern.\n"); m->setControl_pressed(true);  }
69 
70         return pattern;
71     }
72     catch(exception& e) {
73         m->errorOut(e, "ListSeqsCommand", "getOutputPattern");
74         exit(1);
75     }
76 }
77 //**********************************************************************************************************************
ListSeqsCommand(string option)78 ListSeqsCommand::ListSeqsCommand(string option) : Command()  {
79 	try {
80 
81 		if(option == "help") { help(); abort = true; calledHelp = true; }
82 		else if(option == "citation") { citation(); abort = true; calledHelp = true;}
83         else if(option == "category") {  abort = true; calledHelp = true;  }
84 		else {
85 			OptionParser parser(option, setParameters());
86 			map<string,string> parameters = parser.getParameters();
87 
88 			ValidParameters validParameter;
89 
90 
91 			//check for required parameters
92 			fastafiles = validParameter.validFiles(parameters, "fasta");
93             if (fastafiles.size() != 0) {
94                 if (fastafiles[0] == "not open") { abort = true; }
95                 else { current->setFastaFile(fastafiles[0]); }
96             }
97 
98 			namefiles = validParameter.validFiles(parameters, "name");
99             if (namefiles.size() != 0) {
100                 if (namefiles[0] == "not open") { abort = true; }
101                 else { current->setNameFile(namefiles[0]); }
102             }
103 
104 			groupfiles = validParameter.validFiles(parameters, "group");
105             if (groupfiles.size() != 0) {
106                 if (groupfiles[0] == "not open") { abort = true; }
107                 else { current->setGroupFile(groupfiles[0]); }
108             }
109 
110 			alignfiles = validParameter.validFiles(parameters, "alignreport");
111             if (alignfiles.size() != 0) {
112                 if (alignfiles[0] == "not open") { abort = true; }
113             }
114 
115             contigsreportfiles = validParameter.validFiles(parameters, "contigsreport");
116             if (contigsreportfiles.size() != 0) {
117                 if (contigsreportfiles[0] == "not open") { abort = true; }
118                 else { current->setContigsReportFile(contigsreportfiles[0]); }
119             }
120 
121 			listfiles = validParameter.validFiles(parameters, "list");
122             if (listfiles.size() != 0) {
123                 if (listfiles[0] == "not open") { abort = true; }
124                 else { current->setListFile(listfiles[0]); }
125             }
126 
127 			taxfiles = validParameter.validFiles(parameters, "taxonomy");
128             if (taxfiles.size() != 0) {
129                 if (taxfiles[0] == "not open") { abort = true; }
130                 else { current->setTaxonomyFile(taxfiles[0]); }
131             }
132 
133             countfiles = validParameter.validFiles(parameters, "count");
134             if (countfiles.size() != 0) {
135                 if (countfiles[0] == "not open") { abort = true; }
136                 else { current->setCountFile(countfiles[0]); }
137             }
138 
139             fastqfiles = validParameter.validFiles(parameters, "fastq");
140             if (fastqfiles.size() != 0) {
141                 if (fastqfiles[0] == "not open") { abort = true; }
142             }
143 
144 			if ((fastqfiles.size() == 0) && (countfiles.size() == 0) && (fastafiles.size() == 0) && (namefiles.size() == 0) && (listfiles.size() == 0) && (groupfiles.size() == 0) && (alignfiles.size() == 0) && (taxfiles.size() == 0) && (contigsreportfiles.size() == 0))  { m->mothurOut("You must provide a file.\n"); abort = true; }
145 		}
146 	}
147 	catch(exception& e) {
148 		m->errorOut(e, "ListSeqsCommand", "ListSeqsCommand");
149 		exit(1);
150 	}
151 }
152 //**********************************************************************************************************************
// Adds name to newNames when it should be kept:
//   - empty == true  : always kept (first or only file)
//   - empty == false : kept only if name is already present in names
void addName(bool empty, string name, set<string>& names, set<string>& newNames) {
    const bool keep = empty || (names.count(name) != 0);
    if (keep) { newNames.insert(name); }
}
159 //**********************************************************************************************************************
readFastq(set<string> & names,ifstream & in,MothurOut * & m)160 void readFastq(set<string>& names, ifstream& in, MothurOut*& m){
161     try {
162         set<string> newNames;
163         bool empty = true;
164         if (names.size() != 0) { empty=false; }
165         Utils util;
166 
167         while(!in.eof()){
168 
169             if (m->getControl_pressed()) { break; }
170 
171             bool ignore;
172             FastqRead fread(in, ignore, "illumina1.8+"); util.gobble(in);
173 
174             if (!ignore) { addName(empty, fread.getName(), names, newNames); }
175         }
176 
177         names = newNames;
178     }
179     catch(exception& e) {
180         m->errorOut(e, "ListSeqsCommand", "readFastq");
181         exit(1);
182     }
183 }
184 //**********************************************************************************************************************
readFasta(set<string> & names,ifstream & in,MothurOut * & m)185 void readFasta(set<string>& names, ifstream& in, MothurOut*& m){
186     try {
187         set<string> newNames;
188         bool empty = true;
189         if (names.size() != 0) { empty=false; }
190         Utils util;
191 
192         while(!in.eof()){
193 
194             if (m->getControl_pressed()) { break; }
195 
196             Sequence currSeq(in); util.gobble(in);
197 
198             if (currSeq.getName() != "") { addName(empty, currSeq.getName(), names, newNames); }
199         }
200 
201         names = newNames;
202     }
203     catch(exception& e) {
204         m->errorOut(e, "ListSeqsCommand", "readFasta");
205         exit(1);
206     }
207 }
208 //**********************************************************************************************************************
readList(set<string> & names,ifstream & in,MothurOut * & m)209 void readList(set<string>& names, ifstream& in, MothurOut*& m){
210     try {
211         set<string> newNames;
212         bool empty = true;
213         if (names.size() != 0) { empty=false; }
214 
215         Utils util; string tag = "Otu"; string readHeaders = ""; //Tells mothur to try and read headers from the file
216 
217         if(!in.eof()){
218             ListVector list(in, readHeaders, tag); //read in list vector
219 
220             //for each bin
221             for (int i = 0; i < list.getNumBins(); i++) {
222                 if (m->getControl_pressed()) { break; }
223 
224                 string bin = list.get(i);
225                 vector<string> binnames; util.splitAtComma(bin, binnames);
226 
227                 for (int j = 0; j < binnames.size(); j++) { addName(empty, binnames[j], names, newNames); }
228             }
229         }
230 
231         names = newNames;
232     }
233     catch(exception& e) {
234         m->errorOut(e, "ListSeqsCommand", "readList");
235         exit(1);
236     }
237 }
238 //**********************************************************************************************************************
readNameTaxGroup(set<string> & names,ifstream & in,MothurOut * & m)239 void readNameTaxGroup(set<string>& names, ifstream& in, MothurOut*& m){
240     try {
241         set<string> newNames;
242         bool empty = true;
243         if (names.size() != 0) { empty=false; }
244 
245         Utils util; string name;
246 
247         while(!in.eof()){
248 
249             if (m->getControl_pressed()) { break; }
250 
251             in >> name; util.getline(in); util.gobble(in);
252 
253             addName(empty, name, names, newNames);
254         }
255 
256         names = newNames;
257     }
258     catch(exception& e) {
259         m->errorOut(e, "ListSeqsCommand", "readNameTaxGroup");
260         exit(1);
261     }
262 }
263 //**********************************************************************************************************************
readCount(set<string> & names,ifstream & in,MothurOut * & m)264 void readCount(set<string>& names, ifstream& in, MothurOut*& m){
265     try {
266         set<string> newNames;
267         bool empty = true;
268         if (names.size() != 0) { empty=false; }
269 
270         CountTable ct; ct.readTable(in, false, false);
271 
272         if (m->getControl_pressed()) { return; }
273 
274         vector<string> cnames = ct.getNamesOfSeqs();
275 
276         for (int j = 0; j < cnames.size(); j++) { addName(empty, cnames[j], names, newNames); }
277 
278         names = newNames;
279     }
280     catch(exception& e) {
281         m->errorOut(e, "ListSeqsCommand", "readCount");
282         exit(1);
283     }
284 }
285 //**********************************************************************************************************************
readAlignContigs(set<string> & names,ifstream & in,MothurOut * & m)286 void readAlignContigs(set<string>& names, ifstream& in, MothurOut*& m){
287     try {
288         set<string> newNames;
289         bool empty = true;
290         if (names.size() != 0) { empty=false; }
291         string name;
292 
293         Utils util; util.getline(in);  util.gobble(in);
294 
295         while(!in.eof()){
296             if (m->getControl_pressed()) { break; }
297 
298             in >> name; util.getline(in); util.gobble(in);
299 
300             addName(empty, name, names, newNames);
301         }
302 
303         names = newNames;
304     }
305     catch(exception& e) {
306         m->errorOut(e, "ListSeqsCommand", "readAlignContigs");
307         exit(1);
308     }
309 }
310 //**********************************************************************************************************************
311 
execute()312 int ListSeqsCommand::execute(){
313 	try {
314 
315 		if (abort) { if (calledHelp) { return 0; }  return 2;	}
316 
317         set<string> names;
318 
319 		//read functions fill names vector
320 		if (fastafiles.size() != 0)		    {   process(fastafiles, names, &readFasta);	        }
321         if (fastqfiles.size() != 0)	        {	process(fastqfiles, names, &readFastq);	        }
322 		if (namefiles.size() != 0)	        {	process(namefiles, names, &readNameTaxGroup);   }
323 		if (groupfiles.size() != 0)	        {	process(groupfiles, names, &readNameTaxGroup);  }
324         if (taxfiles.size() != 0)           {   process(taxfiles, names, &readNameTaxGroup);    }
325 		if (alignfiles.size() != 0)	        {   process(alignfiles, names, &readAlignContigs);         }
326         if (contigsreportfiles.size() != 0) {   process(contigsreportfiles, names, &readAlignContigs); }
327 		if (listfiles.size() != 0)	        {	process(listfiles, names, &readList);       }
328         if (countfiles.size() != 0)	        {	process(countfiles, names, &readCount);     }
329 
330 		if (m->getControl_pressed()) { outputTypes.clear();  return 0; }
331 
332 		if (outputdir == "") {  outputdir += util.hasPath(inputFileName);  }
333 
334         map<string, string> variables;
335         variables["[filename]"] = outputdir + util.getRootName(util.getSimpleName(inputFileName));
336 		string outputFileName = getOutputFileName("accnos", variables);
337 
338         util.printAccnos(outputFileName, names);
339 
340 		outputNames.push_back(outputFileName); outputTypes["accnos"].push_back(outputFileName);
341 
342 		if (m->getControl_pressed()) { outputTypes.clear();  util.mothurRemove(outputFileName); return 0; }
343 
344 		current->setAccnosFile(outputFileName);
345 
346 		m->mothurOut("\nOutput File Names: \n" + outputFileName + "\n\n");
347 
348 		//set accnos file as new current accnosfile
349 		string currentName = "";
350 		itTypes = outputTypes.find("accnos");
351 		if (itTypes != outputTypes.end()) {
352 			if ((itTypes->second).size() != 0) { currentName = (itTypes->second)[0]; current->setAccnosFile(currentName); }
353 		}
354 
355 		return 0;
356 	}catch(exception& e) {
357 		m->errorOut(e, "ListSeqsCommand", "execute");
358 		exit(1);
359 	}
360 }
361 //**********************************************************************************************************************
process(vector<string> files,set<string> & names,void f (set<string> &,ifstream &,MothurOut * &))362 void ListSeqsCommand::process(vector<string> files, set<string>& names, void f(set<string>&, ifstream&, MothurOut*&)){
363     try {
364         Utils util;
365         for (int i = 0; i < files.size(); i++) {
366             if (m->getControl_pressed()) { break; }
367 
368             inputFileName = files[i];
369 
370             ifstream in; util.openInputFile(inputFileName, in);
371 
372             f(names, in, m);
373 
374             in.close();
375         }
376     }
377     catch(exception& e) {
378         m->errorOut(e, "ListSeqsCommand", "process");
379         exit(1);
380     }
381 }
382 //**********************************************************************************************************************
383