1 /*
2 * listseqscommand.cpp
3 * Mothur
4 *
5 * Created by Sarah Westcott on 7/8/09.
6 * Copyright 2009 Schloss Lab UMASS Amherst. All rights reserved.
7 *
8 */
9
10 #include "listseqscommand.h"
11 #include "sequence.hpp"
12 #include "listvector.hpp"
13 #include "counttable.h"
14 #include "fastqread.h"
15
16 //**********************************************************************************************************************
setParameters()17 vector<string> ListSeqsCommand::setParameters(){
18 try {
19 CommandParameter pfastq("fastq", "InputTypes", "", "", "FNGLT", "FNGLT", "none","accnos",false,false,true); parameters.push_back(pfastq);
20 CommandParameter pfasta("fasta", "InputTypes", "", "", "FNGLT", "FNGLT", "none","accnos",false,false,true); parameters.push_back(pfasta);
21 CommandParameter pname("name", "InputTypes", "", "", "FNGLT", "FNGLT", "none","accnos",false,false,true); parameters.push_back(pname);
22 CommandParameter pcount("count", "InputTypes", "", "", "FNGLT", "FNGLT", "none","accnos",false,false,true); parameters.push_back(pcount);
23 CommandParameter pgroup("group", "InputTypes", "", "", "FNGLT", "FNGLT", "none","accnos",false,false,true); parameters.push_back(pgroup);
24 CommandParameter plist("list", "InputTypes", "", "", "FNGLT", "FNGLT", "none","accnos",false,false,true); parameters.push_back(plist);
25 CommandParameter ptaxonomy("taxonomy", "InputTypes", "", "", "FNGLT", "FNGLT", "none","accnos",false,false,true); parameters.push_back(ptaxonomy);
26 CommandParameter palignreport("alignreport", "InputTypes", "", "", "FNGLT", "FNGLT", "none","accnos",false,false); parameters.push_back(palignreport);
27 CommandParameter pcontigsreport("contigsreport", "InputTypes", "", "", "FNGLT", "FNGLT", "none","accnos",false,false); parameters.push_back(pcontigsreport);
28 CommandParameter pseed("seed", "Number", "", "0", "", "", "","",false,false); parameters.push_back(pseed);
29 CommandParameter pinputdir("inputdir", "String", "", "", "", "", "","",false,false); parameters.push_back(pinputdir);
30 CommandParameter poutputdir("outputdir", "String", "", "", "", "", "","",false,false); parameters.push_back(poutputdir);
31
32 abort = false; calledHelp = false;
33
34 vector<string> tempOutNames;
35 outputTypes["accnos"] = tempOutNames;
36
37 vector<string> myArray;
38 for (int i = 0; i < parameters.size(); i++) { myArray.push_back(parameters[i].name); }
39 return myArray;
40 }
41 catch(exception& e) {
42 m->errorOut(e, "ListSeqsCommand", "setParameters");
43 exit(1);
44 }
45 }
46 //**********************************************************************************************************************
getHelpString()47 string ListSeqsCommand::getHelpString(){
48 try {
49 string helpString = "";
50 helpString += "The list.seqs command reads a fasta, name, group, count, list, taxonomy, fastq, alignreport or contigsreport file and outputs a .accnos file containing sequence names.\n";
51 helpString += "The list.seqs command parameters are fasta, name, group, count, list, taxonomy, fastq, contigsreport and alignreport. You must provide one of these parameters.\n";
52 helpString += "The list.seqs command should be in the following format: list.seqs(fasta=yourFasta).\n";
53 helpString += "Example list.seqs(fasta=amazon.fasta).\n";
54 ;
55 return helpString;
56 }
57 catch(exception& e) {
58 m->errorOut(e, "ListSeqsCommand", "getHelpString");
59 exit(1);
60 }
61 }
62 //**********************************************************************************************************************
getOutputPattern(string type)63 string ListSeqsCommand::getOutputPattern(string type) {
64 try {
65 string pattern = "";
66
67 if (type == "accnos") { pattern = "[filename],accnos"; }
68 else { m->mothurOut("[ERROR]: No definition for type " + type + " output pattern.\n"); m->setControl_pressed(true); }
69
70 return pattern;
71 }
72 catch(exception& e) {
73 m->errorOut(e, "ListSeqsCommand", "getOutputPattern");
74 exit(1);
75 }
76 }
77 //**********************************************************************************************************************
ListSeqsCommand(string option)78 ListSeqsCommand::ListSeqsCommand(string option) : Command() {
79 try {
80
81 if(option == "help") { help(); abort = true; calledHelp = true; }
82 else if(option == "citation") { citation(); abort = true; calledHelp = true;}
83 else if(option == "category") { abort = true; calledHelp = true; }
84 else {
85 OptionParser parser(option, setParameters());
86 map<string,string> parameters = parser.getParameters();
87
88 ValidParameters validParameter;
89
90
91 //check for required parameters
92 fastafiles = validParameter.validFiles(parameters, "fasta");
93 if (fastafiles.size() != 0) {
94 if (fastafiles[0] == "not open") { abort = true; }
95 else { current->setFastaFile(fastafiles[0]); }
96 }
97
98 namefiles = validParameter.validFiles(parameters, "name");
99 if (namefiles.size() != 0) {
100 if (namefiles[0] == "not open") { abort = true; }
101 else { current->setNameFile(namefiles[0]); }
102 }
103
104 groupfiles = validParameter.validFiles(parameters, "group");
105 if (groupfiles.size() != 0) {
106 if (groupfiles[0] == "not open") { abort = true; }
107 else { current->setGroupFile(groupfiles[0]); }
108 }
109
110 alignfiles = validParameter.validFiles(parameters, "alignreport");
111 if (alignfiles.size() != 0) {
112 if (alignfiles[0] == "not open") { abort = true; }
113 }
114
115 contigsreportfiles = validParameter.validFiles(parameters, "contigsreport");
116 if (contigsreportfiles.size() != 0) {
117 if (contigsreportfiles[0] == "not open") { abort = true; }
118 else { current->setContigsReportFile(contigsreportfiles[0]); }
119 }
120
121 listfiles = validParameter.validFiles(parameters, "list");
122 if (listfiles.size() != 0) {
123 if (listfiles[0] == "not open") { abort = true; }
124 else { current->setListFile(listfiles[0]); }
125 }
126
127 taxfiles = validParameter.validFiles(parameters, "taxonomy");
128 if (taxfiles.size() != 0) {
129 if (taxfiles[0] == "not open") { abort = true; }
130 else { current->setTaxonomyFile(taxfiles[0]); }
131 }
132
133 countfiles = validParameter.validFiles(parameters, "count");
134 if (countfiles.size() != 0) {
135 if (countfiles[0] == "not open") { abort = true; }
136 else { current->setCountFile(countfiles[0]); }
137 }
138
139 fastqfiles = validParameter.validFiles(parameters, "fastq");
140 if (fastqfiles.size() != 0) {
141 if (fastqfiles[0] == "not open") { abort = true; }
142 }
143
144 if ((fastqfiles.size() == 0) && (countfiles.size() == 0) && (fastafiles.size() == 0) && (namefiles.size() == 0) && (listfiles.size() == 0) && (groupfiles.size() == 0) && (alignfiles.size() == 0) && (taxfiles.size() == 0) && (contigsreportfiles.size() == 0)) { m->mothurOut("You must provide a file.\n"); abort = true; }
145 }
146 }
147 catch(exception& e) {
148 m->errorOut(e, "ListSeqsCommand", "ListSeqsCommand");
149 exit(1);
150 }
151 }
152 //**********************************************************************************************************************
// Records `name` in `newNames` when it should survive the current file:
// always when `empty` is true (first or only file), otherwise only when the
// name is already present in `names` (seen in every file so far).
void addName(bool empty, string name, set<string>& names, set<string>& newNames) {
    const bool keep = empty || (names.find(name) != names.end());
    if (keep) { newNames.insert(name); }
}
159 //**********************************************************************************************************************
readFastq(set<string> & names,ifstream & in,MothurOut * & m)160 void readFastq(set<string>& names, ifstream& in, MothurOut*& m){
161 try {
162 set<string> newNames;
163 bool empty = true;
164 if (names.size() != 0) { empty=false; }
165 Utils util;
166
167 while(!in.eof()){
168
169 if (m->getControl_pressed()) { break; }
170
171 bool ignore;
172 FastqRead fread(in, ignore, "illumina1.8+"); util.gobble(in);
173
174 if (!ignore) { addName(empty, fread.getName(), names, newNames); }
175 }
176
177 names = newNames;
178 }
179 catch(exception& e) {
180 m->errorOut(e, "ListSeqsCommand", "readFastq");
181 exit(1);
182 }
183 }
184 //**********************************************************************************************************************
readFasta(set<string> & names,ifstream & in,MothurOut * & m)185 void readFasta(set<string>& names, ifstream& in, MothurOut*& m){
186 try {
187 set<string> newNames;
188 bool empty = true;
189 if (names.size() != 0) { empty=false; }
190 Utils util;
191
192 while(!in.eof()){
193
194 if (m->getControl_pressed()) { break; }
195
196 Sequence currSeq(in); util.gobble(in);
197
198 if (currSeq.getName() != "") { addName(empty, currSeq.getName(), names, newNames); }
199 }
200
201 names = newNames;
202 }
203 catch(exception& e) {
204 m->errorOut(e, "ListSeqsCommand", "readFasta");
205 exit(1);
206 }
207 }
208 //**********************************************************************************************************************
readList(set<string> & names,ifstream & in,MothurOut * & m)209 void readList(set<string>& names, ifstream& in, MothurOut*& m){
210 try {
211 set<string> newNames;
212 bool empty = true;
213 if (names.size() != 0) { empty=false; }
214
215 Utils util; string tag = "Otu"; string readHeaders = ""; //Tells mothur to try and read headers from the file
216
217 if(!in.eof()){
218 ListVector list(in, readHeaders, tag); //read in list vector
219
220 //for each bin
221 for (int i = 0; i < list.getNumBins(); i++) {
222 if (m->getControl_pressed()) { break; }
223
224 string bin = list.get(i);
225 vector<string> binnames; util.splitAtComma(bin, binnames);
226
227 for (int j = 0; j < binnames.size(); j++) { addName(empty, binnames[j], names, newNames); }
228 }
229 }
230
231 names = newNames;
232 }
233 catch(exception& e) {
234 m->errorOut(e, "ListSeqsCommand", "readList");
235 exit(1);
236 }
237 }
238 //**********************************************************************************************************************
readNameTaxGroup(set<string> & names,ifstream & in,MothurOut * & m)239 void readNameTaxGroup(set<string>& names, ifstream& in, MothurOut*& m){
240 try {
241 set<string> newNames;
242 bool empty = true;
243 if (names.size() != 0) { empty=false; }
244
245 Utils util; string name;
246
247 while(!in.eof()){
248
249 if (m->getControl_pressed()) { break; }
250
251 in >> name; util.getline(in); util.gobble(in);
252
253 addName(empty, name, names, newNames);
254 }
255
256 names = newNames;
257 }
258 catch(exception& e) {
259 m->errorOut(e, "ListSeqsCommand", "readNameTaxGroup");
260 exit(1);
261 }
262 }
263 //**********************************************************************************************************************
readCount(set<string> & names,ifstream & in,MothurOut * & m)264 void readCount(set<string>& names, ifstream& in, MothurOut*& m){
265 try {
266 set<string> newNames;
267 bool empty = true;
268 if (names.size() != 0) { empty=false; }
269
270 CountTable ct; ct.readTable(in, false, false);
271
272 if (m->getControl_pressed()) { return; }
273
274 vector<string> cnames = ct.getNamesOfSeqs();
275
276 for (int j = 0; j < cnames.size(); j++) { addName(empty, cnames[j], names, newNames); }
277
278 names = newNames;
279 }
280 catch(exception& e) {
281 m->errorOut(e, "ListSeqsCommand", "readCount");
282 exit(1);
283 }
284 }
285 //**********************************************************************************************************************
readAlignContigs(set<string> & names,ifstream & in,MothurOut * & m)286 void readAlignContigs(set<string>& names, ifstream& in, MothurOut*& m){
287 try {
288 set<string> newNames;
289 bool empty = true;
290 if (names.size() != 0) { empty=false; }
291 string name;
292
293 Utils util; util.getline(in); util.gobble(in);
294
295 while(!in.eof()){
296 if (m->getControl_pressed()) { break; }
297
298 in >> name; util.getline(in); util.gobble(in);
299
300 addName(empty, name, names, newNames);
301 }
302
303 names = newNames;
304 }
305 catch(exception& e) {
306 m->errorOut(e, "ListSeqsCommand", "readAlignContigs");
307 exit(1);
308 }
309 }
310 //**********************************************************************************************************************
311
execute()312 int ListSeqsCommand::execute(){
313 try {
314
315 if (abort) { if (calledHelp) { return 0; } return 2; }
316
317 set<string> names;
318
319 //read functions fill names vector
320 if (fastafiles.size() != 0) { process(fastafiles, names, &readFasta); }
321 if (fastqfiles.size() != 0) { process(fastqfiles, names, &readFastq); }
322 if (namefiles.size() != 0) { process(namefiles, names, &readNameTaxGroup); }
323 if (groupfiles.size() != 0) { process(groupfiles, names, &readNameTaxGroup); }
324 if (taxfiles.size() != 0) { process(taxfiles, names, &readNameTaxGroup); }
325 if (alignfiles.size() != 0) { process(alignfiles, names, &readAlignContigs); }
326 if (contigsreportfiles.size() != 0) { process(contigsreportfiles, names, &readAlignContigs); }
327 if (listfiles.size() != 0) { process(listfiles, names, &readList); }
328 if (countfiles.size() != 0) { process(countfiles, names, &readCount); }
329
330 if (m->getControl_pressed()) { outputTypes.clear(); return 0; }
331
332 if (outputdir == "") { outputdir += util.hasPath(inputFileName); }
333
334 map<string, string> variables;
335 variables["[filename]"] = outputdir + util.getRootName(util.getSimpleName(inputFileName));
336 string outputFileName = getOutputFileName("accnos", variables);
337
338 util.printAccnos(outputFileName, names);
339
340 outputNames.push_back(outputFileName); outputTypes["accnos"].push_back(outputFileName);
341
342 if (m->getControl_pressed()) { outputTypes.clear(); util.mothurRemove(outputFileName); return 0; }
343
344 current->setAccnosFile(outputFileName);
345
346 m->mothurOut("\nOutput File Names: \n" + outputFileName + "\n\n");
347
348 //set accnos file as new current accnosfile
349 string currentName = "";
350 itTypes = outputTypes.find("accnos");
351 if (itTypes != outputTypes.end()) {
352 if ((itTypes->second).size() != 0) { currentName = (itTypes->second)[0]; current->setAccnosFile(currentName); }
353 }
354
355 return 0;
356 }catch(exception& e) {
357 m->errorOut(e, "ListSeqsCommand", "execute");
358 exit(1);
359 }
360 }
361 //**********************************************************************************************************************
process(vector<string> files,set<string> & names,void f (set<string> &,ifstream &,MothurOut * &))362 void ListSeqsCommand::process(vector<string> files, set<string>& names, void f(set<string>&, ifstream&, MothurOut*&)){
363 try {
364 Utils util;
365 for (int i = 0; i < files.size(); i++) {
366 if (m->getControl_pressed()) { break; }
367
368 inputFileName = files[i];
369
370 ifstream in; util.openInputFile(inputFileName, in);
371
372 f(names, in, m);
373
374 in.close();
375 }
376 }
377 catch(exception& e) {
378 m->errorOut(e, "ListSeqsCommand", "process");
379 exit(1);
380 }
381 }
382 //**********************************************************************************************************************
383