1 /*
2 * phylotypecommand.cpp
3 * Mothur
4 *
5 * Created by westcott on 11/20/09.
6 * Copyright 2009 Schloss Lab. All rights reserved.
7 *
8 */
9
10 #include "phylotypecommand.h"
11 #include "phylotree.h"
12 #include "listvector.hpp"
13 #include "rabundvector.hpp"
14 #include "sabundvector.hpp"
15 #include "counttable.h"
16
17 //**********************************************************************************************************************
setParameters()18 vector<string> PhylotypeCommand::setParameters(){
19 try {
20 CommandParameter ptaxonomy("taxonomy", "InputTypes", "", "", "none", "none", "none","list-rabund-sabund",false,true,true); parameters.push_back(ptaxonomy);
21 CommandParameter pname("name", "InputTypes", "", "", "NameCount", "none", "ColumnName","rabund-sabund",false,false,true); parameters.push_back(pname);
22 CommandParameter pcount("count", "InputTypes", "", "", "NameCount", "none", "none","",false,false,true); parameters.push_back(pcount);
23 CommandParameter pcutoff("cutoff", "Number", "", "-1", "", "", "","",false,false,true); parameters.push_back(pcutoff);
24 CommandParameter plabel("label", "String", "", "", "", "", "","",false,false); parameters.push_back(plabel);
25 CommandParameter pseed("seed", "Number", "", "0", "", "", "","",false,false); parameters.push_back(pseed);
26 CommandParameter pinputdir("inputdir", "String", "", "", "", "", "","",false,false); parameters.push_back(pinputdir);
27 CommandParameter poutputdir("outputdir", "String", "", "", "", "", "","",false,false); parameters.push_back(poutputdir);
28
29 abort = false; calledHelp = false;
30
31 vector<string> tempOutNames;
32 outputTypes["list"] = tempOutNames;
33 outputTypes["sabund"] = tempOutNames;
34 outputTypes["rabund"] = tempOutNames;
35
36 vector<string> myArray;
37 for (int i = 0; i < parameters.size(); i++) { myArray.push_back(parameters[i].name); }
38 return myArray;
39 }
40 catch(exception& e) {
41 m->errorOut(e, "PhylotypeCommand", "setParameters");
42 exit(1);
43 }
44 }
45 //**********************************************************************************************************************
getHelpString()46 string PhylotypeCommand::getHelpString(){
47 try {
48 string helpString = "";
49 helpString += "The phylotype command reads a taxonomy file and outputs a .list, .rabund and .sabund file. \n";
50 helpString += "The phylotype command parameter options are taxonomy, name, count, cutoff and label. The taxonomy parameter is required.\n";
51 helpString += "The cutoff parameter allows you to specify the level you want to stop at. The default is the highest level in your taxonomy file. \n";
52 helpString += "For example: taxonomy = Bacteria;Bacteroidetes-Chlorobi;Bacteroidetes; - cutoff=2, would truncate the taxonomy to Bacteria;Bacteroidetes-Chlorobi; \n";
53 helpString += "For the cutoff parameter levels count up from the root of the phylotree. This enables you to look at the grouping down to a specific resolution, say the genus level.\n";
54 helpString += "The label parameter allows you to specify which level you would like, and are separated by dashes. The default all levels in your taxonomy file. \n";
55 helpString += "For the label parameter, levels count down from the root to keep the output similar to mothur's other commands which report information from finer resolution to coarser resolutions.\n";
56 helpString += "The phylotype command should be in the following format: \n";
57 helpString += "phylotype(taxonomy=yourTaxonomyFile, cutoff=yourCutoff, label=yourLabels) \n";
58 helpString += "Eaxample: phylotype(taxonomy=amazon.taxonomy, cutoff=5, label=1-3-5).\n";
59 return helpString;
60 }
61 catch(exception& e) {
62 m->errorOut(e, "PhylotypeCommand", "getHelpString");
63 exit(1);
64 }
65 }
66 //**********************************************************************************************************************
getOutputPattern(string type)67 string PhylotypeCommand::getOutputPattern(string type) {
68 try {
69 string pattern = "";
70
71 if (type == "list") { pattern = "[filename],[clustertag],list-[filename],[clustertag],[tag2],list"; }
72 else if (type == "rabund") { pattern = "[filename],[clustertag],rabund"; }
73 else if (type == "sabund") { pattern = "[filename],[clustertag],sabund"; }
74 else { m->mothurOut("[ERROR]: No definition for type " + type + " output pattern.\n"); m->setControl_pressed(true); }
75
76 return pattern;
77 }
78 catch(exception& e) {
79 m->errorOut(e, "PhylotypeCommand", "getOutputPattern");
80 exit(1);
81 }
82 }
83 /**********************************************************************************************************************/
PhylotypeCommand(string option)84 PhylotypeCommand::PhylotypeCommand(string option) : Command() {
85 try {
86
87 if(option == "help") { help(); abort = true; calledHelp = true; }
88 else if(option == "citation") { citation(); abort = true; calledHelp = true;}
89 else if(option == "category") { abort = true; calledHelp = true; }
90
91 else {
92 OptionParser parser(option, setParameters());
93 map<string, string> parameters = parser.getParameters();
94
95 ValidParameters validParameter;
96 taxonomyFileName = validParameter.validFile(parameters, "taxonomy");
97 if (taxonomyFileName == "not found") {
98 taxonomyFileName = current->getTaxonomyFile();
99 if (taxonomyFileName != "") { m->mothurOut("Using " + taxonomyFileName + " as input file for the taxonomy parameter.\n"); }
100 else {
101 m->mothurOut("No valid current files. taxonomy is a required parameter.\n");
102 abort = true;
103 }
104 }else if (taxonomyFileName == "not open") { taxonomyFileName = ""; abort = true; }
105 else { current->setTaxonomyFile(taxonomyFileName); }
106
107 namefile = validParameter.validFile(parameters, "name");
108 if (namefile == "not open") { namefile = ""; abort = true; }
109 else if (namefile == "not found") { namefile = ""; }
110 else { util.readNames(namefile, namemap); current->setNameFile(namefile); }
111
112 countfile = validParameter.validFile(parameters, "count");
113 if (countfile == "not open") { abort = true; countfile = ""; }
114 else if (countfile == "not found") { countfile = ""; }
115 else { current->setCountFile(countfile); }
116
117 if (outputdir == ""){ outputdir += util.hasPath(taxonomyFileName); }
118
119 if ((countfile != "") && (namefile != "")) { m->mothurOut("You must enter ONLY ONE of the following: count or name.\n"); abort = true; }
120
121 string temp = validParameter.valid(parameters, "cutoff");
122 if (temp == "not found") { temp = "-1"; }
123 util.mothurConvert(temp, cutoff);
124
125 label = validParameter.valid(parameters, "label");
126 if (label == "not found") { label = ""; allLines = true; }
127 else {
128 if(label != "all") { util.splitAtDash(label, labels); allLines = false; }
129 else { allLines = true; }
130 }
131
132 if (countfile == "") {
133 if (namefile == "") {
134 vector<string> files; files.push_back(taxonomyFileName);
135 if (!current->getMothurCalling()) { parser.getNameFile(files); }
136 }
137 }
138 }
139 }
140 catch(exception& e) {
141 m->errorOut(e, "PhylotypeCommand", "PhylotypeCommand");
142 exit(1);
143 }
144 }
145 /**********************************************************************************************************************/
146
execute()147 int PhylotypeCommand::execute(){
148 try {
149
150 if (abort) { if (calledHelp) { return 0; } return 2; }
151
152 //reads in taxonomy file and makes all the taxonomies the same length
153 //by appending the last taxon to a given taxonomy as many times as needed to
154 //make it as long as the longest taxonomy in the file
155 TaxEqualizer* taxEqual = new TaxEqualizer(taxonomyFileName, cutoff, outputdir);
156
157 if (m->getControl_pressed()) { delete taxEqual; return 0; }
158
159 string equalizedTaxFile = taxEqual->getEqualizedTaxFile();
160
161 delete taxEqual;
162
163 //build taxonomy tree from equalized file
164 PhyloTree* tree = new PhyloTree(equalizedTaxFile);
165 vector<int> leaves = tree->getGenusNodes();
166
167 //store leaf nodes in current map
168 for (int i = 0; i < leaves.size(); i++) { currentNodes[leaves[i]] = leaves[i]; }
169
170 bool done = false;
171 if (tree->get(leaves[0]).parent == -1) { m->mothurOut("Empty Tree\n"); done = true; }
172
173 if (m->getControl_pressed()) { delete tree; return 0; }
174
175 ofstream outList, outRabund, outSabund;
176 map<string, string> variables;
177 string fileroot = outputdir + util.getRootName(util.getSimpleName(taxonomyFileName));
178 variables["[filename]"] = fileroot;
179 variables["[clustertag]"] = "tx";
180 string sabundFileName = getOutputFileName("sabund", variables);
181 string rabundFileName = getOutputFileName("rabund", variables);
182 //if (countfile != "") { variables["[tag2]"] = "unique_list"; }
183 string listFileName = getOutputFileName("list", variables);
184
185 map<string, int> counts;
186 if (countfile == "") {
187 util.openOutputFile(sabundFileName, outSabund);
188 util.openOutputFile(rabundFileName, outRabund);
189 outputNames.push_back(sabundFileName); outputTypes["sabund"].push_back(sabundFileName);
190 outputNames.push_back(rabundFileName); outputTypes["rabund"].push_back(rabundFileName);
191
192 }else {
193 CountTable ct;
194 ct.readTable(countfile, false, false);
195 counts = ct.getNameMap();
196 }
197 util.openOutputFile(listFileName, outList);
198 outputNames.push_back(listFileName); outputTypes["list"].push_back(listFileName);
199
200
201 int count = 1; bool printHeaders = true;
202 //start at leaves of tree and work towards root, processing the labels the user wants
203 while((!done) && ((allLines == 1) || (labels.size() != 0))) {
204
205 string level = toString(count);
206 count++;
207
208 if (m->getControl_pressed()) {
209 if (countfile == "") { outRabund.close(); outSabund.close(); } outList.close();
210 for (int i = 0; i < outputNames.size(); i++) { util.mothurRemove(outputNames[i]); }
211 delete tree; return 0;
212 }
213
214 //is this a level the user want output for
215 if(allLines == 1 || labels.count(level) == 1){
216
217 //output level
218 m->mothurOut(level); m->mothurOutEndLine();
219
220 ListVector list("Phylo"); list.setLabel(level);
221
222 //go through nodes and build listvector
223 for (itCurrent = currentNodes.begin(); itCurrent != currentNodes.end(); itCurrent++) {
224
225 //get parents
226 TaxNode node = tree->get(itCurrent->first);
227 parentNodes[node.parent] = node.parent;
228
229 vector<string> names = node.accessions;
230
231 //make the names compatable with listvector
232 string name = "";
233 for (int i = 0; i < names.size(); i++) {
234
235 if (names[i] != "unknown") {
236 if (namefile != "") {
237 map<string, string>::iterator itNames = namemap.find(names[i]); //make sure this name is in namefile
238
239 if (itNames != namemap.end()) { name += namemap[names[i]] + ","; } //you found it in namefile
240 else { m->mothurOut("[ERROR]: " + names[i] + " is not in your namefile, please correct.\n"); m->setControl_pressed(true); }
241
242 }else{ name += names[i] + ","; }
243 }
244 }
245
246 if (m->getControl_pressed()) { break; }
247
248 name = name.substr(0, name.length()-1); //rip off extra ','
249 //add bin to list vector
250 if (name != "") { list.push_back(name); } //caused by unknown
251 }
252
253 if (printHeaders) { //only print headers the first time
254 printHeaders = false;
255 }else { list.setPrintedLabels(printHeaders); }
256
257 //print listvector
258 if (countfile == "") { list.print(outList); }
259 else { list.print(outList, counts); }
260
261 if (countfile == "") {
262 //print rabund
263 list.getRAbundVector().print(outRabund);
264 //print sabund
265 list.getSAbundVector().print(outSabund);
266 }
267 labels.erase(level);
268
269 }else {
270
271 //just get parents
272 for (itCurrent = currentNodes.begin(); itCurrent != currentNodes.end(); itCurrent++) {
273 int parent = tree->get(itCurrent->first).parent;
274 parentNodes[parent] = parent;
275 }
276 }
277
278 //move up a level
279 currentNodes = parentNodes;
280 parentNodes.clear();
281
282 //have we reached the rootnode
283 if (tree->get(currentNodes.begin()->first).parent == -1) { done = true; }
284 }
285
286 outList.close();
287 if (countfile == "") {
288 outSabund.close();
289 outRabund.close();
290 }
291
292 delete tree;
293
294 if (m->getControl_pressed()) {
295 for (int i = 0; i < outputNames.size(); i++) { util.mothurRemove(outputNames[i]); }
296 return 0;
297 }
298
299 //set list file as new current listfile
300 string currentName = "";
301 itTypes = outputTypes.find("list");
302 if (itTypes != outputTypes.end()) {
303 if ((itTypes->second).size() != 0) { currentName = (itTypes->second)[0]; current->setListFile(currentName); }
304 }
305
306 //set rabund file as new current rabundfile
307 itTypes = outputTypes.find("rabund");
308 if (itTypes != outputTypes.end()) {
309 if ((itTypes->second).size() != 0) { currentName = (itTypes->second)[0]; current->setRabundFile(currentName); }
310 }
311
312 //set sabund file as new current sabundfile
313 itTypes = outputTypes.find("sabund");
314 if (itTypes != outputTypes.end()) {
315 if ((itTypes->second).size() != 0) { currentName = (itTypes->second)[0]; current->setSabundFile(currentName); }
316 }
317
318 m->mothurOut("\nOutput File Names: \n");
319 for (int i = 0; i < outputNames.size(); i++) { m->mothurOut(outputNames[i] +"\n"); } m->mothurOutEndLine();
320
321 return 0;
322 }
323
324 catch(exception& e) {
325 m->errorOut(e, "PhylotypeCommand", "execute");
326 exit(1);
327 }
328 }
329 /**********************************************************************************************************************/
330