1 /*
2 * clusterdoturcommand.cpp
3 * Mothur
4 *
5 * Created by westcott on 10/27/10.
6 * Copyright 2010 Schloss Lab. All rights reserved.
7 *
8 */
9
10 #include "clusterdoturcommand.h"
11 #include "clusterclassic.h"
12
13 //**********************************************************************************************************************
setParameters()14 vector<string> ClusterDoturCommand::setParameters(){
15 try {
16 CommandParameter pphylip("phylip", "InputTypes", "", "", "none", "none", "none","list",false,true,true); parameters.push_back(pphylip);
17 CommandParameter pname("name", "InputTypes", "", "", "namecount", "none", "none","rabund-sabund",false,false,true); parameters.push_back(pname);
18 CommandParameter pcount("count", "InputTypes", "", "", "namecount", "none", "none","",false,false,true); parameters.push_back(pcount);
19 CommandParameter pcutoff("cutoff", "Number", "", "10", "", "", "","",false,false,true); parameters.push_back(pcutoff);
20 CommandParameter pprecision("precision", "Number", "", "100", "", "", "","",false,false); parameters.push_back(pprecision);
21 CommandParameter pmethod("method", "Multiple", "furthest-nearest-average-weighted", "average", "", "", "","",false,false); parameters.push_back(pmethod);
22 CommandParameter psim("sim", "Boolean", "", "F", "", "", "","",false,false); parameters.push_back(psim);
23 CommandParameter pseed("seed", "Number", "", "0", "", "", "","",false,false); parameters.push_back(pseed);
24 CommandParameter pinputdir("inputdir", "String", "", "", "", "", "","",false,false); parameters.push_back(pinputdir);
25 CommandParameter poutputdir("outputdir", "String", "", "", "", "", "","",false,false); parameters.push_back(poutputdir);
26
27 abort = false; calledHelp = false;
28
29 vector<string> tempOutNames;
30 outputTypes["list"] = tempOutNames;
31 outputTypes["rabund"] = tempOutNames;
32 outputTypes["sabund"] = tempOutNames;
33
34 vector<string> myArray;
35 for (int i = 0; i < parameters.size(); i++) { myArray.push_back(parameters[i].name); }
36 return myArray;
37 }
38 catch(exception& e) {
39 m->errorOut(e, "ClusterDoturCommand", "setParameters");
40 exit(1);
41 }
42 }
43 //**********************************************************************************************************************
getHelpString()44 string ClusterDoturCommand::getHelpString(){
45 try {
46 string helpString = "";
47 helpString += "The cluster.classic command clusters using the algorithm from dotur. \n";
48 helpString += "The cluster.classic command parameter options are phylip, name, count, method, cuttoff, sim, precision. Phylip is required, unless you have a valid current file.\n";
49 helpString += "The cluster.classic command should be in the following format: \n";
50 helpString += "cluster.classic(phylip=yourDistanceMatrix, method=yourMethod, cutoff=yourCutoff, precision=yourPrecision) \n";
51 helpString += "The acceptable cluster methods are furthest, nearest, weighted and average. If no method is provided then average is assumed.\n";
52 return helpString;
53 }
54 catch(exception& e) {
55 m->errorOut(e, "ClusterDoturCommand", "getHelpString");
56 exit(1);
57 }
58 }
59 //**********************************************************************************************************************
getOutputPattern(string type)60 string ClusterDoturCommand::getOutputPattern(string type) {
61 try {
62 string pattern = "";
63
64 if (type == "list") { pattern = "[filename],[clustertag],list-[filename],[clustertag],[tag2],list"; }
65 else if (type == "rabund") { pattern = "[filename],[clustertag],rabund"; }
66 else if (type == "sabund") { pattern = "[filename],[clustertag],sabund"; }
67 else { m->mothurOut("[ERROR]: No definition for type " + type + " output pattern.\n"); m->setControl_pressed(true); }
68
69 return pattern;
70 }
71 catch(exception& e) {
72 m->errorOut(e, "ClusterDoturCommand", "getOutputPattern");
73 exit(1);
74 }
75 }
76 //**********************************************************************************************************************
77 //This function checks to make sure the cluster command has no errors and then clusters based on the method chosen.
ClusterDoturCommand(string option)78 ClusterDoturCommand::ClusterDoturCommand(string option) : Command() {
79 try{
80 //allow user to run help
81 if(option == "help") { help(); abort = true; calledHelp = true; }
82 else if(option == "citation") { citation(); abort = true; calledHelp = true;}
83 else if(option == "category") { abort = true; calledHelp = true; }
84
85 else {
86 OptionParser parser(option, setParameters());
87 map<string,string> parameters = parser.getParameters();
88
89 ValidParameters validParameter;
90
91
92 //check for required parameters
93 phylipfile = validParameter.validFile(parameters, "phylip");
94 if (phylipfile == "not open") { abort = true; }
95 else if (phylipfile == "not found") {
96 phylipfile = current->getPhylipFile();
97 if (phylipfile != "") { m->mothurOut("Using " + phylipfile + " as input file for the phylip parameter.\n"); }
98 else {
99 m->mothurOut("You need to provide a phylip file with the cluster.classic command.\n");abort = true;
100 }
101 }else { current->setPhylipFile(phylipfile); }
102
103 //check for optional parameter and set defaults
104 namefile = validParameter.validFile(parameters, "name");
105 if (namefile == "not open") { abort = true; namefile = ""; }
106 else if (namefile == "not found") { namefile = ""; }
107 else { current->setNameFile(namefile); }
108
109 countfile = validParameter.validFile(parameters, "count");
110 if (countfile == "not open") { abort = true; countfile = ""; }
111 else if (countfile == "not found") { countfile = ""; }
112 else { current->setCountFile(countfile); }
113
114 if ((countfile != "") && (namefile != "")) { m->mothurOut("When executing a cluster.classic command you must enter ONLY ONE of the following: count or name.\n"); abort = true; }
115
116 string temp;
117 temp = validParameter.valid(parameters, "precision");
118 if (temp == "not found") { temp = "100"; }
119 //saves precision legnth for formatting below
120 length = temp.length();
121 util.mothurConvert(temp, precision);
122
123 temp = validParameter.valid(parameters, "cutoff");
124 if (temp == "not found") { temp = "10"; }
125 util.mothurConvert(temp, cutoff);
126
127 temp = validParameter.valid(parameters, "sim"); if (temp == "not found") { temp = "F"; }
128 sim = util.isTrue(temp);
129
130 method = validParameter.valid(parameters, "method");
131 if (method == "not found") { method = "average"; }
132
133 if ((method == "furthest") || (method == "nearest") || (method == "average") || (method == "weighted")) {
134 if (method == "furthest") { tag = "fn"; }
135 else if (method == "nearest") { tag = "nn"; }
136 else if (method == "average") { tag = "an"; }
137 else if (method == "weighted") { tag = "wn"; }
138 }else { m->mothurOut("Not a valid clustering method. Valid clustering algorithms are furthest, nearest, average, weighted.\n"); abort = true; }
139 }
140 }
141 catch(exception& e) {
142 m->errorOut(e, "ClusterDoturCommand", "ClusterCommand");
143 exit(1);
144 }
145 }
146 //**********************************************************************************************************************
147
execute()148 int ClusterDoturCommand::execute(){
149 try {
150
151 if (abort) { if (calledHelp) { return 0; } return 2; }
152
153
154 ClusterClassic* cluster = new ClusterClassic(cutoff, method, sim);
155
156 NameAssignment* nameMap = NULL;
157 CountTable* ct = NULL;
158 map<string, int> counts;
159 if(namefile != "") {
160 nameMap = new NameAssignment(namefile);
161 nameMap->readMap();
162 cluster->readPhylipFile(phylipfile, nameMap);
163 delete nameMap;
164 }else if (countfile != "") {
165 ct = new CountTable();
166 ct->readTable(countfile, false, false);
167 cluster->readPhylipFile(phylipfile, ct);
168 counts = ct->getNameMap();
169 delete ct;
170 }else {
171 cluster->readPhylipFile(phylipfile, nameMap);
172 }
173 tag = cluster->getTag();
174
175 if (m->getControl_pressed()) { delete cluster; return 0; }
176
177 list = cluster->getListVector();
178 rabund = cluster->getRAbundVector();
179
180 if (outputdir == "") { outputdir += util.hasPath(phylipfile); }
181 fileroot = outputdir + util.getRootName(util.getSimpleName(phylipfile));
182
183 map<string, string> variables;
184 variables["[filename]"] = fileroot;
185 variables["[clustertag]"] = tag;
186 string sabundFileName = getOutputFileName("sabund", variables);
187 string rabundFileName = getOutputFileName("rabund", variables);
188 //if (countfile != "") { variables["[tag2]"] = "unique_list"; }
189 string listFileName = getOutputFileName("list", variables);
190
191 if (countfile == "") {
192 util.openOutputFile(sabundFileName, sabundFile);
193 util.openOutputFile(rabundFileName, rabundFile);
194 outputNames.push_back(sabundFileName); outputTypes["sabund"].push_back(sabundFileName);
195 outputNames.push_back(rabundFileName); outputTypes["rabund"].push_back(rabundFileName);
196
197 }
198 util.openOutputFile(listFileName, listFile);
199 outputNames.push_back(listFileName); outputTypes["list"].push_back(listFileName);
200
201 float previousDist = 0.00000;
202 float rndPreviousDist = 0.00000;
203 oldRAbund = *rabund;
204 oldList = *list;
205 bool printHeaders = true;
206
207 int estart = time(NULL);
208
209 while ((cluster->getSmallDist() <= cutoff) && (cluster->getNSeqs() > 1)){
210 if (m->getControl_pressed()) { delete cluster; delete list; delete rabund; if(countfile == "") {rabundFile.close(); sabundFile.close(); util.mothurRemove((fileroot+ tag + ".rabund")); util.mothurRemove((fileroot+ tag + ".sabund")); }
211 listFile.close(); util.mothurRemove((fileroot+ tag + ".list")); outputTypes.clear(); return 0; }
212
213 cluster->update(cutoff);
214
215 float dist = cluster->getSmallDist();
216 float rndDist = util.ceilDist(dist, precision);
217
218 if(previousDist <= 0.0000 && dist != previousDist) { printData("unique", counts, printHeaders); }
219 else if(rndDist != rndPreviousDist) { printData(toString(rndPreviousDist, length-1), counts, printHeaders); }
220
221 previousDist = dist;
222 rndPreviousDist = rndDist;
223 oldRAbund = *rabund;
224 oldList = *list;
225 }
226
227 if(previousDist <= 0.0000) { printData("unique", counts, printHeaders); }
228 else if(rndPreviousDist<cutoff) { printData(toString(rndPreviousDist, length-1), counts, printHeaders); }
229
230 if (countfile == "") {
231 sabundFile.close();
232 rabundFile.close();
233 }
234 listFile.close();
235
236 delete cluster; delete list; delete rabund;
237
238 //set list file as new current listfile
239 string currentName = "";
240 itTypes = outputTypes.find("list");
241 if (itTypes != outputTypes.end()) { if ((itTypes->second).size() != 0) { currentName = (itTypes->second)[0]; current->setListFile(currentName); } }
242
243 //set rabund file as new current rabundfile
244 itTypes = outputTypes.find("rabund");
245 if (itTypes != outputTypes.end()) { if ((itTypes->second).size() != 0) { currentName = (itTypes->second)[0]; current->setRabundFile(currentName); } }
246
247 //set sabund file as new current sabundfile
248 itTypes = outputTypes.find("sabund");
249 if (itTypes != outputTypes.end()) { if ((itTypes->second).size() != 0) { currentName = (itTypes->second)[0]; current->setSabundFile(currentName); } }
250
251 m->mothurOut("\nOutput File Names: \n");
252 for (int i = 0; i < outputNames.size(); i++) { m->mothurOut(outputNames[i] +"\n"); } m->mothurOutEndLine();
253
254 m->mothurOut("It took " + toString(time(NULL) - estart) + " seconds to cluster\n");
255
256 return 0;
257 }
258 catch(exception& e) {
259 m->errorOut(e, "ClusterDoturCommand", "execute");
260 exit(1);
261 }
262 }
263
264 //**********************************************************************************************************************
265
printData(string label,map<string,int> & counts,bool & ph)266 void ClusterDoturCommand::printData(string label, map<string, int>& counts, bool& ph){
267 try {
268 oldList.setPrintedLabels(ph); ph = false;
269
270 oldRAbund.setLabel(label);
271 if (countfile == "") {
272 oldRAbund.print(rabundFile);
273 oldRAbund.getSAbundVector().print(sabundFile);
274 }
275
276 oldRAbund.getSAbundVector().print(cout);
277
278 oldList.setLabel(label);
279 if(countfile != "") {
280 oldList.print(listFile, counts);
281 }else {
282 oldList.print(listFile, true);
283 }
284 ph = false;
285 }
286 catch(exception& e) {
287 m->errorOut(e, "ClusterDoturCommand", "printData");
288 exit(1);
289 }
290 }
291 //**********************************************************************************************************************
292