1 /*
2  *  clusterdoturcommand.cpp
3  *  Mothur
4  *
5  *  Created by westcott on 10/27/10.
6  *  Copyright 2010 Schloss Lab. All rights reserved.
7  *
8  */
9 
10 #include "clusterdoturcommand.h"
11 #include "clusterclassic.h"
12 
13 //**********************************************************************************************************************
setParameters()14 vector<string> ClusterDoturCommand::setParameters(){
15 	try {
16 		CommandParameter pphylip("phylip", "InputTypes", "", "", "none", "none", "none","list",false,true,true); parameters.push_back(pphylip);
17 		CommandParameter pname("name", "InputTypes", "", "", "namecount", "none", "none","rabund-sabund",false,false,true); parameters.push_back(pname);
18         CommandParameter pcount("count", "InputTypes", "", "", "namecount", "none", "none","",false,false,true); parameters.push_back(pcount);
19 		CommandParameter pcutoff("cutoff", "Number", "", "10", "", "", "","",false,false,true); parameters.push_back(pcutoff);
20 		CommandParameter pprecision("precision", "Number", "", "100", "", "", "","",false,false); parameters.push_back(pprecision);
21 		CommandParameter pmethod("method", "Multiple", "furthest-nearest-average-weighted", "average", "", "", "","",false,false); parameters.push_back(pmethod);
22 		CommandParameter psim("sim", "Boolean", "", "F", "", "", "","",false,false); parameters.push_back(psim);
23 		CommandParameter pseed("seed", "Number", "", "0", "", "", "","",false,false); parameters.push_back(pseed);
24         CommandParameter pinputdir("inputdir", "String", "", "", "", "", "","",false,false); parameters.push_back(pinputdir);
25 		CommandParameter poutputdir("outputdir", "String", "", "", "", "", "","",false,false); parameters.push_back(poutputdir);
26 
27         abort = false; calledHelp = false;
28 
29         vector<string> tempOutNames;
30         outputTypes["list"] = tempOutNames;
31         outputTypes["rabund"] = tempOutNames;
32         outputTypes["sabund"] = tempOutNames;
33 
34 		vector<string> myArray;
35 		for (int i = 0; i < parameters.size(); i++) {	myArray.push_back(parameters[i].name);		}
36 		return myArray;
37 	}
38 	catch(exception& e) {
39 		m->errorOut(e, "ClusterDoturCommand", "setParameters");
40 		exit(1);
41 	}
42 }
43 //**********************************************************************************************************************
getHelpString()44 string ClusterDoturCommand::getHelpString(){
45 	try {
46 		string helpString = "";
47 		helpString += "The cluster.classic command clusters using the algorithm from dotur. \n";
48 		helpString += "The cluster.classic command parameter options are phylip, name, count, method, cuttoff, sim, precision. Phylip is required, unless you have a valid current file.\n";
49 		helpString += "The cluster.classic command should be in the following format: \n";
50 		helpString += "cluster.classic(phylip=yourDistanceMatrix, method=yourMethod, cutoff=yourCutoff, precision=yourPrecision) \n";
51 		helpString += "The acceptable cluster methods are furthest, nearest, weighted and average.  If no method is provided then average is assumed.\n";
52 		return helpString;
53 	}
54 	catch(exception& e) {
55 		m->errorOut(e, "ClusterDoturCommand", "getHelpString");
56 		exit(1);
57 	}
58 }
59 //**********************************************************************************************************************
getOutputPattern(string type)60 string ClusterDoturCommand::getOutputPattern(string type) {
61     try {
62         string pattern = "";
63 
64         if (type == "list") {  pattern = "[filename],[clustertag],list-[filename],[clustertag],[tag2],list"; }
65         else if (type == "rabund") {  pattern = "[filename],[clustertag],rabund"; }
66         else if (type == "sabund") {  pattern = "[filename],[clustertag],sabund"; }
67         else { m->mothurOut("[ERROR]: No definition for type " + type + " output pattern.\n"); m->setControl_pressed(true);  }
68 
69         return pattern;
70     }
71     catch(exception& e) {
72         m->errorOut(e, "ClusterDoturCommand", "getOutputPattern");
73         exit(1);
74     }
75 }
76 //**********************************************************************************************************************
77 //This function checks to make sure the cluster command has no errors and then clusters based on the method chosen.
ClusterDoturCommand(string option)78 ClusterDoturCommand::ClusterDoturCommand(string option) : Command()  {
79 	try{
80 		//allow user to run help
81 		if(option == "help") { help(); abort = true; calledHelp = true; }
82 		else if(option == "citation") { citation(); abort = true; calledHelp = true;}
83         else if(option == "category") {  abort = true; calledHelp = true;  }
84 
85 		else {
86 			OptionParser parser(option, setParameters());
87 			map<string,string> parameters = parser.getParameters();
88 
89 			ValidParameters validParameter;
90 
91 
92 			//check for required parameters
93 			phylipfile = validParameter.validFile(parameters, "phylip");
94 			if (phylipfile == "not open") { abort = true; }
95 			else if (phylipfile == "not found") {
96 				phylipfile = current->getPhylipFile();
97 				if (phylipfile != "") {  m->mothurOut("Using " + phylipfile + " as input file for the phylip parameter.\n"); }
98 				else {
99 					m->mothurOut("You need to provide a phylip file with the cluster.classic command.\n");abort = true;
100 				}
101 			}else { current->setPhylipFile(phylipfile); }
102 
103 			//check for optional parameter and set defaults
104 			namefile = validParameter.validFile(parameters, "name");
105 			if (namefile == "not open") { abort = true; namefile = ""; }
106 			else if (namefile == "not found") { namefile = ""; }
107 			else { current->setNameFile(namefile); }
108 
109             countfile = validParameter.validFile(parameters, "count");
110 			if (countfile == "not open") { abort = true; countfile = ""; }
111 			else if (countfile == "not found") { countfile = ""; }
112 			else { current->setCountFile(countfile); }
113 
114             if ((countfile != "") && (namefile != "")) { m->mothurOut("When executing a cluster.classic command you must enter ONLY ONE of the following: count or name.\n");  abort = true; }
115 
116 			string temp;
117 			temp = validParameter.valid(parameters, "precision");
118 			if (temp == "not found") { temp = "100"; }
119 			//saves precision legnth for formatting below
120 			length = temp.length();
121 			util.mothurConvert(temp, precision);
122 
123 			temp = validParameter.valid(parameters, "cutoff");
124 			if (temp == "not found") { temp = "10"; }
125 			util.mothurConvert(temp, cutoff);
126 
127 			temp = validParameter.valid(parameters, "sim");				if (temp == "not found") { temp = "F"; }
128 			sim = util.isTrue(temp);
129 
130 			method = validParameter.valid(parameters, "method");
131 			if (method == "not found") { method = "average"; }
132 
133 			if ((method == "furthest") || (method == "nearest") || (method == "average") || (method == "weighted")) {
134 				if (method == "furthest") { tag = "fn"; }
135 				else if (method == "nearest") { tag = "nn"; }
136 				else if (method == "average") { tag = "an"; }
137 				else if (method == "weighted") { tag = "wn"; }
138 			}else { m->mothurOut("Not a valid clustering method.  Valid clustering algorithms are furthest, nearest, average, weighted.\n");  abort = true; }
139 		}
140 	}
141 	catch(exception& e) {
142 		m->errorOut(e, "ClusterDoturCommand", "ClusterCommand");
143 		exit(1);
144 	}
145 }
146 //**********************************************************************************************************************
147 
execute()148 int ClusterDoturCommand::execute(){
149 	try {
150 
151 		if (abort) { if (calledHelp) { return 0; }  return 2;	}
152 
153 
154         ClusterClassic* cluster = new ClusterClassic(cutoff, method, sim);
155 
156         NameAssignment* nameMap = NULL;
157         CountTable* ct = NULL;
158         map<string, int> counts;
159         if(namefile != "") {
160 			nameMap = new NameAssignment(namefile);
161 			nameMap->readMap();
162             cluster->readPhylipFile(phylipfile, nameMap);
163             delete nameMap;
164 		}else if (countfile != "") {
165             ct = new CountTable();
166             ct->readTable(countfile, false, false);
167             cluster->readPhylipFile(phylipfile, ct);
168             counts = ct->getNameMap();
169             delete ct;
170         }else {
171             cluster->readPhylipFile(phylipfile, nameMap);
172         }
173         tag = cluster->getTag();
174 
175 		if (m->getControl_pressed()) { delete cluster; return 0; }
176 
177 		list = cluster->getListVector();
178 		rabund = cluster->getRAbundVector();
179 
180 		if (outputdir == "") { outputdir += util.hasPath(phylipfile); }
181 		fileroot = outputdir + util.getRootName(util.getSimpleName(phylipfile));
182 
183         map<string, string> variables;
184         variables["[filename]"] = fileroot;
185         variables["[clustertag]"] = tag;
186         string sabundFileName = getOutputFileName("sabund", variables);
187         string rabundFileName = getOutputFileName("rabund", variables);
188         //if (countfile != "") { variables["[tag2]"] = "unique_list"; }
189         string listFileName = getOutputFileName("list", variables);
190 
191         if (countfile == "") {
192             util.openOutputFile(sabundFileName,	sabundFile);
193             util.openOutputFile(rabundFileName,	rabundFile);
194             outputNames.push_back(sabundFileName); outputTypes["sabund"].push_back(sabundFileName);
195             outputNames.push_back(rabundFileName); outputTypes["rabund"].push_back(rabundFileName);
196 
197         }
198 		util.openOutputFile(listFileName,	listFile);
199         outputNames.push_back(listFileName); outputTypes["list"].push_back(listFileName);
200 
201 		float previousDist = 0.00000;
202 		float rndPreviousDist = 0.00000;
203 		oldRAbund = *rabund;
204 		oldList = *list;
205         bool printHeaders = true;
206 
207         int estart = time(NULL);
208 
209 		while ((cluster->getSmallDist() <= cutoff) && (cluster->getNSeqs() > 1)){
210 			if (m->getControl_pressed()) { delete cluster; delete list; delete rabund; if(countfile == "") {rabundFile.close(); sabundFile.close();  util.mothurRemove((fileroot+ tag + ".rabund")); util.mothurRemove((fileroot+ tag + ".sabund")); }
211                 listFile.close(); util.mothurRemove((fileroot+ tag + ".list")); outputTypes.clear();  return 0;  }
212 
213 			cluster->update(cutoff);
214 
215 			float dist = cluster->getSmallDist();
216 			float rndDist = util.ceilDist(dist, precision);
217 
218 			if(previousDist <= 0.0000 && dist != previousDist)  { printData("unique", counts, printHeaders);                                }
219 			else if(rndDist != rndPreviousDist)                 { printData(toString(rndPreviousDist,  length-1), counts, printHeaders);    }
220 
221 			previousDist = dist;
222 			rndPreviousDist = rndDist;
223 			oldRAbund = *rabund;
224 			oldList = *list;
225 		}
226 
227 		if(previousDist <= 0.0000)          { printData("unique", counts, printHeaders);                            }
228 		else if(rndPreviousDist<cutoff)     { printData(toString(rndPreviousDist, length-1), counts, printHeaders); }
229 
230         if (countfile == "") {
231             sabundFile.close();
232             rabundFile.close();
233         }
234 		listFile.close();
235 
236 		delete cluster;  delete list; delete rabund;
237 
238 		//set list file as new current listfile
239 		string currentName = "";
240 		itTypes = outputTypes.find("list");
241 		if (itTypes != outputTypes.end()) { if ((itTypes->second).size() != 0) { currentName = (itTypes->second)[0]; current->setListFile(currentName); } }
242 
243 		//set rabund file as new current rabundfile
244 		itTypes = outputTypes.find("rabund");
245 		if (itTypes != outputTypes.end()) { if ((itTypes->second).size() != 0) { currentName = (itTypes->second)[0]; current->setRabundFile(currentName); } }
246 
247 		//set sabund file as new current sabundfile
248 		itTypes = outputTypes.find("sabund");
249 		if (itTypes != outputTypes.end()) { if ((itTypes->second).size() != 0) { currentName = (itTypes->second)[0]; current->setSabundFile(currentName); } }
250 
251 		m->mothurOut("\nOutput File Names: \n");
252 		for (int i = 0; i < outputNames.size(); i++) {	m->mothurOut(outputNames[i] +"\n"); 	} m->mothurOutEndLine();
253 
254 		m->mothurOut("It took " + toString(time(NULL) - estart) + " seconds to cluster\n");
255 
256 		return 0;
257 	}
258 	catch(exception& e) {
259 		m->errorOut(e, "ClusterDoturCommand", "execute");
260 		exit(1);
261 	}
262 }
263 
264 //**********************************************************************************************************************
265 
printData(string label,map<string,int> & counts,bool & ph)266 void ClusterDoturCommand::printData(string label, map<string, int>& counts, bool& ph){
267 	try {
268         oldList.setPrintedLabels(ph); ph = false;
269 
270         oldRAbund.setLabel(label);
271         if (countfile == "") {
272             oldRAbund.print(rabundFile);
273             oldRAbund.getSAbundVector().print(sabundFile);
274         }
275 
276 		oldRAbund.getSAbundVector().print(cout);
277 
278 		oldList.setLabel(label);
279         if(countfile != "") {
280             oldList.print(listFile, counts);
281         }else {
282             oldList.print(listFile, true);
283         }
284         ph = false;
285 	}
286 	catch(exception& e) {
287 		m->errorOut(e, "ClusterDoturCommand", "printData");
288 		exit(1);
289 	}
290 }
291 //**********************************************************************************************************************
292