1 /*
2  *  secondarystructurecommand.cpp
3  *  Mothur
4  *
5  *  Created by westcott on 9/18/09.
6  *  Copyright 2009 Schloss Lab. All rights reserved.
7  *
8  */
9 
10 #include "aligncheckcommand.h"
11 #include "sequence.hpp"
12 #include "counttable.h"
13 
14 //**********************************************************************************************************************
setParameters()15 vector<string> AlignCheckCommand::setParameters(){
16 	try {
17 		CommandParameter pfasta("fasta", "InputTypes", "", "", "none", "none", "none","aligncheck",false,true,true); parameters.push_back(pfasta);
18 		CommandParameter pmap("map", "InputTypes", "", "", "none", "none", "none","",false,true,true); parameters.push_back(pmap);
19 		CommandParameter pname("name", "InputTypes", "", "", "namecount", "none", "none","",false,false); parameters.push_back(pname);
20         CommandParameter pcount("count", "InputTypes", "", "", "namecount", "none", "none","",false,false); parameters.push_back(pcount);
21         CommandParameter pseed("seed", "Number", "", "0", "", "", "","",false,false); parameters.push_back(pseed);
22         CommandParameter pinputdir("inputdir", "String", "", "", "", "", "","",false,false); parameters.push_back(pinputdir);
23 		CommandParameter poutputdir("outputdir", "String", "", "", "", "", "","",false,false); parameters.push_back(poutputdir);
24 
25         abort = false; calledHelp = false;    haderror = 0;
26 
27         vector<string> tempOutNames;
28         outputTypes["aligncheck"] = tempOutNames;
29 
30 		vector<string> myArray;
31 		for (int i = 0; i < parameters.size(); i++) {	myArray.push_back(parameters[i].name);		}
32 		return myArray;
33 	}
34 	catch(exception& e) {
35 		m->errorOut(e, "AlignCheckCommand", "setParameters");
36 		exit(1);
37 	}
38 }
39 //**********************************************************************************************************************
getHelpString()40 string AlignCheckCommand::getHelpString(){
41 	try {
42 		string helpString = "";
43 		helpString += "The align.check command reads a fasta file and map file as well as an optional name or count file.\n";
44 		helpString += "It outputs a file containing the secondary structure matches in the .align.check file.\n";
45 		helpString += "The align.check command parameters are fasta and map, both are required.\n";
46 		helpString += "The align.check command should be in the following format: align.check(fasta=yourFasta, map=yourMap).\n";
47 		helpString += "Example align.check(map=silva.ss.map, fasta=amazon.fasta).\n";
48 		;
49 		return helpString;
50 	}
51 	catch(exception& e) {
52 		m->errorOut(e, "AlignCheckCommand", "getHelpString");
53 		exit(1);
54 	}
55 }
56 //**********************************************************************************************************************
getOutputPattern(string type)57 string AlignCheckCommand::getOutputPattern(string type) {
58     try {
59         string pattern = "";
60 
61         if (type == "aligncheck") {  pattern = "[filename],align.check"; }
62         else { m->mothurOut("[ERROR]: No definition for type " + type + " output pattern.\n"); m->setControl_pressed(true);  }
63 
64         return pattern;
65     }
66     catch(exception& e) {
67         m->errorOut(e, "AlignCheckCommand", "getOutputPattern");
68         exit(1);
69     }
70 }
71 //**********************************************************************************************************************
72 
AlignCheckCommand(string option)73 AlignCheckCommand::AlignCheckCommand(string option) : Command()  {
74 	try {
75 		if(option == "help") { help(); abort = true; calledHelp = true; }
76 		else if(option == "citation") { citation(); abort = true; calledHelp = true;}
77         else if(option == "category") {  abort = true; calledHelp = true;  }
78 
79 		else {
80 			OptionParser parser(option, setParameters());
81 			map<string,string> parameters = parser.getParameters();
82 
83 			ValidParameters validParameter;
84 			mapfile = validParameter.validFile(parameters, "map");
85 			if (mapfile == "not open") { abort = true; }
86 			else if (mapfile == "not found") {  mapfile = "";  m->mothurOut("You must provide an map file.\n");  abort = true; }
87 
88 			fastafile = validParameter.validFile(parameters, "fasta");
89 			if (fastafile == "not open") { fastafile = ""; abort = true; }
90 			else if (fastafile == "not found") {
91 				fastafile = current->getFastaFile();
92 				if (fastafile != "") { m->mothurOut("Using " + fastafile + " as input file for the fasta parameter.\n");  }
93 				else { 	m->mothurOut("[ERROR]: You have no current fastafile and the fasta parameter is required.\n"); abort = true; }
94 			}else { current->setFastaFile(fastafile); }
95 
96 			namefile = validParameter.validFile(parameters, "name");
97 			if (namefile == "not open") { namefile = ""; abort = true; }
98 			else if (namefile == "not found") { namefile = "";  }
99 			else { current->setNameFile(namefile); }
100 
101             countfile = validParameter.validFile(parameters, "count");
102 			if (countfile == "not open") { abort = true; countfile = ""; }
103 			else if (countfile == "not found") { countfile = ""; }
104 			else { current->setCountFile(countfile); }
105 
106             if ((countfile != "") && (namefile != "")) { m->mothurOut("[ERROR]: You must enter ONLY ONE of the following: count or name.\n");  abort = true; }
107 
108             if (outputdir == "") {
109                 outputdir += util.hasPath(fastafile);
110             }
111 
112             if (countfile == "") {
113                 if ((namefile == "") && (fastafile != "")){
114                     vector<string> files; files.push_back(fastafile);
115                     if (!current->getMothurCalling())  {  parser.getNameFile(files);  }
116                 }
117             }
118 		}
119 
120 	}
121 	catch(exception& e) {
122 		m->errorOut(e, "AlignCheckCommand", "AlignCheckCommand");
123 		exit(1);
124 	}
125 }
126 //**********************************************************************************************************************
127 
execute()128 int AlignCheckCommand::execute(){
129 	try {
130 
131 		if (abort) { if (calledHelp) { return 0; }  return 2;	}
132 
133 		//get secondary structure info.
134 		readMap();
135 
136 		if (namefile != "") { nameMap = util.readNames(namefile); }
137         else if (countfile != "") {
138             CountTable ct;
139             ct.readTable(countfile, false, false);
140             nameMap = ct.getNameMap();
141         }
142 
143 		if (m->getControl_pressed()) { return 0; }
144 
145 		ifstream in;
146 		util.openInputFile(fastafile, in);
147 
148 		ofstream out;
149         map<string, string> variables;
150 		variables["[filename]"] = outputdir + util.getRootName(util.getSimpleName(fastafile));
151 		string outfile = getOutputFileName("aligncheck",variables);
152 		util.openOutputFile(outfile, out);
153 
154 
155 		out << "name" << '\t' << "pound" << '\t' << "dash" << '\t' << "plus" << '\t' << "equal" << '\t';
156 		out << "loop" << '\t' << "tilde" << '\t' << "total"  << '\t' << "numseqs" << endl;
157 
158 		vector<int> pound;
159 		vector<int> dash;
160 		vector<int> plus;
161 		vector<int> equal;
162 		vector<int> loop;
163 		vector<int> tilde;
164 		vector<int> total;
165 
166 		int count = 0;
167 		while(!in.eof()){
168 			if (m->getControl_pressed()) { in.close(); out.close(); util.mothurRemove(outfile); return 0; }
169 
170 			Sequence seq(in);  util.gobble(in);
171 			if (seq.getName() != "") {
172 				statData data = getStats(seq.getAligned());
173 
174 				if (haderror == 1) { m->setControl_pressed(true); break; }
175 
176 				int num = 1;
177 				if ((namefile != "") || (countfile != "")) {
178 					//make sure this sequence is in the namefile, else error
179 					map<string, int>::iterator it = nameMap.find(seq.getName());
180 
181 					if (it == nameMap.end()) { m->mothurOut("[ERROR]: " + seq.getName() + " is not in your namefile, please correct.\n");  m->setControl_pressed(true); }
182 					else { num = it->second; }
183 				}
184 
185 				//for each sequence this sequence represents
186 				for (int i = 0; i < num; i++) {
187 					pound.push_back(data.pound);
188 					dash.push_back(data.dash);
189 					plus.push_back(data.plus);
190 					equal.push_back(data.equal);
191 					loop.push_back(data.loop);
192 					tilde.push_back(data.tilde);
193 					total.push_back(data.total);
194 				}
195 				count++;
196 
197 				out << seq.getName() << '\t' << data.pound << '\t' << data.dash << '\t' << data.plus << '\t' << data.equal << '\t';
198 				out << data.loop << '\t' << data.tilde << '\t' << data.total << '\t' << num << endl;
199 			}
200 		}
201 
202 		in.close();
203 		out.close();
204 
205 		if (m->getControl_pressed()) {  util.mothurRemove(outfile); return 0; }
206 
207 		sort(pound.begin(), pound.end());
208 		sort(dash.begin(), dash.end());
209 		sort(plus.begin(), plus.end());
210 		sort(equal.begin(), equal.end());
211 		sort(loop.begin(), loop.end());
212 		sort(tilde.begin(), tilde.end());
213 		sort(total.begin(), total.end());
214 		int size = pound.size();
215 
216 		int ptile0_25	= int(size * 0.025);
217 		int ptile25		= int(size * 0.250);
218 		int ptile50		= int(size * 0.500);
219 		int ptile75		= int(size * 0.750);
220 		int ptile97_5	= int(size * 0.975);
221 		int ptile100	= size - 1;
222 
223 		if (m->getControl_pressed()) {  util.mothurRemove(outfile); return 0; }
224 
225 		m->mothurOut("\n\t\tPound\tDash\tPlus\tEqual\tLoop\tTilde\tTotal\n");
226 		m->mothurOut("Minimum:\t" + toString(pound[0]) + "\t" + toString(dash[0]) + "\t" + toString(plus[0]) + "\t" + toString(equal[0]) + "\t" + toString(loop[0]) + "\t" + toString(tilde[0]) + "\t" + toString(total[0])+ "\n");
227 		m->mothurOut("2.5%-tile:\t" + toString(pound[ptile0_25]) + "\t" + toString(dash[ptile0_25]) + "\t" + toString(plus[ptile0_25]) + "\t" + toString(equal[ptile0_25]) + "\t"+ toString(loop[ptile0_25]) + "\t"+ toString(tilde[ptile0_25]) + "\t"+ toString(total[ptile0_25])+ "\n");
228 		m->mothurOut("25%-tile:\t" + toString(pound[ptile25]) + "\t" + toString(dash[ptile25]) + "\t" + toString(plus[ptile25]) + "\t" + toString(equal[ptile25]) + "\t" + toString(loop[ptile25]) + "\t" + toString(tilde[ptile25]) + "\t" + toString(total[ptile25])+ "\n");
229 		m->mothurOut("Median: \t" + toString(pound[ptile50]) + "\t" + toString(dash[ptile50]) + "\t" + toString(plus[ptile50]) + "\t" + toString(equal[ptile50]) + "\t" + toString(loop[ptile50]) + "\t" + toString(tilde[ptile50]) + "\t" + toString(total[ptile50])+ "\n");
230 		m->mothurOut("75%-tile:\t" + toString(pound[ptile75]) + "\t" + toString(dash[ptile75]) + "\t" + toString(plus[ptile75]) + "\t" + toString(equal[ptile75]) + "\t" + toString(loop[ptile75]) + "\t" + toString(tilde[ptile75]) + "\t" + toString(total[ptile75])+ "\n");
231 		m->mothurOut("97.5%-tile:\t" + toString(pound[ptile97_5]) + "\t" + toString(dash[ptile97_5]) + "\t" + toString(plus[ptile97_5]) + "\t" + toString(equal[ptile97_5]) + "\t" + toString(loop[ptile97_5]) + "\t" + toString(tilde[ptile97_5]) + "\t" + toString(total[ptile97_5])+ "\n");
232 		m->mothurOut("Maximum:\t" + toString(pound[ptile100]) + "\t" + toString(dash[ptile100]) + "\t" + toString(plus[ptile100]) + "\t" + toString(equal[ptile100]) + "\t" + toString(loop[ptile100]) + "\t" + toString(tilde[ptile100]) + "\t" + toString(total[ptile100])+ "\n");
233 		if ((namefile == "") && (countfile == "")) {  m->mothurOut("# of Seqs:\t" + toString(count)+ "\n"); }
234 		else { m->mothurOut("# of unique seqs:\t" + toString(count)+ "\n"); m->mothurOut("total # of seqs:\t" + toString(size)+ "\n"); }
235 
236 
237 		m->mothurOut("\nOutput File Names: \n");
238 		m->mothurOut(outfile); m->mothurOutEndLine();	outputNames.push_back(outfile); outputTypes["aligncheck"].push_back(outfile);
239 		m->mothurOutEndLine();
240 
241 		return 0;
242 	}
243 
244 	catch(exception& e) {
245 		m->errorOut(e, "AlignCheckCommand", "execute");
246 		exit(1);
247 	}
248 }
249 //**********************************************************************************************************************
readMap()250 void AlignCheckCommand::readMap(){
251 	try {
252 
253 		structMap.resize(1, 0);
254 		ifstream in;
255 
256 		util.openInputFile(mapfile, in);
257 
258 		while(!in.eof()){
259 			int position;
260 			in >> position;
261 			structMap.push_back(position);
262 			util.gobble(in);
263 		}
264 		in.close();
265 
266 		seqLength = structMap.size();
267 
268 
269 		//check you make sure is structMap[10] = 380 then structMap[380] = 10.
270 		for(int i=0;i<seqLength;i++){
271 			if(structMap[i] != 0){
272 				if(structMap[structMap[i]] != i){
273 					m->mothurOut("Your map file contains an error:  line " + toString(i) + " does not match line " + toString(structMap[i]) + ".\n");
274 				}
275 			}
276 		}
277 
278 
279 	}
280 	catch(exception& e) {
281 		m->errorOut(e, "AlignCheckCommand", "readMap");
282 		exit(1);
283 	}
284 }
285 /**************************************************************************************************/
286 
getStats(string sequence)287 statData AlignCheckCommand::getStats(string sequence){
288 	try {
289 
290 		statData data;
291 		sequence = "*" + sequence; // need to pad the sequence so we can index it by 1
292 
293 		int length = sequence.length();
294 
295 		if (length != seqLength) { m->mothurOut("your sequences are " + toString(length) + " long, but your map file only contains " + toString(seqLength) + " entries. please correct.\n");  haderror = 1; return data;  }
296 
297 		for(int i=1;i<length;i++){
298 			if(structMap[i] != 0){
299 				if(sequence[i] == 'A'){
300 					if(sequence[structMap[i]] == 'T')		{	data.tilde++;	}
301 					else if(sequence[structMap[i]] == 'A')	{	data.pound++;	}
302 					else if(sequence[structMap[i]] == 'G')	{	data.equal++;	}
303 					else if(sequence[structMap[i]] == 'C')	{	data.pound++;	}
304 					else if(sequence[structMap[i]] == '-')	{	data.pound++;	}
305 					data.total++;
306 				}
307 				else if(sequence[i] == 'T'){
308 					if(sequence[structMap[i]] == 'T')		{	data.plus++;	}
309 					else if(sequence[structMap[i]] == 'A')	{	data.tilde++;	}
310 					else if(sequence[structMap[i]] == 'G')	{	data.dash++;	}
311 					else if(sequence[structMap[i]] == 'C')	{	data.pound++;	}
312 					else if(sequence[structMap[i]] == '-')	{	data.pound++;	}
313 					data.total++;
314 				}
315 				else if(sequence[i] == 'G'){
316 					if(sequence[structMap[i]] == 'T')		{	data.dash++;	}
317 					else if(sequence[structMap[i]] == 'A')	{	data.equal++;	}
318 					else if(sequence[structMap[i]] == 'G')	{	data.pound++;	}
319 					else if(sequence[structMap[i]] == 'C')	{	data.tilde++;	}
320 					else if(sequence[structMap[i]] == '-')	{	data.pound++;	}
321 					data.total++;
322 				}
323 				else if(sequence[i] == 'C'){
324 					if(sequence[structMap[i]] == 'T')		{	data.pound++;	}
325 					else if(sequence[structMap[i]] == 'A')	{	data.pound++;	}
326 					else if(sequence[structMap[i]] == 'G')	{	data.tilde++;	}
327 					else if(sequence[structMap[i]] == 'C')	{	data.pound++;	}
328 					else if(sequence[structMap[i]] == '-')	{	data.pound++;	}
329 					data.total++;
330 				}
331 				else if(sequence[i] == '-'){
332 					if(sequence[structMap[i]] == 'T')		{	data.pound++;	data.total++;	}
333 					else if(sequence[structMap[i]] == 'A')	{	data.pound++;	data.total++;	}
334 					else if(sequence[structMap[i]] == 'G')	{	data.pound++;	data.total++;	}
335 					else if(sequence[structMap[i]] == 'C')	{	data.pound++;	data.total++;	}
336 					else if(sequence[structMap[i]] == '-')	{		/*donothing*/				}
337 				}
338 			}
339 			else if(isalnum(sequence[i])){
340 				data.loop++;
341 				data.total++;
342 			}
343 		}
344 		return data;
345 
346 	}
347 	catch(exception& e) {
348 		m->errorOut(e, "AlignCheckCommand", "getStats");
349 		exit(1);
350 	}
351 }
352 //**********************************************************************************************************************
353