1 /*
2  *  chimeraccodecommand.cpp
3  *  Mothur
4  *
5  *  Created by westcott on 3/30/10.
6  *  Copyright 2010 Schloss Lab. All rights reserved.
7  *
8  */
9 
10 #include "chimeraccodecommand.h"
11 #include "ccode.h"
12 
13 //**********************************************************************************************************************
setParameters()14 vector<string> ChimeraCcodeCommand::setParameters(){
15 	try {
16 		CommandParameter ptemplate("reference", "InputTypes", "", "", "none", "none", "none","",false,true,true); parameters.push_back(ptemplate);
17 		CommandParameter pfasta("fasta", "InputTypes", "", "", "none", "none", "none","chimera-mapinfo-accnos",false,true,true); parameters.push_back(pfasta);
18 		CommandParameter pfilter("filter", "Boolean", "", "F", "", "", "","",false,false); parameters.push_back(pfilter);
19 		CommandParameter pwindow("window", "Number", "", "0", "", "", "","",false,false); parameters.push_back(pwindow);
20 		CommandParameter pnumwanted("numwanted", "Number", "", "20", "", "", "","",false,false); parameters.push_back(pnumwanted);
21 		CommandParameter pmask("mask", "String", "", "", "", "", "","",false,false); parameters.push_back(pmask);
22 		CommandParameter pseed("seed", "Number", "", "0", "", "", "","",false,false); parameters.push_back(pseed);
23         CommandParameter pinputdir("inputdir", "String", "", "", "", "", "","",false,false); parameters.push_back(pinputdir);
24 		CommandParameter poutputdir("outputdir", "String", "", "", "", "", "","",false,false); parameters.push_back(poutputdir);
25 
26         abort = false; calledHelp = false;
27 
28         vector<string> tempOutNames;
29         outputTypes["chimera"] = tempOutNames;
30         outputTypes["mapinfo"] = tempOutNames;
31         outputTypes["accnos"] = tempOutNames;
32 
33 		vector<string> myArray;
34 		for (int i = 0; i < parameters.size(); i++) {	myArray.push_back(parameters[i].name);		}
35 		return myArray;
36 	}
37 	catch(exception& e) {
38 		m->errorOut(e, "ChimeraCcodeCommand", "setParameters");
39 		exit(1);
40 	}
41 }
42 //**********************************************************************************************************************
getHelpString()43 string ChimeraCcodeCommand::getHelpString(){
44 	try {
45 		string helpString = "";
46 		helpString += "The chimera.ccode command reads a fastafile and referencefile and outputs potentially chimeric sequences.\n";
47 		helpString += "This command was created using the algorithms described in the 'Evaluating putative chimeric sequences from PCR-amplified products' paper by Juan M. Gonzalez, Johannes Zimmerman and Cesareo Saiz-Jimenez.\n";
48 		helpString += "The chimera.ccode command parameters are fasta, reference, filter, mask, processors, window and numwanted.\n";
49 		helpString += "The fasta parameter allows you to enter the fasta file containing your potentially chimeric sequences, and is required unless you have a valid current fasta file. \n";
50 		helpString += "The reference parameter allows you to enter a reference file containing known non-chimeric sequences, and is required. \n";
51 		helpString += "The filter parameter allows you to specify if you would like to apply a vertical and 50% soft filter. \n";
52 		helpString += "The mask parameter allows you to specify a file containing one sequence you wish to use as a mask for the your sequences. \n";
53 		helpString += "The window parameter allows you to specify the window size for searching for chimeras. \n";
54 		helpString += "The numwanted parameter allows you to specify how many sequences you would each query sequence compared with.\n";
55 		helpString += "The chimera.ccode command should be in the following format: \n";
56 		helpString += "chimera.ccode(fasta=yourFastaFile, reference=yourTemplate) \n";
57 		helpString += "Example: chimera.ccode(fasta=AD.align, reference=core_set_aligned.imputed.fasta) \n";
58 
59 		return helpString;
60 	}
61 	catch(exception& e) {
62 		m->errorOut(e, "ChimeraCcodeCommand", "getHelpString");
63 		exit(1);
64 	}
65 }
66 //**********************************************************************************************************************
getOutputPattern(string type)67 string ChimeraCcodeCommand::getOutputPattern(string type) {
68     try {
69         string pattern = "";
70 
71         if (type == "chimera") {  pattern = "[filename],[tag],ccode.chimeras-[filename],ccode.chimeras"; }
72         else if (type == "accnos") {  pattern = "[filename],[tag],ccode.accnos-[filename],ccode.accnos"; }
73         else if (type == "mapinfo") {  pattern =  "[filename],mapinfo"; }
74         else { m->mothurOut("[ERROR]: No definition for type " + type + " output pattern.\n"); m->setControl_pressed(true);  }
75 
76         return pattern;
77     }
78     catch(exception& e) {
79         m->errorOut(e, "ChimeraCcodeCommand", "getOutputPattern");
80         exit(1);
81     }
82 }
83 //***************************************************************************************************************
ChimeraCcodeCommand(string option)84 ChimeraCcodeCommand::ChimeraCcodeCommand(string option) : Command()  {
85 	try {
86 		//allow user to run help
87 		if(option == "help") { help(); abort = true; calledHelp = true; }
88 		else if(option == "citation") { citation(); abort = true; calledHelp = true;}
89         else if(option == "category") {  abort = true; calledHelp = true;  }
90 
91 		else {
92 			OptionParser parser(option, setParameters());
93 			map<string,string> parameters = parser.getParameters();
94 
95             ValidParameters validParameter;
96             fastafile = validParameter.validFile(parameters, "fasta");
97             if (fastafile == "not found") {
98                 fastafile = current->getFastaFile();
99                 if (fastafile != "") { m->mothurOut("Using " + fastafile + " as input file for the fasta parameter.\n"); }
100                 else { 	m->mothurOut("[ERROR]: You have no current fasta file and the fasta parameter is required.\n");  abort = true; }
101             }
102             else if (fastafile == "not open") { abort = true; }
103             else { current->setFastaFile(fastafile); }
104 
105 			maskfile = validParameter.validPath(parameters, "mask");
106 			if (maskfile == "not found") { maskfile = "";  }
107 			else if (maskfile != "default")  {
108 				ifstream in;
109 				bool ableToOpen = util.openInputFile(maskfile, in);
110 				if (!ableToOpen) { abort = true; }
111 				in.close();
112             }else if (maskfile == "default") {  m->mothurOut("[NOTE]: Using the default 236627 EU009184.1 Shigella dysenteriae str. FBD013.\n");    }
113 
114 			string temp;
115 			temp = validParameter.valid(parameters, "filter");			if (temp == "not found") { temp = "F"; }
116 			filter = util.isTrue(temp);
117 
118 			temp = validParameter.valid(parameters, "window");			if (temp == "not found") { temp = "0"; }
119 			util.mothurConvert(temp, window);
120 
121 			temp = validParameter.valid(parameters, "numwanted");		if (temp == "not found") { temp = "20"; }
122 			util.mothurConvert(temp, numwanted);
123 
124 			//this has to go after save so that if the user sets save=t and provides no reference we abort
125 			templatefile = validParameter.validFile(parameters, "reference");
126 			if (templatefile == "not found") { m->mothurOut("[ERROR]: The reference parameter is a required, aborting.\n"); abort = true;
127 			}else if (templatefile == "not open") { abort = true; }
128 
129 		}
130 	}
131 	catch(exception& e) {
132 		m->errorOut(e, "ChimeraCcodeCommand", "ChimeraCcodeCommand");
133 		exit(1);
134 	}
135 }
136 //***************************************************************************************************************
execute()137 int ChimeraCcodeCommand::execute(){
138 	try{
139 
140 		if (abort) { if (calledHelp) { return 0; }  return 2;	}
141 
142         m->mothurOut("Checking sequences from " + fastafile + " ...\n" );
143 
144         long start = time(NULL);
145 
146         if (outputdir == "") { outputdir = util.hasPath(fastafile);  }
147         string outputFileName, accnosFileName;
148         map<string, string> variables;
149         variables["[filename]"] = outputdir + util.getRootName(util.getSimpleName(fastafile));
150         string mapInfo = getOutputFileName("mapinfo", variables);
151         if (maskfile != "") { variables["[tag]"] = maskfile; }
152         outputFileName = getOutputFileName("chimera", variables);
153         accnosFileName = getOutputFileName("accnos", variables);
154 
155         if (m->getControl_pressed()) {  return 0;	}
156 
157         numSeqs = driver(outputFileName, fastafile, accnosFileName);
158 
159         if (m->getControl_pressed()) { util.mothurRemove(outputFileName); util.mothurRemove(accnosFileName);  return 0; }
160 
161         ofstream outHeader;
162         string tempHeader = outputdir + util.getRootName(util.getSimpleName(fastafile)) + maskfile + "ccode.chimeras.tempHeader";
163         util.openOutputFile(tempHeader, outHeader); outHeader << "For full window mapping info refer to " << mapInfo << endl << endl; outHeader.close();
164 
165         util.appendFiles(outputFileName, tempHeader);
166         util.mothurRemove(outputFileName);
167         rename(tempHeader.c_str(), outputFileName.c_str());
168 
169         outputNames.push_back(outputFileName); outputTypes["chimera"].push_back(outputFileName);
170         outputNames.push_back(mapInfo);	outputTypes["mapinfo"].push_back(mapInfo);
171         outputNames.push_back(accnosFileName); outputTypes["accnos"].push_back(accnosFileName);
172 
173         m->mothurOut("\nIt took " + toString(time(NULL) - start) + " secs to check " + toString(numSeqs) + " sequences.\n");
174 
175 		//set accnos file as new current accnosfile
176 		string currentName = "";
177 		itTypes = outputTypes.find("accnos");
178 		if (itTypes != outputTypes.end()) {
179 			if ((itTypes->second).size() != 0) { currentName = (itTypes->second)[0]; current->setAccnosFile(currentName); }
180 		}
181 
182 		m->mothurOut("\nOutput File Names: \n");
183 		for (int i = 0; i < outputNames.size(); i++) {	m->mothurOut(outputNames[i]); m->mothurOutEndLine();	}
184 		m->mothurOutEndLine();
185 
186 		return 0;
187 
188 	}
189 	catch(exception& e) {
190 		m->errorOut(e, "ChimeraCcodeCommand", "execute");
191 		exit(1);
192 	}
193 }
194 //**********************************************************************************************************************
195 
driver(string outputFName,string filename,string accnos)196 int ChimeraCcodeCommand::driver(string outputFName, string filename, string accnos){
197 	try {
198         MothurChimera* chimera = new Ccode(fastafile, templatefile, filter, maskfile, window, numwanted, outputdir);
199 
200         //is your template aligned?
201         if (chimera->getUnaligned()) { m->mothurOut("[ERROR]: Your reference sequences are unaligned, please correct.\n");  delete chimera; return 0; }
202         templateSeqsLength = chimera->getLength();
203 
204 		ofstream out;
205 		util.openOutputFile(outputFName, out);
206 
207 		ofstream out2;
208 		util.openOutputFile(accnos, out2);
209 
210 		ifstream inFASTA;
211 		util.openInputFile(filename, inFASTA);
212 
213 		int count = 0;
214 		while (!inFASTA.eof()) {
215 
216             if (m->getControl_pressed()) {	count = 1; break;	}
217 
218 			Sequence* candidateSeq = new Sequence(inFASTA);  util.gobble(inFASTA);
219 
220 			if (candidateSeq->getName() != "") { //incase there is a commented sequence at the end of a file
221 
222 				if (candidateSeq->getAligned().length() != templateSeqsLength) {
223 					m->mothurOut(candidateSeq->getName() + " is not the same length as the template sequences. Skipping.\n");
224 				}else{
225 					//find chimeras
226 					chimera->getChimeras(candidateSeq);
227 
228 					if (m->getControl_pressed()) {	delete candidateSeq; return 1;	}
229 
230 					//print results
231 					chimera->print(out, out2);
232 				}
233 				count++;
234 			}
235 			delete candidateSeq;
236 
237 
238 			//report progress
239 			if((count) % 100 == 0){	m->mothurOutJustToScreen("Processing sequence: " + toString(count) + "\n");		}
240 		}
241 		//report progress
242 		if((count) % 100 != 0){	m->mothurOutJustToScreen("Processing sequence: " + toString(count) + "\n");	}
243 
244 		out.close();
245 		out2.close();
246 		inFASTA.close();
247         delete chimera;
248 
249 		return count;
250 	}
251 	catch(exception& e) {
252 		m->errorOut(e, "ChimeraCcodeCommand", "driver");
253 		exit(1);
254 	}
255 }
256 //**********************************************************************************************************************
257 
258