1 /*
2 * chimeraccodecommand.cpp
3 * Mothur
4 *
5 * Created by westcott on 3/30/10.
6 * Copyright 2010 Schloss Lab. All rights reserved.
7 *
8 */
9
10 #include "chimeraccodecommand.h"
11 #include "ccode.h"
12
13 //**********************************************************************************************************************
setParameters()14 vector<string> ChimeraCcodeCommand::setParameters(){
15 try {
16 CommandParameter ptemplate("reference", "InputTypes", "", "", "none", "none", "none","",false,true,true); parameters.push_back(ptemplate);
17 CommandParameter pfasta("fasta", "InputTypes", "", "", "none", "none", "none","chimera-mapinfo-accnos",false,true,true); parameters.push_back(pfasta);
18 CommandParameter pfilter("filter", "Boolean", "", "F", "", "", "","",false,false); parameters.push_back(pfilter);
19 CommandParameter pwindow("window", "Number", "", "0", "", "", "","",false,false); parameters.push_back(pwindow);
20 CommandParameter pnumwanted("numwanted", "Number", "", "20", "", "", "","",false,false); parameters.push_back(pnumwanted);
21 CommandParameter pmask("mask", "String", "", "", "", "", "","",false,false); parameters.push_back(pmask);
22 CommandParameter pseed("seed", "Number", "", "0", "", "", "","",false,false); parameters.push_back(pseed);
23 CommandParameter pinputdir("inputdir", "String", "", "", "", "", "","",false,false); parameters.push_back(pinputdir);
24 CommandParameter poutputdir("outputdir", "String", "", "", "", "", "","",false,false); parameters.push_back(poutputdir);
25
26 abort = false; calledHelp = false;
27
28 vector<string> tempOutNames;
29 outputTypes["chimera"] = tempOutNames;
30 outputTypes["mapinfo"] = tempOutNames;
31 outputTypes["accnos"] = tempOutNames;
32
33 vector<string> myArray;
34 for (int i = 0; i < parameters.size(); i++) { myArray.push_back(parameters[i].name); }
35 return myArray;
36 }
37 catch(exception& e) {
38 m->errorOut(e, "ChimeraCcodeCommand", "setParameters");
39 exit(1);
40 }
41 }
42 //**********************************************************************************************************************
getHelpString()43 string ChimeraCcodeCommand::getHelpString(){
44 try {
45 string helpString = "";
46 helpString += "The chimera.ccode command reads a fastafile and referencefile and outputs potentially chimeric sequences.\n";
47 helpString += "This command was created using the algorithms described in the 'Evaluating putative chimeric sequences from PCR-amplified products' paper by Juan M. Gonzalez, Johannes Zimmerman and Cesareo Saiz-Jimenez.\n";
48 helpString += "The chimera.ccode command parameters are fasta, reference, filter, mask, processors, window and numwanted.\n";
49 helpString += "The fasta parameter allows you to enter the fasta file containing your potentially chimeric sequences, and is required unless you have a valid current fasta file. \n";
50 helpString += "The reference parameter allows you to enter a reference file containing known non-chimeric sequences, and is required. \n";
51 helpString += "The filter parameter allows you to specify if you would like to apply a vertical and 50% soft filter. \n";
52 helpString += "The mask parameter allows you to specify a file containing one sequence you wish to use as a mask for the your sequences. \n";
53 helpString += "The window parameter allows you to specify the window size for searching for chimeras. \n";
54 helpString += "The numwanted parameter allows you to specify how many sequences you would each query sequence compared with.\n";
55 helpString += "The chimera.ccode command should be in the following format: \n";
56 helpString += "chimera.ccode(fasta=yourFastaFile, reference=yourTemplate) \n";
57 helpString += "Example: chimera.ccode(fasta=AD.align, reference=core_set_aligned.imputed.fasta) \n";
58
59 return helpString;
60 }
61 catch(exception& e) {
62 m->errorOut(e, "ChimeraCcodeCommand", "getHelpString");
63 exit(1);
64 }
65 }
66 //**********************************************************************************************************************
getOutputPattern(string type)67 string ChimeraCcodeCommand::getOutputPattern(string type) {
68 try {
69 string pattern = "";
70
71 if (type == "chimera") { pattern = "[filename],[tag],ccode.chimeras-[filename],ccode.chimeras"; }
72 else if (type == "accnos") { pattern = "[filename],[tag],ccode.accnos-[filename],ccode.accnos"; }
73 else if (type == "mapinfo") { pattern = "[filename],mapinfo"; }
74 else { m->mothurOut("[ERROR]: No definition for type " + type + " output pattern.\n"); m->setControl_pressed(true); }
75
76 return pattern;
77 }
78 catch(exception& e) {
79 m->errorOut(e, "ChimeraCcodeCommand", "getOutputPattern");
80 exit(1);
81 }
82 }
83 //***************************************************************************************************************
ChimeraCcodeCommand(string option)84 ChimeraCcodeCommand::ChimeraCcodeCommand(string option) : Command() {
85 try {
86 //allow user to run help
87 if(option == "help") { help(); abort = true; calledHelp = true; }
88 else if(option == "citation") { citation(); abort = true; calledHelp = true;}
89 else if(option == "category") { abort = true; calledHelp = true; }
90
91 else {
92 OptionParser parser(option, setParameters());
93 map<string,string> parameters = parser.getParameters();
94
95 ValidParameters validParameter;
96 fastafile = validParameter.validFile(parameters, "fasta");
97 if (fastafile == "not found") {
98 fastafile = current->getFastaFile();
99 if (fastafile != "") { m->mothurOut("Using " + fastafile + " as input file for the fasta parameter.\n"); }
100 else { m->mothurOut("[ERROR]: You have no current fasta file and the fasta parameter is required.\n"); abort = true; }
101 }
102 else if (fastafile == "not open") { abort = true; }
103 else { current->setFastaFile(fastafile); }
104
105 maskfile = validParameter.validPath(parameters, "mask");
106 if (maskfile == "not found") { maskfile = ""; }
107 else if (maskfile != "default") {
108 ifstream in;
109 bool ableToOpen = util.openInputFile(maskfile, in);
110 if (!ableToOpen) { abort = true; }
111 in.close();
112 }else if (maskfile == "default") { m->mothurOut("[NOTE]: Using the default 236627 EU009184.1 Shigella dysenteriae str. FBD013.\n"); }
113
114 string temp;
115 temp = validParameter.valid(parameters, "filter"); if (temp == "not found") { temp = "F"; }
116 filter = util.isTrue(temp);
117
118 temp = validParameter.valid(parameters, "window"); if (temp == "not found") { temp = "0"; }
119 util.mothurConvert(temp, window);
120
121 temp = validParameter.valid(parameters, "numwanted"); if (temp == "not found") { temp = "20"; }
122 util.mothurConvert(temp, numwanted);
123
124 //this has to go after save so that if the user sets save=t and provides no reference we abort
125 templatefile = validParameter.validFile(parameters, "reference");
126 if (templatefile == "not found") { m->mothurOut("[ERROR]: The reference parameter is a required, aborting.\n"); abort = true;
127 }else if (templatefile == "not open") { abort = true; }
128
129 }
130 }
131 catch(exception& e) {
132 m->errorOut(e, "ChimeraCcodeCommand", "ChimeraCcodeCommand");
133 exit(1);
134 }
135 }
136 //***************************************************************************************************************
execute()137 int ChimeraCcodeCommand::execute(){
138 try{
139
140 if (abort) { if (calledHelp) { return 0; } return 2; }
141
142 m->mothurOut("Checking sequences from " + fastafile + " ...\n" );
143
144 long start = time(NULL);
145
146 if (outputdir == "") { outputdir = util.hasPath(fastafile); }
147 string outputFileName, accnosFileName;
148 map<string, string> variables;
149 variables["[filename]"] = outputdir + util.getRootName(util.getSimpleName(fastafile));
150 string mapInfo = getOutputFileName("mapinfo", variables);
151 if (maskfile != "") { variables["[tag]"] = maskfile; }
152 outputFileName = getOutputFileName("chimera", variables);
153 accnosFileName = getOutputFileName("accnos", variables);
154
155 if (m->getControl_pressed()) { return 0; }
156
157 numSeqs = driver(outputFileName, fastafile, accnosFileName);
158
159 if (m->getControl_pressed()) { util.mothurRemove(outputFileName); util.mothurRemove(accnosFileName); return 0; }
160
161 ofstream outHeader;
162 string tempHeader = outputdir + util.getRootName(util.getSimpleName(fastafile)) + maskfile + "ccode.chimeras.tempHeader";
163 util.openOutputFile(tempHeader, outHeader); outHeader << "For full window mapping info refer to " << mapInfo << endl << endl; outHeader.close();
164
165 util.appendFiles(outputFileName, tempHeader);
166 util.mothurRemove(outputFileName);
167 rename(tempHeader.c_str(), outputFileName.c_str());
168
169 outputNames.push_back(outputFileName); outputTypes["chimera"].push_back(outputFileName);
170 outputNames.push_back(mapInfo); outputTypes["mapinfo"].push_back(mapInfo);
171 outputNames.push_back(accnosFileName); outputTypes["accnos"].push_back(accnosFileName);
172
173 m->mothurOut("\nIt took " + toString(time(NULL) - start) + " secs to check " + toString(numSeqs) + " sequences.\n");
174
175 //set accnos file as new current accnosfile
176 string currentName = "";
177 itTypes = outputTypes.find("accnos");
178 if (itTypes != outputTypes.end()) {
179 if ((itTypes->second).size() != 0) { currentName = (itTypes->second)[0]; current->setAccnosFile(currentName); }
180 }
181
182 m->mothurOut("\nOutput File Names: \n");
183 for (int i = 0; i < outputNames.size(); i++) { m->mothurOut(outputNames[i]); m->mothurOutEndLine(); }
184 m->mothurOutEndLine();
185
186 return 0;
187
188 }
189 catch(exception& e) {
190 m->errorOut(e, "ChimeraCcodeCommand", "execute");
191 exit(1);
192 }
193 }
194 //**********************************************************************************************************************
195
driver(string outputFName,string filename,string accnos)196 int ChimeraCcodeCommand::driver(string outputFName, string filename, string accnos){
197 try {
198 MothurChimera* chimera = new Ccode(fastafile, templatefile, filter, maskfile, window, numwanted, outputdir);
199
200 //is your template aligned?
201 if (chimera->getUnaligned()) { m->mothurOut("[ERROR]: Your reference sequences are unaligned, please correct.\n"); delete chimera; return 0; }
202 templateSeqsLength = chimera->getLength();
203
204 ofstream out;
205 util.openOutputFile(outputFName, out);
206
207 ofstream out2;
208 util.openOutputFile(accnos, out2);
209
210 ifstream inFASTA;
211 util.openInputFile(filename, inFASTA);
212
213 int count = 0;
214 while (!inFASTA.eof()) {
215
216 if (m->getControl_pressed()) { count = 1; break; }
217
218 Sequence* candidateSeq = new Sequence(inFASTA); util.gobble(inFASTA);
219
220 if (candidateSeq->getName() != "") { //incase there is a commented sequence at the end of a file
221
222 if (candidateSeq->getAligned().length() != templateSeqsLength) {
223 m->mothurOut(candidateSeq->getName() + " is not the same length as the template sequences. Skipping.\n");
224 }else{
225 //find chimeras
226 chimera->getChimeras(candidateSeq);
227
228 if (m->getControl_pressed()) { delete candidateSeq; return 1; }
229
230 //print results
231 chimera->print(out, out2);
232 }
233 count++;
234 }
235 delete candidateSeq;
236
237
238 //report progress
239 if((count) % 100 == 0){ m->mothurOutJustToScreen("Processing sequence: " + toString(count) + "\n"); }
240 }
241 //report progress
242 if((count) % 100 != 0){ m->mothurOutJustToScreen("Processing sequence: " + toString(count) + "\n"); }
243
244 out.close();
245 out2.close();
246 inFASTA.close();
247 delete chimera;
248
249 return count;
250 }
251 catch(exception& e) {
252 m->errorOut(e, "ChimeraCcodeCommand", "driver");
253 exit(1);
254 }
255 }
256 //**********************************************************************************************************************
257
258