1 /*
2 * secondarystructurecommand.cpp
3 * Mothur
4 *
5 * Created by westcott on 9/18/09.
6 * Copyright 2009 Schloss Lab. All rights reserved.
7 *
8 */
9
10 #include "aligncheckcommand.h"
11 #include "sequence.hpp"
12 #include "counttable.h"
13
14 //**********************************************************************************************************************
setParameters()15 vector<string> AlignCheckCommand::setParameters(){
16 try {
17 CommandParameter pfasta("fasta", "InputTypes", "", "", "none", "none", "none","aligncheck",false,true,true); parameters.push_back(pfasta);
18 CommandParameter pmap("map", "InputTypes", "", "", "none", "none", "none","",false,true,true); parameters.push_back(pmap);
19 CommandParameter pname("name", "InputTypes", "", "", "namecount", "none", "none","",false,false); parameters.push_back(pname);
20 CommandParameter pcount("count", "InputTypes", "", "", "namecount", "none", "none","",false,false); parameters.push_back(pcount);
21 CommandParameter pseed("seed", "Number", "", "0", "", "", "","",false,false); parameters.push_back(pseed);
22 CommandParameter pinputdir("inputdir", "String", "", "", "", "", "","",false,false); parameters.push_back(pinputdir);
23 CommandParameter poutputdir("outputdir", "String", "", "", "", "", "","",false,false); parameters.push_back(poutputdir);
24
25 abort = false; calledHelp = false; haderror = 0;
26
27 vector<string> tempOutNames;
28 outputTypes["aligncheck"] = tempOutNames;
29
30 vector<string> myArray;
31 for (int i = 0; i < parameters.size(); i++) { myArray.push_back(parameters[i].name); }
32 return myArray;
33 }
34 catch(exception& e) {
35 m->errorOut(e, "AlignCheckCommand", "setParameters");
36 exit(1);
37 }
38 }
39 //**********************************************************************************************************************
getHelpString()40 string AlignCheckCommand::getHelpString(){
41 try {
42 string helpString = "";
43 helpString += "The align.check command reads a fasta file and map file as well as an optional name or count file.\n";
44 helpString += "It outputs a file containing the secondary structure matches in the .align.check file.\n";
45 helpString += "The align.check command parameters are fasta and map, both are required.\n";
46 helpString += "The align.check command should be in the following format: align.check(fasta=yourFasta, map=yourMap).\n";
47 helpString += "Example align.check(map=silva.ss.map, fasta=amazon.fasta).\n";
48 ;
49 return helpString;
50 }
51 catch(exception& e) {
52 m->errorOut(e, "AlignCheckCommand", "getHelpString");
53 exit(1);
54 }
55 }
56 //**********************************************************************************************************************
getOutputPattern(string type)57 string AlignCheckCommand::getOutputPattern(string type) {
58 try {
59 string pattern = "";
60
61 if (type == "aligncheck") { pattern = "[filename],align.check"; }
62 else { m->mothurOut("[ERROR]: No definition for type " + type + " output pattern.\n"); m->setControl_pressed(true); }
63
64 return pattern;
65 }
66 catch(exception& e) {
67 m->errorOut(e, "AlignCheckCommand", "getOutputPattern");
68 exit(1);
69 }
70 }
71 //**********************************************************************************************************************
72
AlignCheckCommand(string option)73 AlignCheckCommand::AlignCheckCommand(string option) : Command() {
74 try {
75 if(option == "help") { help(); abort = true; calledHelp = true; }
76 else if(option == "citation") { citation(); abort = true; calledHelp = true;}
77 else if(option == "category") { abort = true; calledHelp = true; }
78
79 else {
80 OptionParser parser(option, setParameters());
81 map<string,string> parameters = parser.getParameters();
82
83 ValidParameters validParameter;
84 mapfile = validParameter.validFile(parameters, "map");
85 if (mapfile == "not open") { abort = true; }
86 else if (mapfile == "not found") { mapfile = ""; m->mothurOut("You must provide an map file.\n"); abort = true; }
87
88 fastafile = validParameter.validFile(parameters, "fasta");
89 if (fastafile == "not open") { fastafile = ""; abort = true; }
90 else if (fastafile == "not found") {
91 fastafile = current->getFastaFile();
92 if (fastafile != "") { m->mothurOut("Using " + fastafile + " as input file for the fasta parameter.\n"); }
93 else { m->mothurOut("[ERROR]: You have no current fastafile and the fasta parameter is required.\n"); abort = true; }
94 }else { current->setFastaFile(fastafile); }
95
96 namefile = validParameter.validFile(parameters, "name");
97 if (namefile == "not open") { namefile = ""; abort = true; }
98 else if (namefile == "not found") { namefile = ""; }
99 else { current->setNameFile(namefile); }
100
101 countfile = validParameter.validFile(parameters, "count");
102 if (countfile == "not open") { abort = true; countfile = ""; }
103 else if (countfile == "not found") { countfile = ""; }
104 else { current->setCountFile(countfile); }
105
106 if ((countfile != "") && (namefile != "")) { m->mothurOut("[ERROR]: You must enter ONLY ONE of the following: count or name.\n"); abort = true; }
107
108 if (outputdir == "") {
109 outputdir += util.hasPath(fastafile);
110 }
111
112 if (countfile == "") {
113 if ((namefile == "") && (fastafile != "")){
114 vector<string> files; files.push_back(fastafile);
115 if (!current->getMothurCalling()) { parser.getNameFile(files); }
116 }
117 }
118 }
119
120 }
121 catch(exception& e) {
122 m->errorOut(e, "AlignCheckCommand", "AlignCheckCommand");
123 exit(1);
124 }
125 }
126 //**********************************************************************************************************************
127
execute()128 int AlignCheckCommand::execute(){
129 try {
130
131 if (abort) { if (calledHelp) { return 0; } return 2; }
132
133 //get secondary structure info.
134 readMap();
135
136 if (namefile != "") { nameMap = util.readNames(namefile); }
137 else if (countfile != "") {
138 CountTable ct;
139 ct.readTable(countfile, false, false);
140 nameMap = ct.getNameMap();
141 }
142
143 if (m->getControl_pressed()) { return 0; }
144
145 ifstream in;
146 util.openInputFile(fastafile, in);
147
148 ofstream out;
149 map<string, string> variables;
150 variables["[filename]"] = outputdir + util.getRootName(util.getSimpleName(fastafile));
151 string outfile = getOutputFileName("aligncheck",variables);
152 util.openOutputFile(outfile, out);
153
154
155 out << "name" << '\t' << "pound" << '\t' << "dash" << '\t' << "plus" << '\t' << "equal" << '\t';
156 out << "loop" << '\t' << "tilde" << '\t' << "total" << '\t' << "numseqs" << endl;
157
158 vector<int> pound;
159 vector<int> dash;
160 vector<int> plus;
161 vector<int> equal;
162 vector<int> loop;
163 vector<int> tilde;
164 vector<int> total;
165
166 int count = 0;
167 while(!in.eof()){
168 if (m->getControl_pressed()) { in.close(); out.close(); util.mothurRemove(outfile); return 0; }
169
170 Sequence seq(in); util.gobble(in);
171 if (seq.getName() != "") {
172 statData data = getStats(seq.getAligned());
173
174 if (haderror == 1) { m->setControl_pressed(true); break; }
175
176 int num = 1;
177 if ((namefile != "") || (countfile != "")) {
178 //make sure this sequence is in the namefile, else error
179 map<string, int>::iterator it = nameMap.find(seq.getName());
180
181 if (it == nameMap.end()) { m->mothurOut("[ERROR]: " + seq.getName() + " is not in your namefile, please correct.\n"); m->setControl_pressed(true); }
182 else { num = it->second; }
183 }
184
185 //for each sequence this sequence represents
186 for (int i = 0; i < num; i++) {
187 pound.push_back(data.pound);
188 dash.push_back(data.dash);
189 plus.push_back(data.plus);
190 equal.push_back(data.equal);
191 loop.push_back(data.loop);
192 tilde.push_back(data.tilde);
193 total.push_back(data.total);
194 }
195 count++;
196
197 out << seq.getName() << '\t' << data.pound << '\t' << data.dash << '\t' << data.plus << '\t' << data.equal << '\t';
198 out << data.loop << '\t' << data.tilde << '\t' << data.total << '\t' << num << endl;
199 }
200 }
201
202 in.close();
203 out.close();
204
205 if (m->getControl_pressed()) { util.mothurRemove(outfile); return 0; }
206
207 sort(pound.begin(), pound.end());
208 sort(dash.begin(), dash.end());
209 sort(plus.begin(), plus.end());
210 sort(equal.begin(), equal.end());
211 sort(loop.begin(), loop.end());
212 sort(tilde.begin(), tilde.end());
213 sort(total.begin(), total.end());
214 int size = pound.size();
215
216 int ptile0_25 = int(size * 0.025);
217 int ptile25 = int(size * 0.250);
218 int ptile50 = int(size * 0.500);
219 int ptile75 = int(size * 0.750);
220 int ptile97_5 = int(size * 0.975);
221 int ptile100 = size - 1;
222
223 if (m->getControl_pressed()) { util.mothurRemove(outfile); return 0; }
224
225 m->mothurOut("\n\t\tPound\tDash\tPlus\tEqual\tLoop\tTilde\tTotal\n");
226 m->mothurOut("Minimum:\t" + toString(pound[0]) + "\t" + toString(dash[0]) + "\t" + toString(plus[0]) + "\t" + toString(equal[0]) + "\t" + toString(loop[0]) + "\t" + toString(tilde[0]) + "\t" + toString(total[0])+ "\n");
227 m->mothurOut("2.5%-tile:\t" + toString(pound[ptile0_25]) + "\t" + toString(dash[ptile0_25]) + "\t" + toString(plus[ptile0_25]) + "\t" + toString(equal[ptile0_25]) + "\t"+ toString(loop[ptile0_25]) + "\t"+ toString(tilde[ptile0_25]) + "\t"+ toString(total[ptile0_25])+ "\n");
228 m->mothurOut("25%-tile:\t" + toString(pound[ptile25]) + "\t" + toString(dash[ptile25]) + "\t" + toString(plus[ptile25]) + "\t" + toString(equal[ptile25]) + "\t" + toString(loop[ptile25]) + "\t" + toString(tilde[ptile25]) + "\t" + toString(total[ptile25])+ "\n");
229 m->mothurOut("Median: \t" + toString(pound[ptile50]) + "\t" + toString(dash[ptile50]) + "\t" + toString(plus[ptile50]) + "\t" + toString(equal[ptile50]) + "\t" + toString(loop[ptile50]) + "\t" + toString(tilde[ptile50]) + "\t" + toString(total[ptile50])+ "\n");
230 m->mothurOut("75%-tile:\t" + toString(pound[ptile75]) + "\t" + toString(dash[ptile75]) + "\t" + toString(plus[ptile75]) + "\t" + toString(equal[ptile75]) + "\t" + toString(loop[ptile75]) + "\t" + toString(tilde[ptile75]) + "\t" + toString(total[ptile75])+ "\n");
231 m->mothurOut("97.5%-tile:\t" + toString(pound[ptile97_5]) + "\t" + toString(dash[ptile97_5]) + "\t" + toString(plus[ptile97_5]) + "\t" + toString(equal[ptile97_5]) + "\t" + toString(loop[ptile97_5]) + "\t" + toString(tilde[ptile97_5]) + "\t" + toString(total[ptile97_5])+ "\n");
232 m->mothurOut("Maximum:\t" + toString(pound[ptile100]) + "\t" + toString(dash[ptile100]) + "\t" + toString(plus[ptile100]) + "\t" + toString(equal[ptile100]) + "\t" + toString(loop[ptile100]) + "\t" + toString(tilde[ptile100]) + "\t" + toString(total[ptile100])+ "\n");
233 if ((namefile == "") && (countfile == "")) { m->mothurOut("# of Seqs:\t" + toString(count)+ "\n"); }
234 else { m->mothurOut("# of unique seqs:\t" + toString(count)+ "\n"); m->mothurOut("total # of seqs:\t" + toString(size)+ "\n"); }
235
236
237 m->mothurOut("\nOutput File Names: \n");
238 m->mothurOut(outfile); m->mothurOutEndLine(); outputNames.push_back(outfile); outputTypes["aligncheck"].push_back(outfile);
239 m->mothurOutEndLine();
240
241 return 0;
242 }
243
244 catch(exception& e) {
245 m->errorOut(e, "AlignCheckCommand", "execute");
246 exit(1);
247 }
248 }
249 //**********************************************************************************************************************
readMap()250 void AlignCheckCommand::readMap(){
251 try {
252
253 structMap.resize(1, 0);
254 ifstream in;
255
256 util.openInputFile(mapfile, in);
257
258 while(!in.eof()){
259 int position;
260 in >> position;
261 structMap.push_back(position);
262 util.gobble(in);
263 }
264 in.close();
265
266 seqLength = structMap.size();
267
268
269 //check you make sure is structMap[10] = 380 then structMap[380] = 10.
270 for(int i=0;i<seqLength;i++){
271 if(structMap[i] != 0){
272 if(structMap[structMap[i]] != i){
273 m->mothurOut("Your map file contains an error: line " + toString(i) + " does not match line " + toString(structMap[i]) + ".\n");
274 }
275 }
276 }
277
278
279 }
280 catch(exception& e) {
281 m->errorOut(e, "AlignCheckCommand", "readMap");
282 exit(1);
283 }
284 }
285 /**************************************************************************************************/
286
getStats(string sequence)287 statData AlignCheckCommand::getStats(string sequence){
288 try {
289
290 statData data;
291 sequence = "*" + sequence; // need to pad the sequence so we can index it by 1
292
293 int length = sequence.length();
294
295 if (length != seqLength) { m->mothurOut("your sequences are " + toString(length) + " long, but your map file only contains " + toString(seqLength) + " entries. please correct.\n"); haderror = 1; return data; }
296
297 for(int i=1;i<length;i++){
298 if(structMap[i] != 0){
299 if(sequence[i] == 'A'){
300 if(sequence[structMap[i]] == 'T') { data.tilde++; }
301 else if(sequence[structMap[i]] == 'A') { data.pound++; }
302 else if(sequence[structMap[i]] == 'G') { data.equal++; }
303 else if(sequence[structMap[i]] == 'C') { data.pound++; }
304 else if(sequence[structMap[i]] == '-') { data.pound++; }
305 data.total++;
306 }
307 else if(sequence[i] == 'T'){
308 if(sequence[structMap[i]] == 'T') { data.plus++; }
309 else if(sequence[structMap[i]] == 'A') { data.tilde++; }
310 else if(sequence[structMap[i]] == 'G') { data.dash++; }
311 else if(sequence[structMap[i]] == 'C') { data.pound++; }
312 else if(sequence[structMap[i]] == '-') { data.pound++; }
313 data.total++;
314 }
315 else if(sequence[i] == 'G'){
316 if(sequence[structMap[i]] == 'T') { data.dash++; }
317 else if(sequence[structMap[i]] == 'A') { data.equal++; }
318 else if(sequence[structMap[i]] == 'G') { data.pound++; }
319 else if(sequence[structMap[i]] == 'C') { data.tilde++; }
320 else if(sequence[structMap[i]] == '-') { data.pound++; }
321 data.total++;
322 }
323 else if(sequence[i] == 'C'){
324 if(sequence[structMap[i]] == 'T') { data.pound++; }
325 else if(sequence[structMap[i]] == 'A') { data.pound++; }
326 else if(sequence[structMap[i]] == 'G') { data.tilde++; }
327 else if(sequence[structMap[i]] == 'C') { data.pound++; }
328 else if(sequence[structMap[i]] == '-') { data.pound++; }
329 data.total++;
330 }
331 else if(sequence[i] == '-'){
332 if(sequence[structMap[i]] == 'T') { data.pound++; data.total++; }
333 else if(sequence[structMap[i]] == 'A') { data.pound++; data.total++; }
334 else if(sequence[structMap[i]] == 'G') { data.pound++; data.total++; }
335 else if(sequence[structMap[i]] == 'C') { data.pound++; data.total++; }
336 else if(sequence[structMap[i]] == '-') { /*donothing*/ }
337 }
338 }
339 else if(isalnum(sequence[i])){
340 data.loop++;
341 data.total++;
342 }
343 }
344 return data;
345
346 }
347 catch(exception& e) {
348 m->errorOut(e, "AlignCheckCommand", "getStats");
349 exit(1);
350 }
351 }
352 //**********************************************************************************************************************
353