1 //
2 // vsearchfileparser.cpp
3 // Mothur
4 //
5 // Created by Sarah Westcott on 10/13/15.
6 // Copyright (c) 2015 Schloss Lab. All rights reserved.
7 //
8
9 #include "vsearchfileparser.h"
10
11
12 /***********************************************************************/
VsearchFileParser()13 VsearchFileParser::VsearchFileParser(){
14 try {
15 m = MothurOut::getInstance();
16 fastafile = "";
17 namefile = "";
18 countfile = "";
19 format = "count";
20 }
21 catch(exception& e) {
22 m->errorOut(e, "VsearchFileParser", "VsearchFileParser");
23 exit(1);
24 }
25 }
26 /***********************************************************************/
VsearchFileParser(string f)27 VsearchFileParser::VsearchFileParser(string f){
28 try {
29 m = MothurOut::getInstance();
30 fastafile = f;
31 namefile = "";
32 countfile = "";
33 format = "count";
34 }
35 catch(exception& e) {
36 m->errorOut(e, "VsearchFileParser", "VsearchFileParser");
37 exit(1);
38 }
39 }
40 /***********************************************************************/
VsearchFileParser(string f,string nameOrCount,string forma)41 VsearchFileParser::VsearchFileParser(string f, string nameOrCount, string forma) {
42 try {
43 m = MothurOut::getInstance();
44 fastafile = f;
45 namefile = "";
46 countfile = "";
47 format = forma;
48
49 if (format == "name") { namefile = nameOrCount; }
50 else if (format == "count") { countfile = nameOrCount; }
51 else { m->mothurOut("[ERROR]: " + format + " is not a valid file format for the VsearchFileParser, quitting.\n"); m->setControl_pressed(true); }
52
53 }
54 catch(exception& e) {
55 m->errorOut(e, "VsearchFileParser", "VsearchFileParser");
56 exit(1);
57 }
58 }
59 /***********************************************************************/
getVsearchFile()60 string VsearchFileParser::getVsearchFile() {
61 try {
62 Utils util;
63 if (fastafile == "") { m->mothurOut("[ERROR]: no fasta file given, cannot continue.\n"); m->setControl_pressed(true); }
64
65 //Run unique.seqs on the data if a name or count file is not given
66 if ((namefile == "") && (countfile == "")) { getNamesFile(fastafile); }
67 else if (namefile != "") { counts = util.readNames(namefile); }
68
69 if (countfile != "") { CountTable countTable; countTable.readTable(countfile, false, false); counts = countTable.getNameMap(); }
70
71 if (m->getControl_pressed()) { return 0; }
72
73 //Remove gap characters from each sequence if needed
74 //Append the number of sequences that each unique sequence represents to the end of the fasta file name
75 //Sorts by abundance
76 string vsearchFastafile = createVsearchFasta(fastafile);
77
78 return vsearchFastafile;
79
80 }
81 catch(exception& e) {
82 m->errorOut(e, "VsearchFileParser", "getVsearchFile");
83 exit(1);
84 }
85 }
86 /**********************************************************************/
87
createVsearchFasta(string inputFile)88 string VsearchFileParser::createVsearchFasta(string inputFile){
89 try {
90 Utils util;
91 string vsearchFasta = util.getSimpleName(fastafile) + ".sorted.fasta.temp";
92
93 vector<seqPriorityNode> seqs;
94 map<string, int>::iterator it;
95
96 ifstream in;
97 util.openInputFile(inputFile, in);
98
99 while (!in.eof()) {
100
101 if (m->getControl_pressed()) { in.close(); return vsearchFasta; }
102
103 Sequence seq(in); util.gobble(in);
104
105 it = counts.find(seq.getName());
106 if (it == counts.end()) {
107 m->mothurOut("[ERROR]: " + seq.getName() + " is not in your name or countfile, quitting.\n"); m->setControl_pressed(true);
108 }else {
109 seqPriorityNode temp(it->second, seq.getUnaligned(), it->first);
110 seqs.push_back(temp);
111 }
112
113 }
114 in.close();
115
116 util.printVsearchFile(seqs, vsearchFasta, ";size=", ";");
117
118 return vsearchFasta;
119 }
120 catch(exception& e) {
121 m->errorOut(e, "VsearchFileParser", "createVsearchFasta");
122 exit(1);
123 }
124 }
125 /*************************************************************************/
126
getNamesFile(string & inputFile)127 string VsearchFileParser::getNamesFile(string& inputFile){
128 try {
129
130 m->mothurOut("\nNo namesfile given, running unique.seqs command to generate one.\n\n");
131
132 //use unique.seqs to create new name and fastafile
133 string inputString = "fasta=" + inputFile + ", format=count";
134 m->mothurOut("/******************************************/\n");
135 m->mothurOut("Running command: unique.seqs(" + inputString + ")\n");
136 Command* uniqueCommand = new DeconvoluteCommand(inputString);
137 uniqueCommand->execute();
138
139 map<string, vector<string> > filenames = uniqueCommand->getOutputFiles();
140
141 delete uniqueCommand;
142 m->mothurOut("/******************************************/\n");
143
144 countfile = filenames["count"][0];
145 fastafile = filenames["fasta"][0];
146
147 return countfile;
148 }
149 catch(exception& e) {
150 m->errorOut(e, "VsearchFileParser", "getNamesFile");
151 exit(1);
152 }
153 }
154 /*************************************************************************/
155 //S 1 275 * * * * * GQY1XT001C44N8/ab=3677/ *
createListFile(string inputFile,int numBins,string label,map<string,int> & ct)156 ListVector VsearchFileParser::createListFile(string inputFile, int numBins, string label, map<string, int>& ct){
157 try {
158 Utils util;
159 map<string, string>::iterator itName;
160 if (format == "name") { counts.clear(); util.readNames(namefile, nameMap); }
161
162 ifstream in;
163 util.openInputFile(inputFile, in);
164
165 ListVector list(numBins); list.setLabel(label);
166
167 int clusterNumber;
168 string seqName, recordType, length, percentIdentity, strand, notUsed1, notUsed2, compressedAlignment, repSequence;
169
170 while(!in.eof()) {
171 if (m->getControl_pressed()) { break; }
172
173 in >> recordType >> clusterNumber >> length >> percentIdentity >> strand >> notUsed1 >> notUsed2 >> compressedAlignment >> seqName >> repSequence; util.gobble(in);
174
175 if (recordType != "S") {
176
177 seqName = removeAbundances(seqName);
178
179 if (format == "name") {
180 itName = nameMap.find(seqName);
181 if (itName == nameMap.end()) { m->mothurOut("[ERROR]: " + seqName + " is not in your name file. Parsing error???\n"); m->setControl_pressed(true); }
182 else{ seqName = itName->second; }
183 }
184
185 string bin = list.get(clusterNumber);
186 if (bin == "") { bin = seqName; }
187 else { bin += ',' + seqName; }
188 list.set(clusterNumber, bin);
189
190 }
191
192 }
193 in.close();
194 ct = counts;
195
196 return list;
197
198 }
199 catch(exception& e) {
200 m->errorOut(e, "VsearchFileParser", "createListFile");
201 exit(1);
202 }
203 }
204 /*************************************************************************/
205 //GQY1XT001C44N8/ab=3677/ *
removeAbundances(string seqName)206 string VsearchFileParser::removeAbundances(string seqName){
207 try {
208
209 int pos = seqName.find_last_of(";", seqName.length()-2); //don't look at the last /
210 if (pos != string::npos) { seqName = seqName.substr(0, pos); }
211
212 return seqName;
213 }
214 catch(exception& e) {
215 m->errorOut(e, "VsearchFileParser", "removeAbundances");
216 exit(1);
217 }
218 }
219 /*************************************************************************/
220 //GQY1XT001C44N8/ab=3677/ *
getNumBins(string logfile)221 int VsearchFileParser::getNumBins(string logfile){
222 try {
223
224 int numBins = 0;
225
226 ifstream in;
227 Utils util; util.openInputFile(logfile, in);
228
229 string line;
230 while(!in.eof()) {
231 if (m->getControl_pressed()) { break; }
232
233 line = util.getline(in); util.gobble(in);
234
235 int pos = line.find("Clusters:");
236 if (pos != string::npos) {
237 vector<string> pieces = util.splitWhiteSpace(line);
238 if (pieces.size() > 1) { util.mothurConvert(pieces[1], numBins); }
239 break;
240 }
241 }
242 in.close();
243
244 return numBins;
245 }
246 catch(exception& e) {
247 m->errorOut(e, "VsearchFileParser", "getNumBins");
248 exit(1);
249 }
250 }
251 /***********************************************************************/
252
253