1 //
2 //  vsearchfileparser.cpp
3 //  Mothur
4 //
5 //  Created by Sarah Westcott on 10/13/15.
6 //  Copyright (c) 2015 Schloss Lab. All rights reserved.
7 //
8 
9 #include "vsearchfileparser.h"
10 
11 
12 /***********************************************************************/
VsearchFileParser()13 VsearchFileParser::VsearchFileParser(){
14     try {
15         m = MothurOut::getInstance();
16         fastafile = "";
17         namefile = "";
18         countfile = "";
19         format = "count";
20     }
21     catch(exception& e) {
22         m->errorOut(e, "VsearchFileParser", "VsearchFileParser");
23         exit(1);
24     }
25 }
26 /***********************************************************************/
VsearchFileParser(string f)27 VsearchFileParser::VsearchFileParser(string f){
28     try {
29         m = MothurOut::getInstance();
30         fastafile = f;
31         namefile = "";
32         countfile = "";
33         format = "count";
34     }
35     catch(exception& e) {
36         m->errorOut(e, "VsearchFileParser", "VsearchFileParser");
37         exit(1);
38     }
39 }
40 /***********************************************************************/
VsearchFileParser(string f,string nameOrCount,string forma)41 VsearchFileParser::VsearchFileParser(string f, string nameOrCount, string forma) {
42     try {
43         m = MothurOut::getInstance();
44         fastafile = f;
45         namefile = "";
46         countfile = "";
47         format = forma;
48 
49         if (format == "name") { namefile = nameOrCount; }
50         else if (format == "count") { countfile = nameOrCount; }
51         else {  m->mothurOut("[ERROR]: " + format + " is not a valid file format for the VsearchFileParser, quitting.\n"); m->setControl_pressed(true);  }
52 
53     }
54     catch(exception& e) {
55         m->errorOut(e, "VsearchFileParser", "VsearchFileParser");
56         exit(1);
57     }
58 }
59 /***********************************************************************/
getVsearchFile()60 string VsearchFileParser::getVsearchFile() {
61     try {
62         Utils util;
63         if (fastafile == "") { m->mothurOut("[ERROR]: no fasta file given, cannot continue.\n"); m->setControl_pressed(true);  }
64 
65         //Run unique.seqs on the data if a name or count file is not given
66         if ((namefile == "") && (countfile == ""))  {  getNamesFile(fastafile);                  }
67         else if (namefile != "")                    {  counts = util.readNames(namefile);        }
68 
69         if (countfile != "") { CountTable countTable; countTable.readTable(countfile, false, false);  counts = countTable.getNameMap(); }
70 
71         if (m->getControl_pressed()) {  return 0; }
72 
73         //Remove gap characters from each sequence if needed
74         //Append the number of sequences that each unique sequence represents to the end of the fasta file name
75         //Sorts by abundance
76         string vsearchFastafile = createVsearchFasta(fastafile);
77 
78         return vsearchFastafile;
79 
80     }
81     catch(exception& e) {
82         m->errorOut(e, "VsearchFileParser", "getVsearchFile");
83         exit(1);
84     }
85 }
86 /**********************************************************************/
87 
createVsearchFasta(string inputFile)88 string VsearchFileParser::createVsearchFasta(string inputFile){
89     try {
90         Utils util;
91         string vsearchFasta = util.getSimpleName(fastafile) + ".sorted.fasta.temp";
92 
93         vector<seqPriorityNode> seqs;
94         map<string, int>::iterator it;
95 
96         ifstream in;
97         util.openInputFile(inputFile, in);
98 
99         while (!in.eof()) {
100 
101             if (m->getControl_pressed()) { in.close(); return vsearchFasta; }
102 
103             Sequence seq(in); util.gobble(in);
104 
105             it = counts.find(seq.getName());
106             if (it == counts.end()) {
107                 m->mothurOut("[ERROR]: " + seq.getName() + " is not in your name or countfile, quitting.\n"); m->setControl_pressed(true);
108             }else {
109                 seqPriorityNode temp(it->second, seq.getUnaligned(), it->first);
110                 seqs.push_back(temp);
111             }
112 
113         }
114         in.close();
115 
116         util.printVsearchFile(seqs, vsearchFasta, ";size=", ";");
117 
118         return vsearchFasta;
119     }
120     catch(exception& e) {
121         m->errorOut(e, "VsearchFileParser", "createVsearchFasta");
122         exit(1);
123     }
124 }
125 /*************************************************************************/
126 
getNamesFile(string & inputFile)127 string VsearchFileParser::getNamesFile(string& inputFile){
128     try {
129 
130         m->mothurOut("\nNo namesfile given, running unique.seqs command to generate one.\n\n");
131 
132         //use unique.seqs to create new name and fastafile
133         string inputString = "fasta=" + inputFile + ", format=count";
134         m->mothurOut("/******************************************/\n");
135         m->mothurOut("Running command: unique.seqs(" + inputString + ")\n");
136         Command* uniqueCommand = new DeconvoluteCommand(inputString);
137         uniqueCommand->execute();
138 
139         map<string, vector<string> > filenames = uniqueCommand->getOutputFiles();
140 
141         delete uniqueCommand;
142         m->mothurOut("/******************************************/\n");
143 
144         countfile = filenames["count"][0];
145         fastafile = filenames["fasta"][0];
146 
147         return countfile;
148     }
149     catch(exception& e) {
150         m->errorOut(e, "VsearchFileParser", "getNamesFile");
151         exit(1);
152     }
153 }
154 /*************************************************************************/
155 //S	1	275	*	*	*	*	*	GQY1XT001C44N8/ab=3677/	*
createListFile(string inputFile,int numBins,string label,map<string,int> & ct)156 ListVector VsearchFileParser::createListFile(string inputFile, int numBins, string label, map<string, int>& ct){
157     try {
158         Utils util;
159         map<string, string>::iterator itName;
160         if (format == "name") { counts.clear(); util.readNames(namefile, nameMap); }
161 
162         ifstream in;
163         util.openInputFile(inputFile, in);
164 
165         ListVector list(numBins); list.setLabel(label);
166 
167         int clusterNumber;
168         string seqName, recordType, length, percentIdentity, strand, notUsed1, notUsed2, compressedAlignment, repSequence;
169 
170         while(!in.eof()) {
171             if (m->getControl_pressed()) { break; }
172 
173             in >> recordType >> clusterNumber >> length >> percentIdentity >> strand >> notUsed1 >> notUsed2 >> compressedAlignment >> seqName >> repSequence; util.gobble(in);
174 
175             if (recordType != "S") {
176 
177                 seqName = removeAbundances(seqName);
178 
179                 if (format == "name") {
180                     itName = nameMap.find(seqName);
181                     if (itName == nameMap.end()) {  m->mothurOut("[ERROR]: " + seqName + " is not in your name file. Parsing error???\n"); m->setControl_pressed(true); }
182                     else{  seqName = itName->second;  }
183                 }
184 
185                 string bin = list.get(clusterNumber);
186                 if (bin == "")  {   bin = seqName;          }
187                 else            {   bin += ',' + seqName;   }
188                 list.set(clusterNumber, bin);
189 
190             }
191 
192         }
193         in.close();
194         ct = counts;
195 
196         return list;
197 
198     }
199     catch(exception& e) {
200         m->errorOut(e, "VsearchFileParser", "createListFile");
201         exit(1);
202     }
203 }
204 /*************************************************************************/
205 //GQY1XT001C44N8/ab=3677/	*
removeAbundances(string seqName)206 string VsearchFileParser::removeAbundances(string seqName){
207     try {
208 
209         int pos = seqName.find_last_of(";", seqName.length()-2); //don't look at the last /
210         if (pos != string::npos) { seqName = seqName.substr(0, pos); }
211 
212         return seqName;
213     }
214     catch(exception& e) {
215         m->errorOut(e, "VsearchFileParser", "removeAbundances");
216         exit(1);
217     }
218 }
219 /*************************************************************************/
220 //GQY1XT001C44N8/ab=3677/	*
getNumBins(string logfile)221 int VsearchFileParser::getNumBins(string logfile){
222     try {
223 
224         int numBins = 0;
225 
226         ifstream in;
227         Utils util; util.openInputFile(logfile, in);
228 
229         string line;
230         while(!in.eof()) {
231             if (m->getControl_pressed()) { break; }
232 
233             line = util.getline(in); util.gobble(in);
234 
235             int pos = line.find("Clusters:");
236             if (pos != string::npos) {
237                 vector<string> pieces = util.splitWhiteSpace(line);
238                 if (pieces.size() > 1) { util.mothurConvert(pieces[1], numBins);  }
239                 break;
240             }
241         }
242         in.close();
243 
244         return numBins;
245     }
246     catch(exception& e) {
247         m->errorOut(e, "VsearchFileParser", "getNumBins");
248         exit(1);
249     }
250 }
251 /***********************************************************************/
252 
253