1 //
2 // createdatabasecommand.cpp
3 // Mothur
4 //
5 // Created by Sarah Westcott on 3/28/12.
6 // Copyright (c) 2012 Schloss Lab. All rights reserved.
7 //
8
9 #include "createdatabasecommand.h"
10 #include "inputdata.h"
11
12 //**********************************************************************************************************************
setParameters()13 vector<string> CreateDatabaseCommand::setParameters(){
14 try {
15 CommandParameter pfasta("repfasta", "InputTypes", "", "", "none", "none", "none","database",false,false,true); parameters.push_back(pfasta);
16 CommandParameter pname("repname", "InputTypes", "", "", "NameCount", "none", "none","",false,false,true); parameters.push_back(pname);
17 CommandParameter pcount("count", "InputTypes", "", "", "NameCount-CountGroup", "none", "none","",false,false,true); parameters.push_back(pcount);
18 CommandParameter pgroup("group", "InputTypes", "", "", "CountGroup", "none", "none","",false,false,true); parameters.push_back(pgroup);
19 CommandParameter pconstaxonomy("constaxonomy", "InputTypes", "", "", "none", "none", "none","",false,true,true); parameters.push_back(pconstaxonomy);
20 CommandParameter plist("list", "InputTypes", "", "", "ListShared", "ListShared", "none","",false,false,true); parameters.push_back(plist);
21 CommandParameter pshared("shared", "InputTypes", "", "", "ListShared", "ListShared", "none","",false,false,true); parameters.push_back(pshared);
22 CommandParameter prelabund("relabund", "InputTypes", "", "", "ListShared", "ListShared", "none","",false,false,true); parameters.push_back(prelabund);
23 CommandParameter plabel("label", "String", "", "", "", "", "","",false,false); parameters.push_back(plabel);
24 CommandParameter pseed("seed", "Number", "", "0", "", "", "","",false,false); parameters.push_back(pseed);
25 CommandParameter pinputdir("inputdir", "String", "", "", "", "", "","",false,false); parameters.push_back(pinputdir);
26 CommandParameter poutputdir("outputdir", "String", "", "", "", "", "","",false,false); parameters.push_back(poutputdir);
27
28 abort = false; calledHelp = false;
29
30 vector<string> tempOutNames;
31 outputTypes["database"] = tempOutNames;
32
33 vector<string> myArray;
34 for (int i = 0; i < parameters.size(); i++) { myArray.push_back(parameters[i].name); }
35 return myArray;
36 }
37 catch(exception& e) {
38 m->errorOut(e, "CreateDatabaseCommand", "setParameters");
39 exit(1);
40 }
41 }
42 //**********************************************************************************************************************
getHelpString()43 string CreateDatabaseCommand::getHelpString(){
44 try {
45 string helpString = "";
46 helpString += "The create.database command reads a list, shared or relabund file, *.cons.taxonomy, and optional *.rep.fasta, *.rep.names, groupfile, or count file and creates a database file.\n";
47 helpString += "The create.database command parameters are repfasta, list, shared, repname, constaxonomy, group, count and label. List or shared and constaxonomy are required.\n";
48 helpString += "The repfasta file is fasta file outputted by get.oturep(fasta=yourFastaFile, list=yourListfile, column=yourDistFile, name=yourNameFile).\n";
49 helpString += "The repname file is the name file outputted by get.oturep(fasta=yourFastaFile, list=yourListfile, column=yourDistFile, name=yourNameFile).\n";
50 helpString += "The count file is the count file outputted by get.oturep(fasta=yourFastaFile, list=yourListfile, column=yourDistFile, count=yourCountFile). If it includes group info, mothur will give you the abundance breakdown by group. \n";
51 helpString += "The constaxonomy file is the taxonomy file outputted by classify.otu(list=yourListfile, taxonomy=yourTaxonomyFile, name=yourNameFile).\n";
52 helpString += "The group file is optional and will just give you the abundance breakdown by group.\n";
53 helpString += "The label parameter allows you to specify a label to be used from your listfile.\n";
54 helpString += "NOTE: Make SURE the repfasta, repnames and contaxonomy are for the same label as the listfile.\n";
55 helpString += "The create.database command should be in the following format: \n";
56 helpString += "create.database(repfasta=yourFastaFileFromGetOTURep, repname=yourNameFileFromGetOTURep, contaxonomy=yourConTaxFileFromClassifyOTU, list=yourListFile) \n";
57 helpString += "Example: create.database(repfasta=final.an.0.03.rep.fasta, repname=final.an.0.03.rep.names, list=final.an.list, label=0.03, contaxonomy=final.an.0.03.cons.taxonomy) \n";
58 return helpString;
59 }
60 catch(exception& e) {
61 m->errorOut(e, "CreateDatabaseCommand", "getHelpString");
62 exit(1);
63 }
64 }
65 //**********************************************************************************************************************
getOutputPattern(string type)66 string CreateDatabaseCommand::getOutputPattern(string type) {
67 try {
68 string pattern = "";
69
70 if (type == "database") { pattern = "[filename],database"; }
71 else { m->mothurOut("[ERROR]: No definition for type " + type + " output pattern.\n"); m->setControl_pressed(true); }
72
73 return pattern;
74 }
75 catch(exception& e) {
76 m->errorOut(e, "CreateDatabaseCommand", "getOutputPattern");
77 exit(1);
78 }
79 }
80 //**********************************************************************************************************************
CreateDatabaseCommand(string option)81 CreateDatabaseCommand::CreateDatabaseCommand(string option) : Command() {
82 try{
83 //allow user to run help
84 if (option == "help") {
85 help(); abort = true; calledHelp = true;
86 }else if(option == "citation") { citation(); abort = true; calledHelp = true;}
87 else if(option == "category") { abort = true; calledHelp = true; }
88 else {
89 OptionParser parser(option, setParameters());
90 map<string, string> parameters = parser.getParameters();
91
92 ValidParameters validParameter;
93
94
95 //check for required parameters
96 listfile = validParameter.validFile(parameters, "list");
97 if (listfile == "not found") { listfile = ""; }
98 else if (listfile == "not open") { listfile = ""; abort = true; }
99 else { current->setListFile(listfile); }
100
101 sharedfile = validParameter.validFile(parameters, "shared");
102 if (sharedfile == "not found") { sharedfile = ""; }
103 else if (sharedfile == "not open") { sharedfile = ""; abort = true; }
104 else { current->setSharedFile(sharedfile); }
105
106 relabundfile = validParameter.validFile(parameters, "relabund");
107 if (relabundfile == "not found") { relabundfile = ""; }
108 else if (relabundfile == "not open") { relabundfile = ""; abort = true; }
109 else { current->setRelAbundFile(relabundfile); }
110
111 if ((sharedfile == "") && (listfile == "") && (relabundfile == "")) {
112 //is there are current file available for either of these?
113 //give priority to list, then shared, then relabund
114 listfile = current->getListFile();
115 if (listfile != "") { m->mothurOut("Using " + listfile + " as input file for the list parameter.\n"); }
116 else {
117 sharedfile = current->getSharedFile();
118 if (sharedfile != "") { m->mothurOut("Using " + sharedfile + " as input file for the shared parameter.\n"); }
119 else {
120 relabundfile = current->getRelAbundFile();
121 if (relabundfile != "") { m->mothurOut("Using " + relabundfile + " as input file for the relabund parameter.\n"); }
122 else {
123 m->mothurOut("[ERROR]: No valid current files. You must provide a shared, list or relabund file before you can use the create.database command.\n"); abort = true;
124 }
125 }
126 }
127 }
128 else if ((sharedfile != "") && (listfile != "")) { m->mothurOut("When executing a create.database command you must enter ONLY ONE of the following: relabund, shared or list.\n"); abort = true; }
129
130 if (sharedfile != "") { if (outputdir == "") { outputdir = util.hasPath(sharedfile); } }
131 else if (listfile != ""){ if (outputdir == "") { outputdir = util.hasPath(listfile); } }
132 else { if (outputdir == "") { outputdir = util.hasPath(relabundfile); } }
133
134 contaxonomyfile = validParameter.validFile(parameters, "constaxonomy");
135 if (contaxonomyfile == "not found") { //if there is a current list file, use it
136 contaxonomyfile = ""; m->mothurOut("The constaxonomy parameter is required, aborting.\n"); abort = true;
137 }
138 else if (contaxonomyfile == "not open") { contaxonomyfile = ""; abort = true; }
139
140 repfastafile = validParameter.validFile(parameters, "repfasta");
141 if (repfastafile == "not found") { repfastafile = ""; }
142 else if (repfastafile == "not open") { repfastafile = ""; abort = true; }
143
144 repnamesfile = validParameter.validFile(parameters, "repname");
145 if (repnamesfile == "not found") { repnamesfile = ""; }
146 else if (repnamesfile == "not open") { repnamesfile = ""; abort = true; }
147
148 if ((repnamesfile != "") && (repfastafile == "")) { m->mothurOut("[ERROR]: You must provide a repfasta file if you are using a repnames file.\n");
149 abort = true; }
150
151 countfile = validParameter.validFile(parameters, "count");
152 if (countfile == "not found") { countfile = ""; }
153 else if (countfile == "not open") { countfile = ""; abort = true; }
154
155 groupfile = validParameter.validFile(parameters, "group");
156 if (groupfile == "not open") { groupfile = ""; abort = true; }
157 else if (groupfile == "not found") { groupfile = ""; }
158 else { current->setGroupFile(groupfile); }
159
160 //check for optional parameter and set defaults
161 // ...at some point should added some additional type checking...
162 label = validParameter.valid(parameters, "label");
163 if (label == "not found") { label = ""; m->mothurOut("You did not provide a label, I will use the first label in your file.\n");}
164 }
165 }
166 catch(exception& e) {
167 m->errorOut(e, "CreateDatabaseCommand", "CreateDatabaseCommand");
168 exit(1);
169 }
170 }
171 //**********************************************************************************************************************
execute()172 int CreateDatabaseCommand::execute(){
173 try {
174
175 if (abort) { if (calledHelp) { return 0; } return 2; }
176
177 //taxonomies holds the taxonomy info for each Otu
178 //classifyOtuSizes holds the size info of each Otu to help with error checking
179 vector<string> taxonomies;
180 vector<string> otuLabels;
181 vector<int> classifyOtuSizes = readTax(taxonomies, otuLabels);
182
183 if (m->getControl_pressed()) { return 0; }
184
185 vector<Sequence> seqs;
186 vector<int> repOtusSizes;
187 if (repfastafile != "") { repOtusSizes = readFasta(seqs); }
188
189 if (m->getControl_pressed()) { return 0; }
190
191 //names redundants to uniques. backwards to how we normally do it, but each bin is the list file will be a key entry in the map.
192 map<string, string> repNames;
193 map<string, int> nameMap;
194 int numUniqueNamesFile = 0;
195 CountTable ct;
196 if (repnamesfile != "") {
197 numUniqueNamesFile = util.readNames(repnamesfile, repNames, 1);
198 //the repnames file does not have the same order as the list file bins so we need to sort and reassemble for the search below
199 map<string, string> tempRepNames;
200 for (map<string, string>::iterator it = repNames.begin(); it != repNames.end();) {
201 string bin = it->first;
202 vector<string> temp;
203 util.splitAtChar(bin, temp, ',');
204 sort(temp.begin(), temp.end());
205 bin = "";
206 for (int i = 0; i < temp.size()-1; i++) {
207 bin += temp[i] + ',';
208 }
209 bin += temp[temp.size()-1];
210 tempRepNames[bin] = it->second;
211 repNames.erase(it++);
212 }
213 repNames = tempRepNames;
214 }else if (countfile != ""){
215 ct.readTable(countfile, true, false);
216 numUniqueNamesFile = ct.getNumUniqueSeqs();
217 nameMap = ct.getNameMap();
218 }
219
220 if (m->getControl_pressed()) { return 0; }
221
222 if (repfastafile != "") {
223
224 //are there the same number of otus in the fasta and name files
225 if (repOtusSizes.size() != numUniqueNamesFile) { m->mothurOut("[ERROR]: you have " + toString(numUniqueNamesFile) + " unique seqs in your repname file, but " + toString(repOtusSizes.size()) + " seqs in your repfasta file. These should match.\n"); m->setControl_pressed(true); }
226
227 //are there the same number of OTUs in the tax and fasta file
228 if (classifyOtuSizes.size() != repOtusSizes.size()) { m->mothurOut("[ERROR]: you have " + toString(classifyOtuSizes.size()) + " taxonomies in your contaxonomy file, but " + toString(repOtusSizes.size()) + " seqs in your repfasta file. These should match.\n"); m->setControl_pressed(true); }
229
230 if (m->getControl_pressed()) { return 0; }
231
232 //at this point we have the same number of OTUs. Are the sizes we have found so far accurate?
233 for (int i = 0; i < classifyOtuSizes.size(); i++) {
234 if (classifyOtuSizes[i] != repOtusSizes[i]) {
235 m->mothurOut("[ERROR]: OTU size info does not match for bin " + toString(i+1) + ". The contaxonomy file indicated the OTU represented " + toString(classifyOtuSizes[i]) + " sequences, but the repfasta file had " + toString(repOtusSizes[i]) + ". These should match. Make sure you are using files for the same distance.\n"); m->setControl_pressed(true);
236 }
237 }
238 }
239
240 if (m->getControl_pressed()) { return 0; }
241
242
243 map<string, string> variables;
244 if (listfile != "") { variables["[filename]"] = outputdir + util.getRootName(util.getSimpleName(listfile)); }
245 else if (sharedfile != "") { variables["[filename]"] = outputdir + util.getRootName(util.getSimpleName(sharedfile)); }
246 else { variables["[filename]"] = outputdir + util.getRootName(util.getSimpleName(relabundfile)); }
247 string outputFileName = getOutputFileName("database", variables);
248 outputNames.push_back(outputFileName); outputTypes["database"].push_back(outputFileName);
249
250 ofstream out;
251 util.openOutputFile(outputFileName, out);
252
253 string header = "OTUNumber\tAbundance";
254
255
256 if (listfile != "") {
257 //at this point we are fairly sure the repfasta, repnames and contaxonomy files match so lets proceed with the listfile
258 ListVector* list = getList();
259
260 if (otuLabels.size() != list->getNumBins()) {
261 m->mothurOut("[ERROR]: you have " + toString(otuLabels.size()) + " otus in your contaxonomy file, but your list file has " + toString(list->getNumBins()) + " otus. These should match. Make sure you are using files for the same distance.\n"); m->setControl_pressed(true); }
262
263 if (m->getControl_pressed()) { delete list; return 0; }
264
265 GroupMap* groupmap = NULL;
266 if (groupfile != "") {
267 groupmap = new GroupMap(groupfile);
268 groupmap->readMap();
269 }
270
271 if (m->getControl_pressed()) { delete list; if (groupfile != "") { delete groupmap; } return 0; }
272
273 if (groupfile != "") {
274 header = "OTUNumber";
275 for (int i = 0; i < groupmap->getNamesOfGroups().size(); i++) { header += '\t' + (groupmap->getNamesOfGroups())[i]; }
276 }else if (countfile != "") {
277 if (ct.hasGroupInfo()) {
278 header = "OTUNumber";
279 for (int i = 0; i < ct.getNamesOfGroups().size(); i++) { header += '\t' + (ct.getNamesOfGroups())[i]; }
280 }
281 }
282
283 if (repfastafile != "") { header += "\trepSeqName\trepSeq"; }
284 header += "\tOTUConTaxonomy";
285 out << header << endl;
286
287 vector<string> binLabels = list->getLabels();
288 for (int i = 0; i < list->getNumBins(); i++) {
289
290 int index = findIndex(otuLabels, binLabels[i]);
291 if (index == -1) { m->mothurOut("[ERROR]: " + binLabels[i] + " is not in your constaxonomy file, aborting.\n"); m->setControl_pressed(true); }
292
293 if (m->getControl_pressed()) { break; }
294
295 out << otuLabels[index];
296
297 vector<string> binNames;
298 string bin = list->get(i);
299 util.splitAtComma(bin, binNames);
300
301 string seqRepName = "";
302 int numSeqsRep = binNames.size();
303
304 if (repnamesfile != "") {
305 sort(binNames.begin(), binNames.end());
306 bin = "";
307 for (int j = 0; j < binNames.size()-1; j++) {
308 bin += binNames[j] + ',';
309 }
310 bin += binNames[binNames.size()-1];
311 map<string, string>::iterator it = repNames.find(bin);
312
313 if (it == repNames.end()) {
314 m->mothurOut("[ERROR: OTU " + otuLabels[index] + " is not in the repnames file. Make sure you are using files for the same distance.\n"); m->setControl_pressed(true); break;
315 }else { seqRepName = it->second; numSeqsRep = binNames.size(); }
316
317 //sanity check
318 if (binNames.size() != classifyOtuSizes[index]) {
319 m->mothurOut("[ERROR: OTU " + otuLabels[index] + " contains " + toString(binNames.size()) + " sequence, but the rep and taxonomy files indicated this OTU should have " + toString(classifyOtuSizes[index]) + ". Make sure you are using files for the same distance.\n"); m->setControl_pressed(true); break;
320 }
321 }else if ((countfile != "") && (repfastafile != "")) {
322 //find rep sequence in bin
323 for (int j = 0; j < binNames.size(); j++) {
324 map<string, int>::iterator itNameMap = nameMap.find(binNames[j]); //if you are in the counttable you must be the rep. because get.oturep with a countfile only includes the rep sequences in the rep.count file.
325 if (itNameMap != nameMap.end()) {
326 seqRepName = itNameMap->first;
327 numSeqsRep = itNameMap->second;
328 j += binNames.size(); //exit loop
329 }
330 }
331
332 if (seqRepName == "") {
333 m->mothurOut("[ERROR: OTU " + otuLabels[index] + " is not in the count file. Make sure you are using files for the same distance.\n"); m->setControl_pressed(true); break;
334 }
335
336 if (numSeqsRep != classifyOtuSizes[i]) {
337 m->mothurOut("[ERROR: OTU " + otuLabels[index] + " contains " + toString(numSeqsRep) + " sequence, but the rep and taxonomy files indicated this OTU should have " + toString(classifyOtuSizes[index]) + ". Make sure you are using files for the same distance.\n"); m->setControl_pressed(true); break;
338 }
339 }
340
341 //output abundances
342 if (groupfile != "") {
343 string groupAbunds = "";
344 map<string, int> counts;
345 //initialize counts to 0
346 for (int j = 0; j < groupmap->getNamesOfGroups().size(); j++) { counts[(groupmap->getNamesOfGroups())[j]] = 0; }
347
348 //find abundances by group
349 bool error = false;
350 for (int j = 0; j < binNames.size(); j++) {
351 string group = groupmap->getGroup(binNames[j]);
352 if (group == "not found") {
353 m->mothurOut("[ERROR]: " + binNames[j] + " is not in your groupfile, please correct.\n");
354 error = true;
355 }else { counts[group]++; }
356 }
357
358 //output counts
359 for (int j = 0; j < groupmap->getNamesOfGroups().size(); j++) { out << '\t' << counts[(groupmap->getNamesOfGroups())[j]]; }
360
361 if (error) { m->setControl_pressed(true); }
362 }else if ((countfile != "") && (repfastafile != "")) {
363 if (ct.hasGroupInfo()) {
364 vector<int> groupCounts = ct.getGroupCounts(seqRepName);
365 for (int j = 0; j < groupCounts.size(); j++) { out << '\t' << groupCounts[j]; }
366 }else { out << '\t' << numSeqsRep; }
367 }else if ((countfile != "") && (repfastafile == "")) {
368 if (ct.hasGroupInfo()) {
369 vector<int> groupTotals; groupTotals.resize(ct.getNumGroups(), 0);
370 for (int j = 0; j < binNames.size(); j++) {
371 vector<int> groupCounts = ct.getGroupCounts(binNames[j]);
372 for (int k = 0; k < groupCounts.size(); k++) { groupTotals[k] += groupCounts[k]; }
373 }
374 for (int j = 0; j < groupTotals.size(); j++) { out << '\t' << groupTotals[j]; }
375 }else { out << '\t' << numSeqsRep; }
376 }else { out << '\t' << numSeqsRep; }
377
378 //output repSeq
379 if (repfastafile != "") { out << '\t' << seqRepName << '\t' << seqs[index].getAligned() << '\t' << taxonomies[index] << endl; }
380 else { out << '\t' << taxonomies[index] << endl; }
381 }
382
383
384 delete list;
385 if (groupfile != "") { delete groupmap; }
386
387 }else if (sharedfile != "") {
388 SharedRAbundVectors* lookup = getShared();
389 vector<string> namesOfGroups = lookup->getNamesGroups();
390
391 header = "OTUNumber";
392 for (int i = 0; i < namesOfGroups.size(); i++) { header += '\t' + namesOfGroups[i]; }
393 if (repfastafile != "") { header += "\trepSeqName\trepSeq"; }
394 header += "\tOTUConTaxonomy";
395 out << header << endl;
396
397 vector<string> currentLabels = lookup->getOTUNames();
398 for (int h = 0; h < lookup->getNumBins(); h++) {
399
400 if (m->getControl_pressed()) { break; }
401
402 int index = findIndex(otuLabels, currentLabels[h]);
403 if (index == -1) { m->mothurOut("[ERROR]: " + currentLabels[h] + " is not in your constaxonomy file, aborting.\n"); m->setControl_pressed(true); }
404
405 if (m->getControl_pressed()) { break; }
406
407 out << otuLabels[index];
408
409 int totalAbund = 0;
410 for (int i = 0; i < lookup->size(); i++) {
411 int abund = lookup->get(h, namesOfGroups[i]);
412 totalAbund += abund;
413 out << '\t' << abund;
414 }
415
416 //output repSeq
417 if (repfastafile != "") { out << '\t' << seqs[index].getName() << '\t' << seqs[index].getAligned() << '\t' << taxonomies[index] << endl; }
418 else { out << '\t' << taxonomies[index] << endl; }
419 }
420 }else { //relabund
421 SharedRAbundFloatVectors* lookup = getRelabund();
422 vector<string> namesOfGroups = lookup->getNamesGroups();
423
424 header = "OTUNumber";
425 for (int i = 0; i < namesOfGroups.size(); i++) { header += '\t' + namesOfGroups[i]; }
426 if (repfastafile != "") { header += "\trepSeqName\trepSeq"; }
427 header += "\tOTUConTaxonomy";
428 out << header << endl;
429
430 vector<string> currentLabels = lookup->getOTUNames();
431 for (int h = 0; h < lookup->getNumBins(); h++) {
432
433 if (m->getControl_pressed()) { break; }
434
435 int index = findIndex(otuLabels, currentLabels[h]);
436 if (index == -1) { m->mothurOut("[ERROR]: " + currentLabels[h] + " is not in your constaxonomy file, aborting.\n"); m->setControl_pressed(true); }
437
438 if (m->getControl_pressed()) { break; }
439
440 out << otuLabels[index];
441
442 float totalAbund = 0;
443 for (int i = 0; i < lookup->size(); i++) {
444 float abund = lookup->get(h, namesOfGroups[i]);
445 totalAbund += abund;
446 out << '\t' << abund;
447 }
448
449 //output repSeq
450 if (repfastafile != "") { out << '\t' << seqs[index].getName() << '\t' << seqs[index].getAligned() << '\t' << taxonomies[index] << endl; }
451 else { out << '\t' << taxonomies[index] << endl; }
452 }
453
454 }
455 out.close();
456 if (m->getControl_pressed()) { util.mothurRemove(outputFileName); return 0; }
457
458 m->mothurOut("\nOutput File Names: \n");
459 m->mothurOut(outputFileName); m->mothurOutEndLine();
460 m->mothurOutEndLine();
461
462 return 0;
463
464 }
465 catch(exception& e) {
466 m->errorOut(e, "CreateDatabaseCommand", "execute");
467 exit(1);
468 }
469 }
470 //**********************************************************************************************************************
findIndex(vector<string> & otuLabels,string label)471 int CreateDatabaseCommand::findIndex(vector<string>& otuLabels, string label){
472 try {
473 int index = -1;
474 for (int i = 0; i < otuLabels.size(); i++) {
475 if (util.isLabelEquivalent(otuLabels[i],label)) { index = i; break; }
476 }
477 return index;
478 }
479 catch(exception& e) {
480 m->errorOut(e, "CreateDatabaseCommand", "findIndex");
481 exit(1);
482 }
483 }
484 //**********************************************************************************************************************
readTax(vector<string> & taxonomies,vector<string> & otuLabels)485 vector<int> CreateDatabaseCommand::readTax(vector<string>& taxonomies, vector<string>& otuLabels){
486 try {
487
488 vector<int> sizes;
489
490 ifstream in;
491 util.openInputFile(contaxonomyfile, in);
492
493 //read headers
494 util.getline(in);
495
496 while (!in.eof()) {
497
498 if (m->getControl_pressed()) { break; }
499
500 string otu = ""; string tax = "unknown";
501 int size = 0;
502
503 in >> otu >> size; util.gobble(in);
504 tax = util.getline(in); util.gobble(in);
505
506 sizes.push_back(size);
507 taxonomies.push_back(tax);
508 otuLabels.push_back(otu);
509 }
510 in.close();
511
512 return sizes;
513 }
514 catch(exception& e) {
515 m->errorOut(e, "CreateDatabaseCommand", "readTax");
516 exit(1);
517 }
518 }
519 //**********************************************************************************************************************
readFasta(vector<Sequence> & seqs)520 vector<int> CreateDatabaseCommand::readFasta(vector<Sequence>& seqs){
521 try {
522
523 vector<int> sizes;
524
525 ifstream in;
526 util.openInputFile(repfastafile, in);
527
528 set<int> sanity;
529 while (!in.eof()) {
530
531 if (m->getControl_pressed()) { break; }
532
533 string binInfo;
534 Sequence seq(in, binInfo, true); util.gobble(in);
535
536 //the binInfo should look like - binNumber|size ie. 1|200 if it is binNumber|size|group then the user gave us the wrong repfasta file
537 vector<string> info;
538 util.splitAtChar(binInfo, info, '|');
539 //if (info.size() != 2) { m->mothurOut("[ERROR]: your repfasta file is not the right format. The create database command is designed to be used with the output from get.oturep. When running get.oturep you can not use a group file, because mothur is only expecting one representative sequence per OTU and when you use a group file with get.oturep a representative is found for each group.\n"); m->setControl_pressed(true); break;}
540
541 int size = 0;
542 util.mothurConvert(info[1], size);
543
544 int binNumber = 0;
545 string temp = "";
546 for (int i = 0; i < info[0].size(); i++) { if (isspace(info[0][i])) {;}else{temp +=info[0][i]; } }
547 util.mothurConvert(util.getSimpleLabel(temp), binNumber);
548 set<int>::iterator it = sanity.find(binNumber);
549 if (it != sanity.end()) {
550 m->mothurOut("[ERROR]: your repfasta file is not the right format. The create database command is designed to be used with the output from get.oturep. When running get.oturep you can not use a group file, because mothur is only expecting one representative sequence per OTU and when you use a group file with get.oturep a representative is found for each group.\n"); m->setControl_pressed(true); break;
551 }else { sanity.insert(binNumber); }
552
553 sizes.push_back(size);
554 seqs.push_back(seq);
555 }
556 in.close();
557
558 return sizes;
559 }
560 catch(exception& e) {
561 m->errorOut(e, "CreateDatabaseCommand", "readFasta");
562 exit(1);
563 }
564 }
565 //**********************************************************************************************************************
getList()566 ListVector* CreateDatabaseCommand::getList(){
567 try {
568 InputData* input = new InputData(listfile, "list", nullVector);
569 ListVector* list = input->getListVector();
570 string lastLabel = list->getLabel();
571
572 if (label == "") { label = lastLabel; delete input; return list; }
573
574 //if the users enters label "0.06" and there is no "0.06" in their file use the next lowest label.
575 set<string> labels; labels.insert(label);
576 set<string> processedLabels;
577 set<string> userLabels = labels;
578
579 //as long as you are not at the end of the file or done wih the lines you want
580 while((list != NULL) && (userLabels.size() != 0)) {
581 if (m->getControl_pressed()) { delete input; return list; }
582
583 if(labels.count(list->getLabel()) == 1){
584 processedLabels.insert(list->getLabel());
585 userLabels.erase(list->getLabel());
586 break;
587 }
588
589 if ((util.anyLabelsToProcess(list->getLabel(), userLabels, "") ) && (processedLabels.count(lastLabel) != 1)) {
590 string saveLabel = list->getLabel();
591
592 delete list;
593 list = input->getListVector(lastLabel);
594
595 processedLabels.insert(list->getLabel());
596 userLabels.erase(list->getLabel());
597
598 //restore real lastlabel to save below
599 list->setLabel(saveLabel);
600 break;
601 }
602
603 lastLabel = list->getLabel();
604
605 //get next line to process
606 //prevent memory leak
607 delete list;
608 list = input->getListVector();
609 }
610
611
612 if (m->getControl_pressed()) { delete input; return list; }
613
614 //output error messages about any remaining user labels
615 bool needToRun = false;
616 for (set<string>::iterator it = userLabels.begin(); it != userLabels.end(); it++) {
617 m->mothurOut("Your file does not include the label " + *it);
618 if (processedLabels.count(lastLabel) != 1) { m->mothurOut(". I will use " + lastLabel + ".\n"); needToRun = true; }
619 else { m->mothurOut(". Please refer to " + lastLabel + ".\n"); }
620 }
621
622 //run last label if you need to
623 if (needToRun ) {
624 delete list;
625 list = input->getListVector(lastLabel);
626 }
627
628 delete input;
629
630 return list;
631 }
632 catch(exception& e) {
633 m->errorOut(e, "CreateDatabaseCommand", "getList");
634 exit(1);
635 }
636 }
637 //**********************************************************************************************************************
getShared()638 SharedRAbundVectors* CreateDatabaseCommand::getShared(){
639 try {
640 InputData input(sharedfile, "sharedfile", nullVector);
641 SharedRAbundVectors* lookup = input.getSharedRAbundVectors();
642 string lastLabel = lookup->getLabel();
643
644 if (label == "") { label = lastLabel; return lookup; }
645
646 //if the users enters label "0.06" and there is no "0.06" in their file use the next lowest label.
647 set<string> labels; labels.insert(label);
648 set<string> processedLabels;
649 set<string> userLabels = labels;
650
651 //as long as you are not at the end of the file or done wih the lines you want
652 while((lookup != NULL) && (userLabels.size() != 0)) {
653 if (m->getControl_pressed()) { return lookup; }
654
655 if(labels.count(lookup->getLabel()) == 1){
656 processedLabels.insert(lookup->getLabel()); userLabels.erase(lookup->getLabel());
657 break;
658 }
659
660 if ((util.anyLabelsToProcess(lookup->getLabel(), userLabels, "") ) && (processedLabels.count(lastLabel) != 1)) {
661 string saveLabel = lookup->getLabel();
662
663 delete lookup;
664 lookup = input.getSharedRAbundVectors(lastLabel);
665
666 processedLabels.insert(lookup->getLabel()); userLabels.erase(lookup->getLabel());
667
668 //restore real lastlabel to save below
669 lookup->setLabels(saveLabel);
670 break;
671 }
672
673 lastLabel = lookup->getLabel();
674
675 //get next line to process
676 //prevent memory leak
677 delete lookup;
678 lookup = input.getSharedRAbundVectors();
679 }
680
681
682 if (m->getControl_pressed()) { return lookup; }
683
684 //output error messages about any remaining user labels
685 bool needToRun = false;
686 for (set<string>::iterator it = userLabels.begin(); it != userLabels.end(); it++) {
687 m->mothurOut("Your file does not include the label " + *it);
688 if (processedLabels.count(lastLabel) != 1) { m->mothurOut(". I will use " + lastLabel + ".\n"); needToRun = true; }
689 else { m->mothurOut(". Please refer to " + lastLabel + ".\n"); }
690 }
691
692 //run last label if you need to
693 if (needToRun ) {
694 delete lookup;
695 lookup = input.getSharedRAbundVectors(lastLabel);
696 }
697
698 return lookup;
699 }
700 catch(exception& e) {
701 m->errorOut(e, "CreateDatabaseCommand", "getShared");
702 exit(1);
703 }
704 }
705 //**********************************************************************************************************************
getRelabund()706 SharedRAbundFloatVectors* CreateDatabaseCommand::getRelabund(){
707 try {
708 InputData input(relabundfile, "relabund", nullVector);
709 SharedRAbundFloatVectors* lookupFloat = input.getSharedRAbundFloatVectors();
710 string lastLabel = lookupFloat->getLabel();
711
712 if (label == "") { label = lastLabel; return lookupFloat; }
713
714 //if the users enters label "0.06" and there is no "0.06" in their file use the next lowest label.
715 set<string> labels; labels.insert(label);
716 set<string> processedLabels;
717 set<string> userLabels = labels;
718
719 //as long as you are not at the end of the file or done wih the lines you want
720 while((lookupFloat != NULL) && (userLabels.size() != 0)) {
721
722 if (m->getControl_pressed()) { return 0; }
723
724 if(labels.count(lookupFloat->getLabel()) == 1){
725 processedLabels.insert(lookupFloat->getLabel());
726 userLabels.erase(lookupFloat->getLabel());
727 break;
728 }
729
730 if ((util.anyLabelsToProcess(lookupFloat->getLabel(), userLabels, "") ) && (processedLabels.count(lastLabel) != 1)) {
731 string saveLabel = lookupFloat->getLabel();
732
733 delete lookupFloat;
734 lookupFloat = input.getSharedRAbundFloatVectors(lastLabel);
735
736 processedLabels.insert(lookupFloat->getLabel());
737 userLabels.erase(lookupFloat->getLabel());
738
739 //restore real lastlabel to save below
740 lookupFloat->setLabels(saveLabel);
741 break;
742 }
743
744 lastLabel = lookupFloat->getLabel();
745
746 //get next line to process
747 //prevent memory leak
748 delete lookupFloat;
749 lookupFloat = input.getSharedRAbundFloatVectors();
750 }
751
752
753 if (m->getControl_pressed()) { return 0; }
754
755 //output error messages about any remaining user labels
756 bool needToRun = false;
757 for (set<string>::iterator it = userLabels.begin(); it != userLabels.end(); it++) {
758 m->mothurOut("Your file does not include the label " + *it);
759 if (processedLabels.count(lastLabel) != 1) { m->mothurOut(". I will use " + lastLabel + ".\n"); needToRun = true; }
760 else { m->mothurOut(". Please refer to " + lastLabel + ".\n"); }
761 }
762
763 //run last label if you need to
764 if (needToRun ) {
765 delete lookupFloat;
766 lookupFloat = input.getSharedRAbundFloatVectors();
767 }
768
769 return lookupFloat;
770 }
771 catch(exception& e) {
772 m->errorOut(e, "CreateDatabaseCommand", "getRelabund");
773 exit(1);
774 }
775 }
776
777
778 //**********************************************************************************************************************
779
780
781