1 //
2 //  createdatabasecommand.cpp
3 //  Mothur
4 //
5 //  Created by Sarah Westcott on 3/28/12.
6 //  Copyright (c) 2012 Schloss Lab. All rights reserved.
7 //
8 
9 #include "createdatabasecommand.h"
10 #include "inputdata.h"
11 
12 //**********************************************************************************************************************
setParameters()13 vector<string> CreateDatabaseCommand::setParameters(){
14 	try {
15 		CommandParameter pfasta("repfasta", "InputTypes", "", "", "none", "none", "none","database",false,false,true); parameters.push_back(pfasta);
16 		CommandParameter pname("repname", "InputTypes", "", "", "NameCount", "none", "none","",false,false,true); parameters.push_back(pname);
17         CommandParameter pcount("count", "InputTypes", "", "", "NameCount-CountGroup", "none", "none","",false,false,true); parameters.push_back(pcount);
18 		CommandParameter pgroup("group", "InputTypes", "", "", "CountGroup", "none", "none","",false,false,true); parameters.push_back(pgroup);
19 		CommandParameter pconstaxonomy("constaxonomy", "InputTypes", "", "", "none", "none", "none","",false,true,true); parameters.push_back(pconstaxonomy);
20 		CommandParameter plist("list", "InputTypes", "", "", "ListShared", "ListShared", "none","",false,false,true); parameters.push_back(plist);
21         CommandParameter pshared("shared", "InputTypes", "", "", "ListShared", "ListShared", "none","",false,false,true); parameters.push_back(pshared);
22         CommandParameter prelabund("relabund", "InputTypes", "", "", "ListShared", "ListShared", "none","",false,false,true); parameters.push_back(prelabund);
23 		CommandParameter plabel("label", "String", "", "", "", "", "","",false,false); parameters.push_back(plabel);
24 		CommandParameter pseed("seed", "Number", "", "0", "", "", "","",false,false); parameters.push_back(pseed);
25         CommandParameter pinputdir("inputdir", "String", "", "", "", "", "","",false,false); parameters.push_back(pinputdir);
26 		CommandParameter poutputdir("outputdir", "String", "", "", "", "", "","",false,false); parameters.push_back(poutputdir);
27 
28         abort = false; calledHelp = false;
29 
30         vector<string> tempOutNames;
31         outputTypes["database"] = tempOutNames;
32 
33 		vector<string> myArray;
34 		for (int i = 0; i < parameters.size(); i++) {	myArray.push_back(parameters[i].name);		}
35 		return myArray;
36 	}
37 	catch(exception& e) {
38 		m->errorOut(e, "CreateDatabaseCommand", "setParameters");
39 		exit(1);
40 	}
41 }
42 //**********************************************************************************************************************
getHelpString()43 string CreateDatabaseCommand::getHelpString(){
44 	try {
45 		string helpString = "";
46 		helpString += "The create.database command reads a list, shared or relabund file, *.cons.taxonomy, and optional *.rep.fasta, *.rep.names, groupfile, or count file and creates a database file.\n";
47 		helpString += "The create.database command parameters are repfasta, list, shared, repname, constaxonomy, group, count and label. List or shared and constaxonomy are required.\n";
48         helpString += "The repfasta file is fasta file outputted by get.oturep(fasta=yourFastaFile, list=yourListfile, column=yourDistFile, name=yourNameFile).\n";
49         helpString += "The repname file is the name file outputted by get.oturep(fasta=yourFastaFile, list=yourListfile, column=yourDistFile, name=yourNameFile).\n";
50         helpString += "The count file is the count file outputted by get.oturep(fasta=yourFastaFile, list=yourListfile, column=yourDistFile, count=yourCountFile). If it includes group info, mothur will give you the abundance breakdown by group. \n";
51         helpString += "The constaxonomy file is the taxonomy file outputted by classify.otu(list=yourListfile, taxonomy=yourTaxonomyFile, name=yourNameFile).\n";
52         helpString += "The group file is optional and will just give you the abundance breakdown by group.\n";
53         helpString += "The label parameter allows you to specify a label to be used from your listfile.\n";
54         helpString += "NOTE: Make SURE the repfasta, repnames and contaxonomy are for the same label as the listfile.\n";
55         helpString += "The create.database command should be in the following format: \n";
56 		helpString += "create.database(repfasta=yourFastaFileFromGetOTURep, repname=yourNameFileFromGetOTURep, contaxonomy=yourConTaxFileFromClassifyOTU, list=yourListFile) \n";
57 		helpString += "Example: create.database(repfasta=final.an.0.03.rep.fasta, repname=final.an.0.03.rep.names, list=final.an.list, label=0.03, contaxonomy=final.an.0.03.cons.taxonomy) \n";
58 		return helpString;
59 	}
60 	catch(exception& e) {
61 		m->errorOut(e, "CreateDatabaseCommand", "getHelpString");
62 		exit(1);
63 	}
64 }
65 //**********************************************************************************************************************
getOutputPattern(string type)66 string CreateDatabaseCommand::getOutputPattern(string type) {
67     try {
68         string pattern = "";
69 
70         if (type == "database") {  pattern = "[filename],database"; }
71         else { m->mothurOut("[ERROR]: No definition for type " + type + " output pattern.\n"); m->setControl_pressed(true);  }
72 
73         return pattern;
74     }
75     catch(exception& e) {
76         m->errorOut(e, "CreateDatabaseCommand", "getOutputPattern");
77         exit(1);
78     }
79 }
80 //**********************************************************************************************************************
CreateDatabaseCommand(string option)81 CreateDatabaseCommand::CreateDatabaseCommand(string option) : Command()  {
82 	try{
83 		//allow user to run help
84 		if (option == "help") {
85 			help(); abort = true; calledHelp = true;
86 		}else if(option == "citation") { citation(); abort = true; calledHelp = true;}
87         else if(option == "category") {  abort = true; calledHelp = true;  }
88 		else {
89 			OptionParser parser(option, setParameters());
90 			map<string, string> parameters = parser.getParameters();
91 
92 			ValidParameters validParameter;
93 
94 
95 			//check for required parameters
96 			listfile = validParameter.validFile(parameters, "list");
97 			if (listfile == "not found") {	listfile = "";			}
98 			else if (listfile == "not open") { listfile = ""; abort = true; }
99 			else { current->setListFile(listfile); }
100 
101             sharedfile = validParameter.validFile(parameters, "shared");
102 			if (sharedfile == "not found") {	sharedfile = "";			}
103 			else if (sharedfile == "not open") { sharedfile = ""; abort = true; }
104 			else { current->setSharedFile(sharedfile); }
105 
106             relabundfile = validParameter.validFile(parameters, "relabund");
107             if (relabundfile == "not found") {	relabundfile = "";			}
108             else if (relabundfile == "not open") { relabundfile = ""; abort = true; }
109             else { current->setRelAbundFile(relabundfile); }
110 
111             if ((sharedfile == "") && (listfile == "") && (relabundfile == "")) {
112 				//is there are current file available for either of these?
113 				//give priority to list, then shared, then relabund
114 				listfile = current->getListFile();
115 				if (listfile != "") {  m->mothurOut("Using " + listfile + " as input file for the list parameter.\n");  }
116 				else {
117 					sharedfile = current->getSharedFile();
118 					if (sharedfile != "") {  m->mothurOut("Using " + sharedfile + " as input file for the shared parameter.\n");  }
119 					else {
120                         relabundfile = current->getRelAbundFile();
121                         if (relabundfile != "") {  m->mothurOut("Using " + relabundfile + " as input file for the relabund parameter.\n");  }
122                         else {
123                             m->mothurOut("[ERROR]: No valid current files. You must provide a shared, list or relabund file before you can use the create.database command.\n");  abort = true;
124                         }
125 					}
126 				}
127 			}
128 			else if ((sharedfile != "") && (listfile != "")) { m->mothurOut("When executing a create.database command you must enter ONLY ONE of the following: relabund, shared or list.\n"); abort = true; }
129 
130             if (sharedfile != "") { if (outputdir == "") { outputdir = util.hasPath(sharedfile); } }
131             else if (listfile != ""){ if (outputdir == "") { outputdir = util.hasPath(listfile); } }
132             else { if (outputdir == "") { outputdir = util.hasPath(relabundfile); } }
133 
134 			contaxonomyfile = validParameter.validFile(parameters, "constaxonomy");
135 			if (contaxonomyfile == "not found") {  //if there is a current list file, use it
136                contaxonomyfile = "";  m->mothurOut("The constaxonomy parameter is required, aborting.\n");  abort = true;
137 			}
138 			else if (contaxonomyfile == "not open") { contaxonomyfile = ""; abort = true; }
139 
140             repfastafile = validParameter.validFile(parameters, "repfasta");
141 			if (repfastafile == "not found") { repfastafile = ""; }
142 			else if (repfastafile == "not open") { repfastafile = ""; abort = true; }
143 
144             repnamesfile = validParameter.validFile(parameters, "repname");
145 			if (repnamesfile == "not found") {  repnamesfile = "";  			}
146 			else if (repnamesfile == "not open") { repnamesfile = ""; abort = true; }
147 
148             if ((repnamesfile != "") && (repfastafile == "")) { m->mothurOut("[ERROR]: You must provide a repfasta file if you are using a repnames file.\n");
149                 abort = true; }
150 
151             countfile = validParameter.validFile(parameters, "count");
152 			if (countfile == "not found") {  countfile = "";  			}
153 			else if (countfile == "not open") { countfile = ""; abort = true; }
154 
155 			groupfile = validParameter.validFile(parameters, "group");
156 			if (groupfile == "not open") { groupfile = ""; abort = true; }
157 			else if (groupfile == "not found") { groupfile = ""; }
158 			else { current->setGroupFile(groupfile); }
159 
160 			//check for optional parameter and set defaults
161 			// ...at some point should added some additional type checking...
162             label = validParameter.valid(parameters, "label");
163 			if (label == "not found") { label = ""; m->mothurOut("You did not provide a label, I will use the first label in your file.\n");}
164         }
165 	}
166 	catch(exception& e) {
167 		m->errorOut(e, "CreateDatabaseCommand", "CreateDatabaseCommand");
168 		exit(1);
169 	}
170 }
171 //**********************************************************************************************************************
execute()172 int CreateDatabaseCommand::execute(){
173 	try {
174 
175 		if (abort) { if (calledHelp) { return 0; }  return 2;	}
176 
177         //taxonomies holds the taxonomy info for each Otu
178         //classifyOtuSizes holds the size info of each Otu to help with error checking
179         vector<string> taxonomies;
180         vector<string> otuLabels;
181         vector<int> classifyOtuSizes = readTax(taxonomies, otuLabels);
182 
183         if (m->getControl_pressed()) { return 0; }
184 
185         vector<Sequence> seqs;
186         vector<int> repOtusSizes;
187         if (repfastafile != "") { repOtusSizes = readFasta(seqs); }
188 
189         if (m->getControl_pressed()) { return 0; }
190 
191         //names redundants to uniques. backwards to how we normally do it, but each bin is the list file will be a key entry in the map.
192         map<string, string> repNames;
193         map<string, int> nameMap;
194         int numUniqueNamesFile = 0;
195         CountTable ct;
196         if (repnamesfile != "") {
197             numUniqueNamesFile = util.readNames(repnamesfile, repNames, 1);
198             //the repnames file does not have the same order as the list file bins so we need to sort and reassemble for the search below
199             map<string, string> tempRepNames;
200             for (map<string, string>::iterator it = repNames.begin(); it != repNames.end();) {
201                 string bin = it->first;
202                 vector<string> temp;
203                 util.splitAtChar(bin, temp, ',');
204                 sort(temp.begin(), temp.end());
205                 bin = "";
206                 for (int i = 0; i < temp.size()-1; i++) {
207                     bin += temp[i] + ',';
208                 }
209                 bin += temp[temp.size()-1];
210                 tempRepNames[bin] = it->second;
211                 repNames.erase(it++);
212             }
213             repNames = tempRepNames;
214         }else if (countfile != ""){
215             ct.readTable(countfile, true, false);
216             numUniqueNamesFile = ct.getNumUniqueSeqs();
217             nameMap = ct.getNameMap();
218         }
219 
220         if (m->getControl_pressed()) { return 0; }
221 
222         if (repfastafile != "") {
223 
224             //are there the same number of otus in the fasta and name files
225             if (repOtusSizes.size() != numUniqueNamesFile) { m->mothurOut("[ERROR]: you have " + toString(numUniqueNamesFile) + " unique seqs in your repname file, but " + toString(repOtusSizes.size()) + " seqs in your repfasta file.  These should match.\n"); m->setControl_pressed(true); }
226 
227             //are there the same number of OTUs in the tax and fasta file
228             if (classifyOtuSizes.size() != repOtusSizes.size()) { m->mothurOut("[ERROR]: you have " + toString(classifyOtuSizes.size()) + " taxonomies in your contaxonomy file, but " + toString(repOtusSizes.size()) + " seqs in your repfasta file.  These should match.\n"); m->setControl_pressed(true); }
229 
230             if (m->getControl_pressed()) { return 0; }
231 
232             //at this point we have the same number of OTUs. Are the sizes we have found so far accurate?
233             for (int i = 0; i < classifyOtuSizes.size(); i++) {
234                 if (classifyOtuSizes[i] != repOtusSizes[i]) {
235                     m->mothurOut("[ERROR]: OTU size info does not match for bin " + toString(i+1) + ". The contaxonomy file indicated the OTU represented " + toString(classifyOtuSizes[i]) + " sequences, but the repfasta file had " + toString(repOtusSizes[i]) + ".  These should match. Make sure you are using files for the same distance.\n"); m->setControl_pressed(true);
236                 }
237             }
238         }
239 
240         if (m->getControl_pressed()) { return 0; }
241 
242 
243         map<string, string> variables;
244         if (listfile != "") {  variables["[filename]"] = outputdir + util.getRootName(util.getSimpleName(listfile)); }
245         else if (sharedfile != "") { variables["[filename]"] = outputdir + util.getRootName(util.getSimpleName(sharedfile)); }
246         else {  variables["[filename]"] = outputdir + util.getRootName(util.getSimpleName(relabundfile)); }
247         string outputFileName = getOutputFileName("database", variables);
248         outputNames.push_back(outputFileName); outputTypes["database"].push_back(outputFileName);
249 
250         ofstream out;
251         util.openOutputFile(outputFileName, out);
252 
253         string header = "OTUNumber\tAbundance";
254 
255 
256         if (listfile != "") {
257             //at this point we are fairly sure the repfasta, repnames and contaxonomy files match so lets proceed with the listfile
258             ListVector* list = getList();
259 
260             if (otuLabels.size() != list->getNumBins()) {
261                 m->mothurOut("[ERROR]: you have " + toString(otuLabels.size()) + " otus in your contaxonomy file, but your list file has " + toString(list->getNumBins()) + " otus. These should match. Make sure you are using files for the same distance.\n"); m->setControl_pressed(true);  }
262 
263             if (m->getControl_pressed()) { delete list; return 0; }
264 
265             GroupMap* groupmap = NULL;
266             if (groupfile != "") {
267                 groupmap = new GroupMap(groupfile);
268                 groupmap->readMap();
269             }
270 
271             if (m->getControl_pressed()) { delete list; if (groupfile != "") { delete groupmap; } return 0; }
272 
273             if (groupfile != "") {
274                 header = "OTUNumber";
275                 for (int i = 0; i < groupmap->getNamesOfGroups().size(); i++) { header += '\t' + (groupmap->getNamesOfGroups())[i]; }
276             }else if (countfile != "") {
277                 if (ct.hasGroupInfo()) {
278                     header = "OTUNumber";
279                     for (int i = 0; i < ct.getNamesOfGroups().size(); i++) { header += '\t' + (ct.getNamesOfGroups())[i]; }
280                 }
281             }
282 
283             if (repfastafile != "") {  header += "\trepSeqName\trepSeq";  }
284             header += "\tOTUConTaxonomy";
285             out << header << endl;
286 
287             vector<string> binLabels = list->getLabels();
288             for (int i = 0; i < list->getNumBins(); i++) {
289 
290                 int index = findIndex(otuLabels, binLabels[i]);
291                 if (index == -1) {  m->mothurOut("[ERROR]: " + binLabels[i] + " is not in your constaxonomy file, aborting.\n"); m->setControl_pressed(true); }
292 
293                 if (m->getControl_pressed()) { break; }
294 
295                 out << otuLabels[index];
296 
297                 vector<string> binNames;
298                 string bin = list->get(i);
299                 util.splitAtComma(bin, binNames);
300 
301                 string seqRepName = "";
302                 int numSeqsRep = binNames.size();
303 
304                 if (repnamesfile != "") {
305                     sort(binNames.begin(), binNames.end());
306                     bin = "";
307                     for (int j = 0; j < binNames.size()-1; j++) {
308                         bin += binNames[j] + ',';
309                     }
310                     bin += binNames[binNames.size()-1];
311                     map<string, string>::iterator it = repNames.find(bin);
312 
313                     if (it == repNames.end()) {
314                         m->mothurOut("[ERROR: OTU " + otuLabels[index] + " is not in the repnames file. Make sure you are using files for the same distance.\n"); m->setControl_pressed(true);   break;
315                     }else { seqRepName = it->second;  numSeqsRep = binNames.size(); }
316 
317                     //sanity check
318                     if (binNames.size() != classifyOtuSizes[index]) {
319                         m->mothurOut("[ERROR: OTU " + otuLabels[index] + " contains " + toString(binNames.size()) + " sequence, but the rep and taxonomy files indicated this OTU should have " + toString(classifyOtuSizes[index]) + ". Make sure you are using files for the same distance.\n"); m->setControl_pressed(true);   break;
320                     }
321                 }else if ((countfile != "") && (repfastafile != "")) {
322                     //find rep sequence in bin
323                     for (int j = 0; j < binNames.size(); j++) {
324                         map<string, int>::iterator itNameMap = nameMap.find(binNames[j]); //if you are in the counttable you must be the rep. because get.oturep with a countfile only includes the rep sequences in the rep.count file.
325                         if (itNameMap != nameMap.end()) {
326                             seqRepName = itNameMap->first;
327                             numSeqsRep = itNameMap->second;
328                             j += binNames.size(); //exit loop
329                         }
330                     }
331 
332                     if (seqRepName == "") {
333                         m->mothurOut("[ERROR: OTU " + otuLabels[index] + " is not in the count file. Make sure you are using files for the same distance.\n"); m->setControl_pressed(true);   break;
334                     }
335 
336                     if (numSeqsRep != classifyOtuSizes[i]) {
337                         m->mothurOut("[ERROR: OTU " + otuLabels[index] + " contains " + toString(numSeqsRep) + " sequence, but the rep and taxonomy files indicated this OTU should have " + toString(classifyOtuSizes[index]) + ". Make sure you are using files for the same distance.\n"); m->setControl_pressed(true);   break;
338                     }
339                 }
340 
341                 //output abundances
342                 if (groupfile != "") {
343                     string groupAbunds = "";
344                     map<string, int> counts;
345                     //initialize counts to 0
346                     for (int j = 0; j < groupmap->getNamesOfGroups().size(); j++) { counts[(groupmap->getNamesOfGroups())[j]] = 0; }
347 
348                     //find abundances by group
349                     bool error = false;
350                     for (int j = 0; j < binNames.size(); j++) {
351                         string group = groupmap->getGroup(binNames[j]);
352                         if (group == "not found") {
353                             m->mothurOut("[ERROR]: " + binNames[j] + " is not in your groupfile, please correct.\n");
354                             error = true;
355                         }else { counts[group]++; }
356                     }
357 
358                     //output counts
359                     for (int j = 0; j < groupmap->getNamesOfGroups().size(); j++) { out << '\t' << counts[(groupmap->getNamesOfGroups())[j]];  }
360 
361                     if (error) { m->setControl_pressed(true); }
362                 }else if ((countfile != "") && (repfastafile != "")) {
363                     if (ct.hasGroupInfo()) {
364                         vector<int> groupCounts = ct.getGroupCounts(seqRepName);
365                         for (int j = 0; j < groupCounts.size(); j++) { out << '\t' << groupCounts[j];  }
366                     }else { out << '\t' << numSeqsRep; }
367                 }else if ((countfile != "") && (repfastafile == "")) {
368                     if (ct.hasGroupInfo()) {
369                         vector<int> groupTotals; groupTotals.resize(ct.getNumGroups(), 0);
370                         for (int j = 0; j < binNames.size(); j++) {
371                             vector<int> groupCounts = ct.getGroupCounts(binNames[j]);
372                             for (int k = 0; k < groupCounts.size(); k++) { groupTotals[k] += groupCounts[k]; }
373                         }
374                         for (int j = 0; j < groupTotals.size(); j++) { out << '\t' << groupTotals[j];  }
375                     }else { out << '\t' << numSeqsRep; }
376                 }else { out << '\t' << numSeqsRep; }
377 
378                 //output repSeq
379                 if (repfastafile != "") { out << '\t' << seqRepName << '\t' << seqs[index].getAligned() << '\t' << taxonomies[index] << endl; }
380                 else { out << '\t' << taxonomies[index] << endl; }
381             }
382 
383 
384             delete list;
385             if (groupfile != "") { delete groupmap; }
386 
387         }else if (sharedfile != "") {
388             SharedRAbundVectors* lookup = getShared();
389             vector<string> namesOfGroups = lookup->getNamesGroups();
390 
391             header = "OTUNumber";
392             for (int i = 0; i < namesOfGroups.size(); i++) { header += '\t' + namesOfGroups[i]; }
393             if (repfastafile != "") {  header += "\trepSeqName\trepSeq";  }
394             header += "\tOTUConTaxonomy";
395             out << header << endl;
396 
397             vector<string> currentLabels = lookup->getOTUNames();
398             for (int h = 0; h < lookup->getNumBins(); h++) {
399 
400                 if (m->getControl_pressed()) { break; }
401 
402                 int index = findIndex(otuLabels, currentLabels[h]);
403                 if (index == -1) {  m->mothurOut("[ERROR]: " + currentLabels[h] + " is not in your constaxonomy file, aborting.\n"); m->setControl_pressed(true); }
404 
405                 if (m->getControl_pressed()) { break; }
406 
407                 out << otuLabels[index];
408 
409                 int totalAbund = 0;
410                 for (int i = 0; i < lookup->size(); i++) {
411                     int abund = lookup->get(h, namesOfGroups[i]);
412                     totalAbund += abund;
413                     out  << '\t' << abund;
414                 }
415 
416                 //output repSeq
417                 if (repfastafile != "") { out << '\t' << seqs[index].getName() << '\t' << seqs[index].getAligned() << '\t' << taxonomies[index] << endl; }
418                 else { out << '\t' << taxonomies[index] << endl; }
419             }
420         }else { //relabund
421             SharedRAbundFloatVectors* lookup = getRelabund();
422             vector<string> namesOfGroups = lookup->getNamesGroups();
423 
424             header = "OTUNumber";
425             for (int i = 0; i < namesOfGroups.size(); i++) { header += '\t' + namesOfGroups[i]; }
426             if (repfastafile != "") {  header += "\trepSeqName\trepSeq";  }
427             header += "\tOTUConTaxonomy";
428             out << header << endl;
429 
430             vector<string> currentLabels = lookup->getOTUNames();
431             for (int h = 0; h < lookup->getNumBins(); h++) {
432 
433                 if (m->getControl_pressed()) { break; }
434 
435                 int index = findIndex(otuLabels, currentLabels[h]);
436                 if (index == -1) {  m->mothurOut("[ERROR]: " + currentLabels[h] + " is not in your constaxonomy file, aborting.\n"); m->setControl_pressed(true); }
437 
438                 if (m->getControl_pressed()) { break; }
439 
440                 out << otuLabels[index];
441 
442                 float totalAbund = 0;
443                 for (int i = 0; i < lookup->size(); i++) {
444                     float abund = lookup->get(h, namesOfGroups[i]);
445                     totalAbund += abund;
446                     out  << '\t' << abund;
447                 }
448 
449                 //output repSeq
450                 if (repfastafile != "") { out << '\t' << seqs[index].getName() << '\t' << seqs[index].getAligned() << '\t' << taxonomies[index] << endl; }
451                 else { out << '\t' << taxonomies[index] << endl; }
452             }
453 
454         }
455         out.close();
456         if (m->getControl_pressed()) { util.mothurRemove(outputFileName); return 0; }
457 
458         m->mothurOut("\nOutput File Names: \n");
459 		m->mothurOut(outputFileName); m->mothurOutEndLine();
460 		m->mothurOutEndLine();
461 
462         return 0;
463 
464     }
465 	catch(exception& e) {
466 		m->errorOut(e, "CreateDatabaseCommand", "execute");
467 		exit(1);
468 	}
469 }
470 //**********************************************************************************************************************
findIndex(vector<string> & otuLabels,string label)471 int CreateDatabaseCommand::findIndex(vector<string>& otuLabels, string label){
472 	try {
473         int index = -1;
474         for (int i = 0; i < otuLabels.size(); i++) {
475             if (util.isLabelEquivalent(otuLabels[i],label)) { index = i; break; }
476         }
477 		return index;
478     }
479 	catch(exception& e) {
480 		m->errorOut(e, "CreateDatabaseCommand", "findIndex");
481 		exit(1);
482 	}
483 }
484 //**********************************************************************************************************************
readTax(vector<string> & taxonomies,vector<string> & otuLabels)485 vector<int> CreateDatabaseCommand::readTax(vector<string>& taxonomies, vector<string>& otuLabels){
486 	try {
487 
488         vector<int> sizes;
489 
490         ifstream in;
491         util.openInputFile(contaxonomyfile, in);
492 
493         //read headers
494         util.getline(in);
495 
496         while (!in.eof()) {
497 
498             if (m->getControl_pressed()) { break; }
499 
500             string otu = ""; string tax = "unknown";
501             int size = 0;
502 
503             in >> otu >> size; util.gobble(in);
504             tax = util.getline(in); util.gobble(in);
505 
506             sizes.push_back(size);
507             taxonomies.push_back(tax);
508             otuLabels.push_back(otu);
509         }
510         in.close();
511 
512         return sizes;
513     }
514 	catch(exception& e) {
515 		m->errorOut(e, "CreateDatabaseCommand", "readTax");
516 		exit(1);
517 	}
518 }
519 //**********************************************************************************************************************
readFasta(vector<Sequence> & seqs)520 vector<int> CreateDatabaseCommand::readFasta(vector<Sequence>& seqs){
521 	try {
522 
523         vector<int> sizes;
524 
525         ifstream in;
526         util.openInputFile(repfastafile, in);
527 
528         set<int> sanity;
529         while (!in.eof()) {
530 
531             if (m->getControl_pressed()) { break; }
532 
533             string binInfo;
534             Sequence seq(in, binInfo, true);  util.gobble(in);
535 
536             //the binInfo should look like - binNumber|size ie. 1|200 if it is binNumber|size|group then the user gave us the wrong repfasta file
537             vector<string> info;
538             util.splitAtChar(binInfo, info, '|');
539             //if (info.size() != 2) { m->mothurOut("[ERROR]: your repfasta file is not the right format.  The create database command is designed to be used with the output from get.oturep.  When running get.oturep you can not use a group file, because mothur is only expecting one representative sequence per OTU and when you use a group file with get.oturep a representative is found for each group.\n");  m->setControl_pressed(true); break;}
540 
541             int size = 0;
542             util.mothurConvert(info[1], size);
543 
544             int binNumber = 0;
545             string temp = "";
546             for (int i = 0; i < info[0].size(); i++) { if (isspace(info[0][i])) {;}else{temp +=info[0][i]; } }
547             util.mothurConvert(util.getSimpleLabel(temp), binNumber);
548             set<int>::iterator it = sanity.find(binNumber);
549             if (it != sanity.end()) {
550                 m->mothurOut("[ERROR]: your repfasta file is not the right format.  The create database command is designed to be used with the output from get.oturep.  When running get.oturep you can not use a group file, because mothur is only expecting one representative sequence per OTU and when you use a group file with get.oturep a representative is found for each group.\n");  m->setControl_pressed(true); break;
551             }else { sanity.insert(binNumber); }
552 
553             sizes.push_back(size);
554             seqs.push_back(seq);
555         }
556         in.close();
557 
558         return sizes;
559     }
560 	catch(exception& e) {
561 		m->errorOut(e, "CreateDatabaseCommand", "readFasta");
562 		exit(1);
563 	}
564 }
565 //**********************************************************************************************************************
getList()566 ListVector* CreateDatabaseCommand::getList(){
567 	try {
568 		InputData* input = new InputData(listfile, "list", nullVector);
569 		ListVector* list = input->getListVector();
570 		string lastLabel = list->getLabel();
571 
572 		if (label == "") { label = lastLabel; delete input; return list; }
573 
574 		//if the users enters label "0.06" and there is no "0.06" in their file use the next lowest label.
575 		set<string> labels; labels.insert(label);
576 		set<string> processedLabels;
577 		set<string> userLabels = labels;
578 
579 		//as long as you are not at the end of the file or done wih the lines you want
580 		while((list != NULL) && (userLabels.size() != 0)) {
581 			if (m->getControl_pressed()) {  delete input; return list;  }
582 
583 			if(labels.count(list->getLabel()) == 1){
584 				processedLabels.insert(list->getLabel());
585 				userLabels.erase(list->getLabel());
586 				break;
587 			}
588 
589 			if ((util.anyLabelsToProcess(list->getLabel(), userLabels, "") ) && (processedLabels.count(lastLabel) != 1)) {
590 				string saveLabel = list->getLabel();
591 
592 				delete list;
593 				list = input->getListVector(lastLabel);
594 
595 				processedLabels.insert(list->getLabel());
596 				userLabels.erase(list->getLabel());
597 
598 				//restore real lastlabel to save below
599 				list->setLabel(saveLabel);
600 				break;
601 			}
602 
603 			lastLabel = list->getLabel();
604 
605 			//get next line to process
606 			//prevent memory leak
607 			delete list;
608 			list = input->getListVector();
609 		}
610 
611 
612 		if (m->getControl_pressed()) { delete input; return list;  }
613 
614 		//output error messages about any remaining user labels
615 		bool needToRun = false;
616 		for (set<string>::iterator it = userLabels.begin(); it != userLabels.end(); it++) {
617 			m->mothurOut("Your file does not include the label " + *it);
618             if (processedLabels.count(lastLabel) != 1)  { m->mothurOut(". I will use " + lastLabel + ".\n"); needToRun = true;  }
619 			else                                        { m->mothurOut(". Please refer to " + lastLabel + ".\n");               }
620 		}
621 
622 		//run last label if you need to
623 		if (needToRun )  {
624 			delete list;
625 			list = input->getListVector(lastLabel);
626 		}
627 
628 		delete input;
629 
630         return list;
631     }
632 	catch(exception& e) {
633 		m->errorOut(e, "CreateDatabaseCommand", "getList");
634 		exit(1);
635 	}
636 }
637 //**********************************************************************************************************************
getShared()638 SharedRAbundVectors* CreateDatabaseCommand::getShared(){
639 	try {
640 		InputData input(sharedfile, "sharedfile", nullVector);
641 		SharedRAbundVectors* lookup = input.getSharedRAbundVectors();
642 		string lastLabel = lookup->getLabel();
643 
644 		if (label == "") { label = lastLabel; return lookup; }
645 
646 		//if the users enters label "0.06" and there is no "0.06" in their file use the next lowest label.
647 		set<string> labels; labels.insert(label);
648 		set<string> processedLabels;
649 		set<string> userLabels = labels;
650 
651 		//as long as you are not at the end of the file or done wih the lines you want
652 		while((lookup != NULL) && (userLabels.size() != 0)) {
653 			if (m->getControl_pressed()) {  return lookup;  }
654 
655 			if(labels.count(lookup->getLabel()) == 1){
656 				processedLabels.insert(lookup->getLabel()); userLabels.erase(lookup->getLabel());
657 				break;
658 			}
659 
660 			if ((util.anyLabelsToProcess(lookup->getLabel(), userLabels, "") ) && (processedLabels.count(lastLabel) != 1)) {
661 				string saveLabel = lookup->getLabel();
662 
663                 delete lookup;
664 				lookup = input.getSharedRAbundVectors(lastLabel);
665 
666 				processedLabels.insert(lookup->getLabel()); userLabels.erase(lookup->getLabel());
667 
668 				//restore real lastlabel to save below
669 				lookup->setLabels(saveLabel);
670 				break;
671 			}
672 
673 			lastLabel = lookup->getLabel();
674 
675 			//get next line to process
676 			//prevent memory leak
677 			delete lookup;
678 			lookup = input.getSharedRAbundVectors();
679 		}
680 
681 
682 		if (m->getControl_pressed()) { return lookup;  }
683 
684 		//output error messages about any remaining user labels
685 		bool needToRun = false;
686 		for (set<string>::iterator it = userLabels.begin(); it != userLabels.end(); it++) {
687 			m->mothurOut("Your file does not include the label " + *it);
688             if (processedLabels.count(lastLabel) != 1)  { m->mothurOut(". I will use " + lastLabel + ".\n"); needToRun = true;  }
689 			else                                        { m->mothurOut(". Please refer to " + lastLabel + ".\n");               }
690 		}
691 
692 		//run last label if you need to
693 		if (needToRun )  {
694 			delete lookup;
695 			lookup = input.getSharedRAbundVectors(lastLabel);
696 		}
697 
698         return lookup;
699     }
700 	catch(exception& e) {
701 		m->errorOut(e, "CreateDatabaseCommand", "getShared");
702 		exit(1);
703 	}
704 }
705 //**********************************************************************************************************************
getRelabund()706 SharedRAbundFloatVectors* CreateDatabaseCommand::getRelabund(){
707     try {
708         InputData input(relabundfile, "relabund", nullVector);
709         SharedRAbundFloatVectors* lookupFloat = input.getSharedRAbundFloatVectors();
710         string lastLabel = lookupFloat->getLabel();
711 
712         if (label == "") { label = lastLabel;  return lookupFloat; }
713 
714         //if the users enters label "0.06" and there is no "0.06" in their file use the next lowest label.
715         set<string> labels; labels.insert(label);
716         set<string> processedLabels;
717         set<string> userLabels = labels;
718 
719         //as long as you are not at the end of the file or done wih the lines you want
720         while((lookupFloat != NULL) && (userLabels.size() != 0)) {
721 
722             if (m->getControl_pressed()) {  return 0;  }
723 
724             if(labels.count(lookupFloat->getLabel()) == 1){
725                 processedLabels.insert(lookupFloat->getLabel());
726                 userLabels.erase(lookupFloat->getLabel());
727                 break;
728             }
729 
730             if ((util.anyLabelsToProcess(lookupFloat->getLabel(), userLabels, "") ) && (processedLabels.count(lastLabel) != 1)) {
731                 string saveLabel = lookupFloat->getLabel();
732 
733                 delete lookupFloat;
734                 lookupFloat = input.getSharedRAbundFloatVectors(lastLabel);
735 
736                 processedLabels.insert(lookupFloat->getLabel());
737                 userLabels.erase(lookupFloat->getLabel());
738 
739                 //restore real lastlabel to save below
740                 lookupFloat->setLabels(saveLabel);
741                 break;
742             }
743 
744             lastLabel = lookupFloat->getLabel();
745 
746             //get next line to process
747             //prevent memory leak
748             delete lookupFloat;
749             lookupFloat = input.getSharedRAbundFloatVectors();
750         }
751 
752 
753         if (m->getControl_pressed()) { return 0;  }
754 
755         //output error messages about any remaining user labels
756         bool needToRun = false;
757         for (set<string>::iterator it = userLabels.begin(); it != userLabels.end(); it++) {
758             m->mothurOut("Your file does not include the label " + *it);
759             if (processedLabels.count(lastLabel) != 1)  { m->mothurOut(". I will use " + lastLabel + ".\n"); needToRun = true;  }
760             else                                        { m->mothurOut(". Please refer to " + lastLabel + ".\n");               }
761         }
762 
763         //run last label if you need to
764         if (needToRun )  {
765             delete lookupFloat;
766             lookupFloat = input.getSharedRAbundFloatVectors();
767         }
768 
769         return lookupFloat;
770     }
771     catch(exception& e) {
772         m->errorOut(e, "CreateDatabaseCommand", "getRelabund");
773         exit(1);
774     }
775 }
776 
777 
778 //**********************************************************************************************************************
779 
780 
781