1 //
2 //  biomsimple.cpp
3 //  Mothur
4 //
5 //  Created by Sarah Westcott on 10/26/20.
6 //  Copyright © 2020 Schloss Lab. All rights reserved.
7 //
8 
9 #include "biomsimple.hpp"
10 #include "picrust.hpp"
11 
12 /**************************************************************************************************/
BiomSimple()13 BiomSimple::BiomSimple() : Biom("Biological Observation Matrix 1.0.0"){
14     try {
15 
16         matrixFormat = "sparse";
17     }
18     catch(exception& e) {
19         m->errorOut(e, "BiomSimple", "BiomSimple");
20         exit(1);
21     }
22 }
23 
24 /**************************************************************************************************/
BiomSimple(string fname,string l)25 BiomSimple::BiomSimple(string fname, string l) : Biom("Biological Observation Matrix 1.0.0"){
26     try {
27         label = l; matrixFormat = "sparse";
28         read(fname);
29     }
30     catch(exception& e) {
31         m->errorOut(e, "BiomSimple", "BiomSimple");
32         exit(1);
33     }
34 }
35 /**************************************************************************************************/
read(string fname)36 void BiomSimple::read(string fname){
37     try {
38 
39         /*{
40          "id":"/Users/SarahsWork/Desktop/release/temp.job2.shared-unique",
41          "format": "Biological Observation Matrix 0.9.1",
42          "format_url": "http://biom-format.org",
43          "type": "OTU table",
44          "generated_by": "mothur1.44.0",
45          "date": "Tue Apr 17 13:12:07 2020",
46 
47          rows represent OTUS
48          columns represent samples
49 
50          */
51 
52         ifstream in; util.openInputFile(fname, in);
53 
54         matrixFormat = ""; matrixElementType = "";
55         vector<string> otuNames;  vector<string> groupNames;
56         map<string, string> fileLines;
57         //vector<string> names;
58         int numOTUs, numCols;
59         bool hasTaxonomy;
60 
61         numOTUs = 0; numCols = 0; maxLevel = 0;
62         int shapeNumRows = 0; int shapeNumCols = 0;
63 
64         int countOpenBrace = 0; int countClosedBrace = 0;
65         int closeParen = 0; int openParen = -1; //account for opening brace
66         bool ignoreCommas = false; bool atComma = false;
67 
68         string line = "";
69         bool printHeaders = true;
70 
71         while (!in.eof()) { //split file by tags, so each "line" will have something like "id":"/Users/SarahsWork/Desktop/release/final.tx.1.subsample.1.pick.shared-1"
72             if (m->getControl_pressed()) { break; }
73 
74             char c = in.get(); util.gobble(in);
75 
76             if (c == '[')               { countOpenBrace++;     }
77             else if (c == ']')          { countClosedBrace++;   }
78             else if (c == '{')          { openParen++;          }
79             else if (c == '}')          { closeParen++;         }
80             else if ((!ignoreCommas) && (c == ','))          { atComma = true;       }
81 
82             if ((countOpenBrace != countClosedBrace) && (countOpenBrace != countClosedBrace)) { ignoreCommas = true;  }
83             else if ((countOpenBrace == countClosedBrace) && (countOpenBrace == countClosedBrace)) { ignoreCommas = false;  }
84             if (atComma && !ignoreCommas) {
85                 if (fileLines.size() == 0) { //clip first {
86                     line = line.substr(1);
87                 }
88                 string tag = getTag(line);
89                 fileLines[tag] = line;
90 
91                 line = "";
92                 atComma = false;
93                 ignoreCommas = false;
94 
95             }else {  line += c;  }
96 
97         }
98         if (line != "") {
99             line = line.substr(0, line.length()-1);
100             string tag = getTag(line);
101             fileLines[tag] = line;
102         }
103         in.close();
104 
105         //check for required fields
106         map<string, string>::iterator it;
107         it = fileLines.find("type");
108         if (it == fileLines.end()) { m->mothurOut("[ERROR]: you file does not have a type provided.\n"); }
109         else {
110             string thisLine = it->second;
111             tableType = getTag(thisLine);
112         }
113 
114         if (m->getControl_pressed()) { return; }
115 
116         it = fileLines.find("matrix_type");
117         if (it == fileLines.end()) { m->mothurOut("[ERROR]: you file does not have a matrix_type provided.\n"); }
118         else {
119             string thisLine = it->second;
120             matrixFormat = getTag(thisLine);
121             if ((matrixFormat != "sparse") && (matrixFormat != "dense")) { m->mothurOut("[ERROR]: " + matrixFormat + " is not a valid biom matrix_type for mothur. Types allowed are sparse and dense.\n"); m->setControl_pressed(true); }
122         }
123 
124         if (m->getControl_pressed()) { return; }
125 
126         it = fileLines.find("matrix_element_type");
127         if (it == fileLines.end()) { m->mothurOut("[ERROR]: you file does not have a matrix_element_type provided.\n"); }
128         else {
129             string thisLine = it->second;
130             matrixElementType = getTag(thisLine);
131             if ((matrixElementType != "int") && (matrixElementType != "float")) { m->mothurOut("[ERROR]: " + matrixElementType + " is not a valid biom matrix_element_type for mothur. Types allowed are int and float.\n"); m->setControl_pressed(true); }
132         }
133 
134         if (m->getControl_pressed()) { return; }
135 
136         map<string, string> otuTaxonomies;
137         it = fileLines.find("rows");
138         if (it == fileLines.end()) { m->mothurOut("[ERROR]: you file does not have a rows provided.\n"); }
139         else {
140             maxLevel = 0;
141             string thisLine = it->second;
142 
143             bool hasTaxonomy = false;
144             vector< vector<string> > results = extractTaxonomyData(thisLine, numOTUs, hasTaxonomy);
145 
146             if ((tableType == "Taxon table") || (tableType == "Taxontable")) {
147                 vector<string> taxonomies = results[0];
148 
149                 //create OTU names
150                 string snumBins = toString(numOTUs);
151                 for (int i = 0; i < numOTUs; i++) {
152 
153                     //if there is a bin label use it otherwise make one
154                     string binLabel = "OTU";
155                     string sbinNumber = toString(i+1);
156                     if (sbinNumber.length() < snumBins.length()) {
157                         int diff = snumBins.length() - sbinNumber.length();
158                         for (int h = 0; h < diff; h++) { binLabel += "0"; }
159                     }
160                     binLabel += sbinNumber;
161 
162                     otuNames.push_back(binLabel);
163                     otuTaxonomies[otuNames[i]] = taxonomies[i];
164                 }
165 
166             }else{
167                 otuNames = results[0];
168                 if (hasTaxonomy) {
169                     for (int i = 0; i < otuNames.size(); i++) { otuTaxonomies[otuNames[i]] = results[1][i]; }
170                 }
171             }
172         }
173 
174         if (m->getControl_pressed()) {  return; }
175 
176         it = fileLines.find("columns");
177         if (it == fileLines.end()) { m->mothurOut("[ERROR]: you file does not have a columns provided.\n"); }
178         else {
179             string thisLine = it->second;
180 
181             //read sample names
182             maxLevel = 0;
183             bool hasTaxonomy = false;
184             vector< vector<string> > results = extractTaxonomyData(thisLine, numCols, hasTaxonomy);
185             groupNames = results[0];
186             if (hasTaxonomy) {
187                 for (int i = 0; i < results[1].size(); i++) {
188                     if (m->getControl_pressed()) { break; }
189 
190                     string completeTax = util.addUnclassifieds(results[1][i], maxLevel, false);
191                     groupTaxonomies[results[0][i]] = completeTax;
192                 }
193             }
194         }
195 
196         if (m->getControl_pressed()) {  return; }
197 
198         it = fileLines.find("shape");
199         if (it == fileLines.end()) { m->mothurOut("[ERROR]: you file does not have a shape provided.\n"); }
200         else {
201             string thisLine = it->second;
202             getDims(thisLine, shapeNumRows, shapeNumCols);
203 
204             //check shape
205             if (shapeNumCols != numCols) { m->mothurOut("[ERROR]: shape indicates " + toString(shapeNumCols) + " columns, but I only read " + toString(numCols) + " columns.\n"); m->setControl_pressed(true); }
206 
207             if (shapeNumRows != numOTUs) { m->mothurOut("[ERROR]: shape indicates " + toString(shapeNumRows) + " rows, but I only read " + toString(numOTUs) + " rows.\n"); m->setControl_pressed(true); }
208         }
209 
210         if (m->getControl_pressed()) {  return; }
211 
212         it = fileLines.find("data");
213         if (it == fileLines.end()) { m->mothurOut("[ERROR]: you file does not have a data provided.\n"); }
214         else {
215             string thisLine = it->second;
216 
217             if (shared != NULL) { delete shared; }
218 
219             shared = extractOTUData(thisLine, groupNames, numOTUs);
220             shared->setOTUNames(otuNames);
221             m->mothurOut("\n"+shared->getLabel()+"\n");
222 
223             if (otuTaxonomies.size() != 0) {
224                 //sanity check
225                 if ((shared->getNumBins() == otuTaxonomies.size()) && (shared->getNumBins() == numOTUs)) {
226 
227                     for (int i = 0; i < shared->getNumBins(); i++) {
228                         if (m->getControl_pressed()) { break; }
229 
230                         string thisOTUsTax = otuTaxonomies[otuNames[i]];
231                         string newTax = util.addUnclassifieds(thisOTUsTax, maxLevel, false);
232                         Taxonomy thisOTUsTaxonomy(otuNames[i], newTax, shared->getOTUTotal(i));
233                         consTax.push_back(thisOTUsTaxonomy);
234                     }
235                 }
236             }
237         }
238 
239     }
240     catch(exception& e) {
241         m->errorOut(e, "BiomSimple", "read");
242         exit(1);
243     }
244 }
245 //**********************************************************************************************************************
246 //designed for things like "type": "OTU table", returns type
getTag(string & line)247 string BiomSimple::getTag(string& line) {
248     try {
249         bool inQuotes = false;
250         string tag = "";
251         char c = '\"';
252 
253         for (int i = 0; i < line.length(); i++) {
254 
255             //you want to ignore any ; until you reach the next '
256             if ((line[i] == c) && (!inQuotes)) {  inQuotes = true;  }
257             else if ((line[i] == c) && (inQuotes)) {
258                 inQuotes= false;
259                 line = line.substr(i+1);
260                 return tag;
261             }
262 
263             if (inQuotes) {  if (line[i] != c) { tag += line[i]; }  }
264         }
265 
266         return tag;
267     }
268     catch(exception& e) {
269         m->errorOut(e, "BiomSimple", "getTag");
270         exit(1);
271     }
272 }
273 //**********************************************************************************************************************
274 //readRows
extractTaxonomyData(string line,int & numOTUs,bool & hasTaxonomy)275 vector< vector<string> > BiomSimple::extractTaxonomyData(string line, int& numOTUs, bool& hasTaxonomy) {
276     try {
277         /*"rows":[
278          {"id":"Otu01", "metadata":{"taxonomy":["Bacteria", "Bacteroidetes", "Bacteroidia", "Bacteroidales", "Porphyromonadaceae", "unclassified"], "bootstrap":[100, 100, 100, 100, 100, 100]}},
279          {"id":"Otu02", "metadata":{"taxonomy":["Bacteria", "Bacteroidetes", "Bacteroidia", "Bacteroidales", "Rikenellaceae", "Alistipes"], "bootstrap":[100, 100, 100, 100, 100, 100]}},
280          ...
281 
282          "rows":[{"id": "k__Archaea;p__Euryarchaeota;c__Methanobacteria;o__Methanobacteriales;f__Methanobacteriaceae", "metadata": null},
283          {"id": "k__Bacteria;p__Actinobacteria;c__Actinobacteria;o__Actinomycetales;f__Actinomycetaceae", "metadata": null}
284          ....
285 
286          make look like above
287 
288 
289          ],*/
290 
291         vector< vector<string> > results; results.resize(2);
292         int countOpenBrace = 0; int countClosedBrace = 0; int openParen = 0; int closeParen = 0;
293         string nextRow = "";
294         bool end = false; bool allBlank = true;
295 
296         for (int i = 0; i < line.length(); i++) {
297 
298             if (m->getControl_pressed()) { return results; }
299 
300             if (line[i] == '[')         { countOpenBrace++;     }
301             else if (line[i] == ']')    { countClosedBrace++;   }
302             else if (line[i] == '{')    { openParen++;          }
303             else if (line[i] == '}')    { closeParen++;         }
304             else if (openParen != 0)    { nextRow += line[i];   }  //you are reading the row info
305 
306             //you have reached the end of the rows info
307             if ((countOpenBrace == countClosedBrace) && (countClosedBrace != 0)) { end = true; break; }
308             if ((openParen == closeParen) && (closeParen != 0)) { //process row
309                 numOTUs++;
310 
311                 vector<string> result = getNamesAndTaxonomies(nextRow);
312                 if (result.size() != 0) { results[0].push_back(result[0]); results[1].push_back(result[1]); if (result[1] != "") { allBlank = false; } }
313 
314                 nextRow = ""; openParen = 0; closeParen = 0;
315             }
316         }
317 
318         if (allBlank) { hasTaxonomy = false; }
319         else { hasTaxonomy = true; }
320 
321         return results;
322     }
323     catch(exception& e) {
324         m->errorOut(e, "BiomSimple", "extractTaxonomyData");
325         exit(1);
326     }
327 }
328 //**********************************************************************************************************************
//items[0] = id, items[1] = taxonomy (when bootstrap values are present they are appended to each taxon in parentheses)
getNamesAndTaxonomies(string line)330 vector<string> BiomSimple::getNamesAndTaxonomies(string line) {
331     try {
332         /*"rows":[
333          {"id":"Otu01", "metadata":{"taxonomy":["Bacteria", "Bacteroidetes", "Bacteroidia", "Bacteroidales", "Porphyromonadaceae", "unclassified"], "bootstrap":[100, 100, 100, 100, 100, 100]}},
334          {"id":"Otu02", "metadata":{"taxonomy":["Bacteria", "Bacteroidetes", "Bacteroidia", "Bacteroidales", "Rikenellaceae", "Alistipes"], "bootstrap":[100, 100, 100, 100, 100, 100]}},
335          ...
336 
337          "rows":[{"id": "k__Archaea;p__Euryarchaeota;c__Methanobacteria;o__Methanobacteriales;f__Methanobacteriaceae", "metadata": null},
338          {"id": "k__Bacteria;p__Actinobacteria;c__Actinobacteria;o__Actinomycetales;f__Actinomycetaceae", "metadata": null}
339          ....
340 
341          make look like above
342 
343 
344          ],*/
345 
346         vector<string> results;
347         if (line == "") { return results; }
348 
349         int pos = line.find_first_of(',');
350         if (pos == string::npos) { //some kind of error?? we expect at least metadata : null, just grab name
351             results.push_back(getName(line)); results.push_back("");
352         }else {
353             string value;
354             util.splitAtComma(value, line);  //value hold name portion ("id":"Otu01") line holds rest
355             results.push_back(getName(value));
356 
357             string taxonomy = ""; string bootstrap = "";
358             int pos = line.find("taxonomy");
359             if (pos != string::npos) { //no taxonomy info given
360                 int pos2 = line.find("bootstrap");
361                 if (pos2 != string::npos) { //no taxonomy info given
362                     taxonomy = line.substr(pos, (pos2-pos));
363                     taxonomy = taxonomy.substr(0, taxonomy.find_last_of(','));
364                     bootstrap = line.substr(pos2);
365                 }else {
366                     taxonomy = line.substr(pos);
367                 }
368             }
369 
370             results.push_back(getTaxonomy(taxonomy, bootstrap));
371         }
372 
373         return results;
374     }
375     catch(exception& e) {
376         m->errorOut(e, "BiomSimple", "getNamesAndTaxonomies");
377         exit(1);
378     }
379 }
380 //**********************************************************************************************************************
getName(string line)381 string BiomSimple::getName(string line) {
382     try {
383         vector<string> nameItems;
384         util.splitAtChar(line, nameItems, ':'); //split part we want containing the ids
385         string name = nameItems[1];
386 
387         //remove "" if needed
388         int pos = name.find("\"");
389         if (pos != string::npos) {
390             string newName = "";
391             for (int k = 0; k < name.length(); k++) {
392                 if (name[k] != '\"') { newName += name[k]; }
393             }
394             name = newName;
395         }
396 
397         return name;
398     }
399     catch(exception& e) {
400         m->errorOut(e, "BiomSimple", "getName");
401         exit(1);
402     }
403 }
404 //**********************************************************************************************************************
405 //"taxonomy":"Bacteria", "Bacteroidetes", "Bacteroidia", "Bacteroidales", "Porphyromonadaceae", "unclassified",
406 //"bootstrap":100, 100, 100, 100, 100, 100
getTaxonomy(string taxonomy,string bootstrap)407 string BiomSimple::getTaxonomy(string taxonomy, string bootstrap) {
408     try {
409         vector<string> results;
410 
411         if (taxonomy != "") {
412             vector<string> taxItems;
413             util.splitAtChar(taxonomy, taxItems, ':'); //split part we want containing the ids
414             string taxons = taxItems[1];
415 
416             string taxon;
417             while((taxons.find_first_of(',') != -1)) {
418                 if (m->getControl_pressed()) {break;}
419                 util.splitAtComma(taxon, taxons);
420                 results.push_back(taxon);
421             }
422             if (!util.stringBlank(taxons)) { results.push_back(taxons); }
423         }
424 
425         if (bootstrap != "") {
426             vector<string> bootItems;
427             util.splitAtChar(bootstrap, bootItems, ':'); //split part we want containing the ids
428             string bootValues = bootItems[1];
429 
430             string bootValue;
431             int i = 0;
432             while((bootValues.find_first_of(',') != -1)) {
433                 if (m->getControl_pressed()) {break;}
434                 util.splitAtComma(bootValue, bootValues);
435                 results[i]+="("+bootValue+")";
436                 i++;
437             }
438             if (!util.stringBlank(bootValues)) { results[i]+="("+bootValues+")"; }
439         }
440 
441         string result = "";
442         for (int i = 0; i < results.size(); i++) {
443             if (m->getControl_pressed()) {result = ""; break;}
444             result += results[i] + ";";
445         }
446 
447         if (results.size() > maxLevel) { maxLevel = results.size(); }
448 
449         return result;
450     }
451     catch(exception& e) {
452         m->errorOut(e, "BiomSimple", "getTaxonomy");
453         exit(1);
454     }
455 }
456 //**********************************************************************************************************************
getDims(string line,int & shapeNumRows,int & shapeNumCols)457 void BiomSimple::getDims(string line, int& shapeNumRows, int& shapeNumCols) {
458     try {
459         //get shape
460         bool inBar = false;
461         string num = "";
462 
463         for (int i = 0; i < line.length(); i++) {
464 
465             //you want to ignore any ; until you reach the next '
466             if ((line[i] == '[') && (!inBar)) {  inBar = true; i++;  if (!(i < line.length())) { break; } }
467             else if ((line[i] == ']') && (inBar)) {
468                 inBar= false;
469                 util.mothurConvert(num, shapeNumCols);
470                 break;
471             }
472 
473             if (inBar) {
474                 if (line[i] == ',') {
475                     util.mothurConvert(num, shapeNumRows);
476                     num = "";
477                 }else { if (!isspace(line[i])) { num += line[i]; }  }
478             }
479         }
480     }
481     catch(exception& e) {
482         m->errorOut(e, "BiomSimple", "getDims");
483         exit(1);
484     }
485 }
486 //**********************************************************************************************************************
487 //readData
extractOTUData(string line,vector<string> & groupNames,int numOTUs)488 SharedRAbundVectors* BiomSimple::extractOTUData(string line, vector<string>& groupNames, int numOTUs) {
489     try {
490         SharedRAbundVectors* lookup = new SharedRAbundVectors();
491 
492         //creates new sharedRAbunds
493         for (int i = 0; i < groupNames.size(); i++) {
494             SharedRAbundVector* temp = new SharedRAbundVector(numOTUs); //sets all abunds to 0
495             temp->setLabel(label);
496             temp->setGroup(groupNames[i]);
497             lookup->push_back(temp);
498         }
499 
500         if (matrixElementType == "float") {
501 
502             if (sharedFloat != NULL) { delete sharedFloat; }
503             sharedFloat = new SharedRAbundFloatVectors();
504 
505             //creates new sharedRAbunds
506             for (int i = 0; i < groupNames.size(); i++) {
507                 SharedRAbundFloatVector* temp = new SharedRAbundFloatVector(numOTUs); //sets all abunds to 0
508                 temp->setLabel(label);
509                 temp->setGroup(groupNames[i]);
510                 sharedFloat->push_back(temp);
511             }
512         }
513 
514         bool dataStart = false;
515         bool inBrackets = false;
516         string num = "";
517         vector<int> nums;
518         vector<float> numsFloat;
519         int otuCount = 0;
520         for (int i = 0; i < line.length(); i++) {
521 
522             if (m->getControl_pressed()) { return lookup; }
523 
524             //look for opening [ to indicate data is starting
525             if ((line[i] == '[') && (!dataStart)) { dataStart = true; i++;  if (!(i < line.length())) { break; } }
526             else if ((line[i] == ']') && dataStart && (!inBrackets)) { break; } //we are done reading data
527 
528             if (dataStart) {
529                 if ((line[i] == '[') && (!inBrackets)) { inBrackets = true; i++;  if (!(i < line.length())) { break; } }
530                 else if ((line[i] == ']') && (inBrackets)) {
531                     inBrackets = false;
532                     int temp;
533                     float temp2;
534                     if (matrixElementType == "float") {
535                         util.mothurConvert(num, temp2);
536                         numsFloat.push_back(temp2);
537                         temp = (int)temp2;
538                     }else { util.mothurConvert(num, temp); }
539 
540                     nums.push_back(temp);
541                     num = "";
542 
543                     //save info to vectors
544                     if (matrixFormat == "dense") {
545 
546                         //sanity check
547                         if (nums.size() != lookup->size()) { m->mothurOut("[ERROR]: trouble parsing OTU data.  OTU " + toString(otuCount) + " causing errors.\n"); m->setControl_pressed(true); }
548 
549                         //set abundances for this otu
550                         //nums contains [abundSample0, abundSample1, abundSample2, ...] for current OTU
551                         for (int j = 0; j < groupNames.size(); j++) { lookup->set(otuCount, nums[j], groupNames[j]); }
552 
553 
554                         if (matrixElementType == "float") {
555                             //sanity check
556                             if (numsFloat.size() != sharedFloat->size()) { m->mothurOut("[ERROR]: trouble parsing OTU data.  OTU " + toString(otuCount) + " causing errors.\n"); m->setControl_pressed(true); }
557 
558                             //set abundances for this otu
559                             //nums contains [abundSample0, abundSample1, abundSample2, ...] for current OTU
560                             for (int j = 0; j < groupNames.size(); j++) { sharedFloat->set(otuCount, numsFloat[j], groupNames[j]); }
561 
562                         }
563 
564                         otuCount++;
565 
566                     }else {
567                         //sanity check
568                         if (nums.size() != 3) { m->mothurOut("[ERROR]: trouble parsing OTU data.\n"); m->setControl_pressed(true); }
569 
570                         //nums contains [otuNum, sampleNum, abundance]
571                         lookup->set(nums[0], nums[2], groupNames[nums[1]]);
572 
573                         if (matrixElementType == "float") {
574                             //nums contains [otuNum, sampleNum, abundance]
575                             sharedFloat->set(nums[0], numsFloat[2], groupNames[nums[1]]);
576                         }
577                     }
578                     nums.clear(); numsFloat.clear();
579                 }
580 
581                 if (inBrackets) {
582                     if (line[i] == ',') {
583                         float temp2;
584                         util.mothurConvert(num, temp2);
585                         numsFloat.push_back(temp2);
586                         nums.push_back((int)temp2);
587                         num = "";
588                     }else { if (!isspace(line[i])) { num += line[i]; }  }
589                 }
590             }
591         }
592 
593         return lookup;
594     }
595     catch(exception& e) {
596         m->errorOut(e, "BiomSimple", "extractOTUData");
597         exit(1);
598     }
599 }
600 //**********************************************************************************************************************
print(string filename,vector<string> sampleMetadata,Picrust * picrust)601 void BiomSimple::print(string filename, vector<string> sampleMetadata, Picrust* picrust) {
602     try {
603         vector<string> metadata = getMetaDataShared(picrust);
604         int numBins = shared->getNumBins();
605         int numSamples = shared->size();
606         vector<string> currentLabels = shared->getOTUNames();
607         vector<string> namesOfGroups = shared->getNamesGroups();
608 
609         if (m->getControl_pressed()) { return; }
610 
611         time_t rawtime; struct tm * timeinfo;
612         time ( &rawtime );
613         timeinfo = localtime ( &rawtime );
614         string dateString = asctime (timeinfo);
615         int pos = dateString.find('\n');
616         if (pos != string::npos) { dateString = dateString.substr(0, pos);}
617         string spaces = "      ";
618 
619         ofstream out; util.openOutputFile(filename, out);
620 
621         out << "{\n" + spaces + "\"id\":\"" + util.getSimpleName(sharedFileName) + "-" + label + "\",\n" + spaces + "\"format\": \"" + version + "\",\n" + spaces + "\"format_url\": \"" + formatURL + "\",\n";
622         out << spaces + "\"type\": \"" + tableType + " \",\n" + spaces + "\"generated_by\": \"" << mothurVersion << "\",\n" + spaces + "\"date\": \"" << dateString << "\",\n";
623 
624 
625 
626         //get row info
627         /*"rows":[
628                 {"id":"GG_OTU_1", "metadata":null},
629                 {"id":"GG_OTU_2", "metadata":null},
630                 {"id":"GG_OTU_3", "metadata":null},
631                 {"id":"GG_OTU_4", "metadata":null},
632                 {"id":"GG_OTU_5", "metadata":null}
633                 ],*/
634 
635         out << spaces + "\"rows\":[\n";
636         string rowFront = spaces + spaces + "{\"id\":\"";
637         string rowBack = "\", \"metadata\":";
638 
639         for (int i = 0; i < numBins-1; i++) {
640             if (m->getControl_pressed()) {  out.close(); return; }
641             out << rowFront << currentLabels[i] << rowBack << metadata[i] << "},\n";
642         }
643         out << rowFront << currentLabels[(numBins-1)] << rowBack << metadata[(numBins-1)] << "}\n" + spaces + "],\n";
644 
645         //get column info
646         /*"columns": [
647                     {"id":"Sample1", "metadata":null},
648                     {"id":"Sample2", "metadata":null},
649                     {"id":"Sample3", "metadata":null},
650                     {"id":"Sample4", "metadata":null},
651                     {"id":"Sample5", "metadata":null},
652                     {"id":"Sample6", "metadata":null}
653                     ],*/
654 
655         string colBack = "\", \"metadata\":";
656         out << spaces + "\"columns\":[\n";
657 
658         for (int i = 0; i < namesOfGroups.size()-1; i++) {
659             if (m->getControl_pressed()) {  out.close(); return; }
660             out << rowFront << namesOfGroups[i] << colBack << sampleMetadata[i] << "},\n";
661         }
662         out << rowFront << namesOfGroups[(namesOfGroups.size()-1)] << colBack << sampleMetadata[numSamples-1] << "}\n" + spaces + "],\n";
663 
664         out << spaces + "\"matrix_type\": \"" << matrixFormat << "\",\n" + spaces + "\"matrix_element_type\": \"" + matrixElementType + "\",\n";
665         out <<  spaces + "\"shape\": [" << numBins << "," << numSamples << "],\n";
666         out << spaces + "\"data\":  [";
667 
668         vector<string> dataRows;
669         if (matrixFormat == "sparse") {
670             /*"data":[[0,2,1],
671              [1,0,5],
672              [1,1,1],
673              [1,3,2],
674              [1,4,3],
675              [1,5,1],
676              [2,2,1],
677              [2,3,4],
678              [2,4,2],
679              [3,0,2],
680              [3,1,1],
681              [3,2,1],
682              [3,5,1],
683              [4,1,1],
684              [4,2,1]
685              ]*/
686 
687             if (matrixElementType == "int") {
688                 for (int i = 0; i < shared->getNumBins(); i++) {
689 
690                     if (m->getControl_pressed()) { out.close(); return; }
691                     vector<int> binAbunds = shared->getOTU(i);
692 
693                     for (int j = 0; j < binAbunds.size(); j++) {
694                         int abund = binAbunds[j];
695                         string binInfo = "[" + toString(i) + "," + toString(j) + "," + toString(abund) + "]";
696                         //only print non zero values
697                         if (abund != 0) { dataRows.push_back(binInfo); }
698                     }
699                 }
700             }else {
701                 float zero = 0.0;
702                 for (int i = 0; i < sharedFloat->getNumBins(); i++) {
703 
704                     if (m->getControl_pressed()) { out.close(); return; }
705                     vector<float> binAbunds = sharedFloat->getOTU(i);
706 
707                     for (int j = 0; j < binAbunds.size(); j++) {
708                         float abund = binAbunds[j];
709                         string binInfo = "[" + toString(i) + "," + toString(j) + "," + toString(abund) + "]";
710                         //only print non zero values
711                         if (!util.isEqual(abund,zero)) { dataRows.push_back(binInfo); }
712                     }
713                 }
714             }
715         }else {
716 
717             /* "matrix_type": "dense",
718              "matrix_element_type": "int",
719              "shape": [5,6],
720              "data":  [[0,0,1,0,0,0],
721              [5,1,0,2,3,1],
722              [0,0,1,4,2,0],
723              [2,1,1,0,0,1],
724              [0,1,1,0,0,0]]*/
725 
726             if (matrixElementType == "int") {
727                 for (int i = 0; i < shared->getNumBins(); i++) {
728 
729                     if (m->getControl_pressed()) { out.close(); return; }
730 
731                     string binInfo = "[";
732                     vector<int> binAbund = shared->getOTU(i);
733                     for (int j = 0; j < binAbund.size()-1; j++) {  binInfo += toString(binAbund[j]) + ","; }
734                     binInfo += toString(binAbund[binAbund.size()-1]) + "]";
735                     dataRows.push_back(binInfo);
736                 }
737             }else {
738                 for (int i = 0; i < sharedFloat->getNumBins(); i++) {
739 
740                     if (m->getControl_pressed()) { out.close(); return; }
741 
742                     string binInfo = "[";
743                     vector<float> binAbund = sharedFloat->getOTU(i);
744                     for (int j = 0; j < binAbund.size()-1; j++) {  binInfo += toString(binAbund[j]) + ","; }
745                     binInfo += toString(binAbund[binAbund.size()-1]) + "]";
746                     dataRows.push_back(binInfo);
747                 }
748             }
749         }
750 
751         for (int i = 0; i < dataRows.size()-1; i++) {
752             out << dataRows[i] << ",\n" + spaces  + spaces;
753         }
754         out << dataRows[dataRows.size()-1] << "]\n";
755 
756         out << "}\n";
757 
758     }
759     catch(exception& e) {
760         m->errorOut(e, "BiomSimple", "print");
761         exit(1);
762     }
763 }
764 //**********************************************************************************************************************
getMetaDataShared(Picrust * picrust)765 vector<string> BiomSimple::getMetaDataShared(Picrust* picrust){
766     try {
767         vector<string> metadata;
768 
769         if (consTax.size() == 0) { for (int i = 0; i < shared->getNumBins(); i++) {  metadata.push_back("null");  } }
770         else {
771 
772             if (shared == NULL) { m->setControl_pressed(true); return metadata; }
773 
774             //should the labels be Otu001 or PhyloType001
775             vector<string> otuNames = shared->getOTUNames();
776             string firstBin = otuNames[0];
777             string binTag = "Otu";
778             if ((firstBin.find("Otu")) == string::npos) { binTag = "PhyloType";  }
779 
780             map<string, string> labelTaxMap;
781             string snumBins = toString(otuNames.size());
782             for (int i = 0; i < consTax.size(); i++) {
783 
784                 if (m->getControl_pressed()) { return metadata; }
785 
786                 string thisOtuLabel = consTax[i].getName();
787 
788                 //if there is a bin label use it otherwise make one
789                 if (util.isContainingOnlyDigits(thisOtuLabel)) {
790                     string binLabel = binTag;
791                     string sbinNumber = thisOtuLabel;
792                     if (sbinNumber.length() < snumBins.length()) {
793                         int diff = snumBins.length() - sbinNumber.length();
794                         for (int h = 0; h < diff; h++) { binLabel += "0"; }
795                     }
796                     binLabel += sbinNumber;
797                     binLabel = util.getSimpleLabel(binLabel);
798                     labelTaxMap[binLabel] = consTax[i].getConsTaxString();
799                 }else {
800                     map<string, string>::iterator it = labelTaxMap.find(util.getSimpleLabel(thisOtuLabel));
801                     if (it == labelTaxMap.end()) {
802                         labelTaxMap[util.getSimpleLabel(thisOtuLabel)] = consTax[i].getConsTaxString();
803                     }else {
804                         m->mothurOut("[ERROR]: Cannot add OTULabel " +  thisOtuLabel + " because it's simple label " + util.getSimpleLabel(consTax[i].getName()) + " has already been added and will result in downstream errors. Have you mixed mothur labels and non mothur labels? To make the files work well together and backwards compatible mothur treats 1, OTU01, OTU001, OTU0001 all the same. We do this by removing any non numeric characters and leading zeros. For eaxample: Otu000018 and OtuMY18 both map to 18.\n"); m->setControl_pressed(true);
805                     }
806                 }
807             }
808 
809             //sanity check for file issues - do you have the same number of bins in the shared and constaxonomy file
810             if (shared->getNumBins() != labelTaxMap.size()) {
811                 m->mothurOut("[ERROR]: Your constaxonomy file contains " + toString(labelTaxMap.size()) + " otus and your shared file contain " + toString(shared->getNumBins()) + " otus, cannot continue.\n"); m->setControl_pressed(true); return metadata;
812             }
813 
814             //merges OTUs classified to same gg otuid, sets otulabels to gg otuids, averages confidence scores of merged otus.  overwritting of otulabels is fine because constaxonomy only allows for one label to be processed.  If this assumption changes, could cause bug.
815             if (picrust != NULL) {
816                 picrust->setGGOTUIDs(labelTaxMap, shared);
817             }
818 
819             //{"taxonomy":["k__Bacteria", "p__Proteobacteria", "c__Gammaproteobacteria", "o__Enterobacteriales", "f__Enterobacteriaceae", "g__Escherichia", "s__"]}
820 
821             //traverse the binLabels forming the metadata strings and saving them
822             //make sure to sanity check
823             map<string, string>::iterator it;
824             vector<string> currentLabels = shared->getOTUNames();
825             for (int i = 0; i < shared->getNumBins(); i++) {
826 
827                 if (m->getControl_pressed()) { return metadata; }
828 
829                 it = labelTaxMap.find(util.getSimpleLabel(currentLabels[i]));
830 
831                 if (it == labelTaxMap.end()) { m->mothurOut("[ERROR]: can't find taxonomy information for " + currentLabels[i] + ".\n"); m->setControl_pressed(true); }
832                 else {
833                     vector<string> bootstrapValues;
834                     string data = "{\"taxonomy\":[";
835 
836                     vector<string> scores;
837                     vector<string> taxonomies = util.parseTax(it->second, scores);
838 
839                     for (int j = 0; j < taxonomies.size()-1; j ++) { data += "\"" + taxonomies[j] + "\", "; }
840                     data += "\"" + taxonomies[taxonomies.size()-1] + "\"]";
841 
842                     //add bootstrap values if available
843                     if (scores[0] != "null") {
844                         data += ", \"bootstrap\":[";
845 
846                         for (int j = 0; j < scores.size()-1; j ++) { data += scores[j] + ", "; }
847                         data += scores[scores.size()-1] + "]";
848 
849                     }
850                     data += "}";
851 
852                     metadata.push_back(data);
853                 }
854             }
855         }
856 
857         return metadata;
858 
859     }
860     catch(exception& e) {
861         m->errorOut(e, "BiomSimple", "getMetadataShared");
862         exit(1);
863     }
864 
865 }
866 //**********************************************************************************************************************
getMetaDataFloat(Picrust * picrust)867 vector<string> BiomSimple::getMetaDataFloat(Picrust* picrust){
868     try {
869         vector<string> metadata;
870 
871         if (consTax.size() == 0) { for (int i = 0; i < sharedFloat->getNumBins(); i++) {  metadata.push_back("null");  } }
872         else {
873 
874             if (sharedFloat == NULL) { m->setControl_pressed(true); return metadata; }
875 
876             //should the labels be Otu001 or PhyloType001
877             vector<string> otuNames = sharedFloat->getOTUNames();
878             string firstBin = otuNames[0];
879             string binTag = "Otu";
880             if ((firstBin.find("Otu")) == string::npos) { binTag = "PhyloType";  }
881 
882             map<string, string> labelTaxMap;
883             string snumBins = toString(otuNames.size());
884             for (int i = 0; i < consTax.size(); i++) {
885 
886                 if (m->getControl_pressed()) { return metadata; }
887 
888                 string thisOtuLabel = consTax[i].getName();
889 
890                 //if there is a bin label use it otherwise make one
891                 if (util.isContainingOnlyDigits(thisOtuLabel)) {
892                     string binLabel = binTag;
893                     string sbinNumber = thisOtuLabel;
894                     if (sbinNumber.length() < snumBins.length()) {
895                         int diff = snumBins.length() - sbinNumber.length();
896                         for (int h = 0; h < diff; h++) { binLabel += "0"; }
897                     }
898                     binLabel += sbinNumber;
899                     binLabel = util.getSimpleLabel(binLabel);
900                     labelTaxMap[binLabel] = consTax[i].getConsTaxString();
901                 }else {
902                     map<string, string>::iterator it = labelTaxMap.find(util.getSimpleLabel(thisOtuLabel));
903                     if (it == labelTaxMap.end()) {
904                         labelTaxMap[util.getSimpleLabel(thisOtuLabel)] = consTax[i].getConsTaxString();
905                     }else {
906                         m->mothurOut("[ERROR]: Cannot add OTULabel " +  thisOtuLabel + " because it's simple label " + util.getSimpleLabel(consTax[i].getName()) + " has already been added and will result in downstream errors. Have you mixed mothur labels and non mothur labels? To make the files work well together and backwards compatible mothur treats 1, OTU01, OTU001, OTU0001 all the same. We do this by removing any non numeric characters and leading zeros. For eaxample: Otu000018 and OtuMY18 both map to 18.\n"); m->setControl_pressed(true);
907                     }
908                 }
909             }
910 
911             //sanity check for file issues - do you have the same number of bins in the shared and constaxonomy file
912             if (sharedFloat->getNumBins() != labelTaxMap.size()) {
913                 m->mothurOut("[ERROR]: Your constaxonomy file contains " + toString(labelTaxMap.size()) + " otus and your shared file contain " + toString(sharedFloat->getNumBins()) + " otus, cannot continue.\n"); m->setControl_pressed(true); return metadata;
914             }
915 
916             //merges OTUs classified to same gg otuid, sets otulabels to gg otuids, averages confidence scores of merged otus.  overwritting of otulabels is fine because constaxonomy only allows for one label to be processed.  If this assumption changes, could cause bug.
917             if (picrust != NULL) {
918                 picrust->setGGOTUIDs(labelTaxMap, sharedFloat);
919             }
920 
921             //{"taxonomy":["k__Bacteria", "p__Proteobacteria", "c__Gammaproteobacteria", "o__Enterobacteriales", "f__Enterobacteriaceae", "g__Escherichia", "s__"]}
922 
923             //traverse the binLabels forming the metadata strings and saving them
924             //make sure to sanity check
925             map<string, string>::iterator it;
926             vector<string> currentLabels = sharedFloat->getOTUNames();
927             for (int i = 0; i < sharedFloat->getNumBins(); i++) {
928 
929                 if (m->getControl_pressed()) { return metadata; }
930 
931                 it = labelTaxMap.find(util.getSimpleLabel(currentLabels[i]));
932 
933                 if (it == labelTaxMap.end()) { m->mothurOut("[ERROR]: can't find taxonomy information for " + currentLabels[i] + ".\n"); m->setControl_pressed(true); }
934                 else {
935                     vector<string> bootstrapValues;
936                     string data = "{\"taxonomy\":[";
937 
938                     vector<string> scores;
939                     vector<string> taxonomies = util.parseTax(it->second, scores);
940 
941                     for (int j = 0; j < taxonomies.size()-1; j ++) { data += "\"" + taxonomies[j] + "\", "; }
942                     data += "\"" + taxonomies[taxonomies.size()-1] + "\"]";
943 
944                     //add bootstrap values if available
945                     if (scores[0] != "null") {
946                         data += ", \"bootstrap\":[";
947 
948                         for (int j = 0; j < scores.size()-1; j ++) { data += scores[j] + ", "; }
949                         data += scores[scores.size()-1] + "]";
950 
951                     }
952                     data += "}";
953 
954                     metadata.push_back(data);
955                 }
956             }
957         }
958 
959         return metadata;
960 
961     }
962     catch(exception& e) {
963         m->errorOut(e, "BiomSimple", "getMetadataFloat");
964         exit(1);
965     }
966 
967 }
968 /**************************************************************************************************/
969