1 //
2 // biomsimple.cpp
3 // Mothur
4 //
5 // Created by Sarah Westcott on 10/26/20.
6 // Copyright © 2020 Schloss Lab. All rights reserved.
7 //
8
9 #include "biomsimple.hpp"
10 #include "picrust.hpp"
11
12 /**************************************************************************************************/
BiomSimple()13 BiomSimple::BiomSimple() : Biom("Biological Observation Matrix 1.0.0"){
14 try {
15
16 matrixFormat = "sparse";
17 }
18 catch(exception& e) {
19 m->errorOut(e, "BiomSimple", "BiomSimple");
20 exit(1);
21 }
22 }
23
24 /**************************************************************************************************/
BiomSimple(string fname,string l)25 BiomSimple::BiomSimple(string fname, string l) : Biom("Biological Observation Matrix 1.0.0"){
26 try {
27 label = l; matrixFormat = "sparse";
28 read(fname);
29 }
30 catch(exception& e) {
31 m->errorOut(e, "BiomSimple", "BiomSimple");
32 exit(1);
33 }
34 }
35 /**************************************************************************************************/
read(string fname)36 void BiomSimple::read(string fname){
37 try {
38
39 /*{
40 "id":"/Users/SarahsWork/Desktop/release/temp.job2.shared-unique",
41 "format": "Biological Observation Matrix 0.9.1",
42 "format_url": "http://biom-format.org",
43 "type": "OTU table",
44 "generated_by": "mothur1.44.0",
45 "date": "Tue Apr 17 13:12:07 2020",
46
47 rows represent OTUS
48 columns represent samples
49
50 */
51
52 ifstream in; util.openInputFile(fname, in);
53
54 matrixFormat = ""; matrixElementType = "";
55 vector<string> otuNames; vector<string> groupNames;
56 map<string, string> fileLines;
57 //vector<string> names;
58 int numOTUs, numCols;
59 bool hasTaxonomy;
60
61 numOTUs = 0; numCols = 0; maxLevel = 0;
62 int shapeNumRows = 0; int shapeNumCols = 0;
63
64 int countOpenBrace = 0; int countClosedBrace = 0;
65 int closeParen = 0; int openParen = -1; //account for opening brace
66 bool ignoreCommas = false; bool atComma = false;
67
68 string line = "";
69 bool printHeaders = true;
70
71 while (!in.eof()) { //split file by tags, so each "line" will have something like "id":"/Users/SarahsWork/Desktop/release/final.tx.1.subsample.1.pick.shared-1"
72 if (m->getControl_pressed()) { break; }
73
74 char c = in.get(); util.gobble(in);
75
76 if (c == '[') { countOpenBrace++; }
77 else if (c == ']') { countClosedBrace++; }
78 else if (c == '{') { openParen++; }
79 else if (c == '}') { closeParen++; }
80 else if ((!ignoreCommas) && (c == ',')) { atComma = true; }
81
82 if ((countOpenBrace != countClosedBrace) && (countOpenBrace != countClosedBrace)) { ignoreCommas = true; }
83 else if ((countOpenBrace == countClosedBrace) && (countOpenBrace == countClosedBrace)) { ignoreCommas = false; }
84 if (atComma && !ignoreCommas) {
85 if (fileLines.size() == 0) { //clip first {
86 line = line.substr(1);
87 }
88 string tag = getTag(line);
89 fileLines[tag] = line;
90
91 line = "";
92 atComma = false;
93 ignoreCommas = false;
94
95 }else { line += c; }
96
97 }
98 if (line != "") {
99 line = line.substr(0, line.length()-1);
100 string tag = getTag(line);
101 fileLines[tag] = line;
102 }
103 in.close();
104
105 //check for required fields
106 map<string, string>::iterator it;
107 it = fileLines.find("type");
108 if (it == fileLines.end()) { m->mothurOut("[ERROR]: you file does not have a type provided.\n"); }
109 else {
110 string thisLine = it->second;
111 tableType = getTag(thisLine);
112 }
113
114 if (m->getControl_pressed()) { return; }
115
116 it = fileLines.find("matrix_type");
117 if (it == fileLines.end()) { m->mothurOut("[ERROR]: you file does not have a matrix_type provided.\n"); }
118 else {
119 string thisLine = it->second;
120 matrixFormat = getTag(thisLine);
121 if ((matrixFormat != "sparse") && (matrixFormat != "dense")) { m->mothurOut("[ERROR]: " + matrixFormat + " is not a valid biom matrix_type for mothur. Types allowed are sparse and dense.\n"); m->setControl_pressed(true); }
122 }
123
124 if (m->getControl_pressed()) { return; }
125
126 it = fileLines.find("matrix_element_type");
127 if (it == fileLines.end()) { m->mothurOut("[ERROR]: you file does not have a matrix_element_type provided.\n"); }
128 else {
129 string thisLine = it->second;
130 matrixElementType = getTag(thisLine);
131 if ((matrixElementType != "int") && (matrixElementType != "float")) { m->mothurOut("[ERROR]: " + matrixElementType + " is not a valid biom matrix_element_type for mothur. Types allowed are int and float.\n"); m->setControl_pressed(true); }
132 }
133
134 if (m->getControl_pressed()) { return; }
135
136 map<string, string> otuTaxonomies;
137 it = fileLines.find("rows");
138 if (it == fileLines.end()) { m->mothurOut("[ERROR]: you file does not have a rows provided.\n"); }
139 else {
140 maxLevel = 0;
141 string thisLine = it->second;
142
143 bool hasTaxonomy = false;
144 vector< vector<string> > results = extractTaxonomyData(thisLine, numOTUs, hasTaxonomy);
145
146 if ((tableType == "Taxon table") || (tableType == "Taxontable")) {
147 vector<string> taxonomies = results[0];
148
149 //create OTU names
150 string snumBins = toString(numOTUs);
151 for (int i = 0; i < numOTUs; i++) {
152
153 //if there is a bin label use it otherwise make one
154 string binLabel = "OTU";
155 string sbinNumber = toString(i+1);
156 if (sbinNumber.length() < snumBins.length()) {
157 int diff = snumBins.length() - sbinNumber.length();
158 for (int h = 0; h < diff; h++) { binLabel += "0"; }
159 }
160 binLabel += sbinNumber;
161
162 otuNames.push_back(binLabel);
163 otuTaxonomies[otuNames[i]] = taxonomies[i];
164 }
165
166 }else{
167 otuNames = results[0];
168 if (hasTaxonomy) {
169 for (int i = 0; i < otuNames.size(); i++) { otuTaxonomies[otuNames[i]] = results[1][i]; }
170 }
171 }
172 }
173
174 if (m->getControl_pressed()) { return; }
175
176 it = fileLines.find("columns");
177 if (it == fileLines.end()) { m->mothurOut("[ERROR]: you file does not have a columns provided.\n"); }
178 else {
179 string thisLine = it->second;
180
181 //read sample names
182 maxLevel = 0;
183 bool hasTaxonomy = false;
184 vector< vector<string> > results = extractTaxonomyData(thisLine, numCols, hasTaxonomy);
185 groupNames = results[0];
186 if (hasTaxonomy) {
187 for (int i = 0; i < results[1].size(); i++) {
188 if (m->getControl_pressed()) { break; }
189
190 string completeTax = util.addUnclassifieds(results[1][i], maxLevel, false);
191 groupTaxonomies[results[0][i]] = completeTax;
192 }
193 }
194 }
195
196 if (m->getControl_pressed()) { return; }
197
198 it = fileLines.find("shape");
199 if (it == fileLines.end()) { m->mothurOut("[ERROR]: you file does not have a shape provided.\n"); }
200 else {
201 string thisLine = it->second;
202 getDims(thisLine, shapeNumRows, shapeNumCols);
203
204 //check shape
205 if (shapeNumCols != numCols) { m->mothurOut("[ERROR]: shape indicates " + toString(shapeNumCols) + " columns, but I only read " + toString(numCols) + " columns.\n"); m->setControl_pressed(true); }
206
207 if (shapeNumRows != numOTUs) { m->mothurOut("[ERROR]: shape indicates " + toString(shapeNumRows) + " rows, but I only read " + toString(numOTUs) + " rows.\n"); m->setControl_pressed(true); }
208 }
209
210 if (m->getControl_pressed()) { return; }
211
212 it = fileLines.find("data");
213 if (it == fileLines.end()) { m->mothurOut("[ERROR]: you file does not have a data provided.\n"); }
214 else {
215 string thisLine = it->second;
216
217 if (shared != NULL) { delete shared; }
218
219 shared = extractOTUData(thisLine, groupNames, numOTUs);
220 shared->setOTUNames(otuNames);
221 m->mothurOut("\n"+shared->getLabel()+"\n");
222
223 if (otuTaxonomies.size() != 0) {
224 //sanity check
225 if ((shared->getNumBins() == otuTaxonomies.size()) && (shared->getNumBins() == numOTUs)) {
226
227 for (int i = 0; i < shared->getNumBins(); i++) {
228 if (m->getControl_pressed()) { break; }
229
230 string thisOTUsTax = otuTaxonomies[otuNames[i]];
231 string newTax = util.addUnclassifieds(thisOTUsTax, maxLevel, false);
232 Taxonomy thisOTUsTaxonomy(otuNames[i], newTax, shared->getOTUTotal(i));
233 consTax.push_back(thisOTUsTaxonomy);
234 }
235 }
236 }
237 }
238
239 }
240 catch(exception& e) {
241 m->errorOut(e, "BiomSimple", "read");
242 exit(1);
243 }
244 }
245 //**********************************************************************************************************************
246 //designed for things like "type": "OTU table", returns type
getTag(string & line)247 string BiomSimple::getTag(string& line) {
248 try {
249 bool inQuotes = false;
250 string tag = "";
251 char c = '\"';
252
253 for (int i = 0; i < line.length(); i++) {
254
255 //you want to ignore any ; until you reach the next '
256 if ((line[i] == c) && (!inQuotes)) { inQuotes = true; }
257 else if ((line[i] == c) && (inQuotes)) {
258 inQuotes= false;
259 line = line.substr(i+1);
260 return tag;
261 }
262
263 if (inQuotes) { if (line[i] != c) { tag += line[i]; } }
264 }
265
266 return tag;
267 }
268 catch(exception& e) {
269 m->errorOut(e, "BiomSimple", "getTag");
270 exit(1);
271 }
272 }
273 //**********************************************************************************************************************
274 //readRows
extractTaxonomyData(string line,int & numOTUs,bool & hasTaxonomy)275 vector< vector<string> > BiomSimple::extractTaxonomyData(string line, int& numOTUs, bool& hasTaxonomy) {
276 try {
277 /*"rows":[
278 {"id":"Otu01", "metadata":{"taxonomy":["Bacteria", "Bacteroidetes", "Bacteroidia", "Bacteroidales", "Porphyromonadaceae", "unclassified"], "bootstrap":[100, 100, 100, 100, 100, 100]}},
279 {"id":"Otu02", "metadata":{"taxonomy":["Bacteria", "Bacteroidetes", "Bacteroidia", "Bacteroidales", "Rikenellaceae", "Alistipes"], "bootstrap":[100, 100, 100, 100, 100, 100]}},
280 ...
281
282 "rows":[{"id": "k__Archaea;p__Euryarchaeota;c__Methanobacteria;o__Methanobacteriales;f__Methanobacteriaceae", "metadata": null},
283 {"id": "k__Bacteria;p__Actinobacteria;c__Actinobacteria;o__Actinomycetales;f__Actinomycetaceae", "metadata": null}
284 ....
285
286 make look like above
287
288
289 ],*/
290
291 vector< vector<string> > results; results.resize(2);
292 int countOpenBrace = 0; int countClosedBrace = 0; int openParen = 0; int closeParen = 0;
293 string nextRow = "";
294 bool end = false; bool allBlank = true;
295
296 for (int i = 0; i < line.length(); i++) {
297
298 if (m->getControl_pressed()) { return results; }
299
300 if (line[i] == '[') { countOpenBrace++; }
301 else if (line[i] == ']') { countClosedBrace++; }
302 else if (line[i] == '{') { openParen++; }
303 else if (line[i] == '}') { closeParen++; }
304 else if (openParen != 0) { nextRow += line[i]; } //you are reading the row info
305
306 //you have reached the end of the rows info
307 if ((countOpenBrace == countClosedBrace) && (countClosedBrace != 0)) { end = true; break; }
308 if ((openParen == closeParen) && (closeParen != 0)) { //process row
309 numOTUs++;
310
311 vector<string> result = getNamesAndTaxonomies(nextRow);
312 if (result.size() != 0) { results[0].push_back(result[0]); results[1].push_back(result[1]); if (result[1] != "") { allBlank = false; } }
313
314 nextRow = ""; openParen = 0; closeParen = 0;
315 }
316 }
317
318 if (allBlank) { hasTaxonomy = false; }
319 else { hasTaxonomy = true; }
320
321 return results;
322 }
323 catch(exception& e) {
324 m->errorOut(e, "BiomSimple", "extractTaxonomyData");
325 exit(1);
326 }
327 }
328 //**********************************************************************************************************************
329 //items[0] = id, items[1] = taxonomy, if items[2] then thats the taxonomy bootstrap values
getNamesAndTaxonomies(string line)330 vector<string> BiomSimple::getNamesAndTaxonomies(string line) {
331 try {
332 /*"rows":[
333 {"id":"Otu01", "metadata":{"taxonomy":["Bacteria", "Bacteroidetes", "Bacteroidia", "Bacteroidales", "Porphyromonadaceae", "unclassified"], "bootstrap":[100, 100, 100, 100, 100, 100]}},
334 {"id":"Otu02", "metadata":{"taxonomy":["Bacteria", "Bacteroidetes", "Bacteroidia", "Bacteroidales", "Rikenellaceae", "Alistipes"], "bootstrap":[100, 100, 100, 100, 100, 100]}},
335 ...
336
337 "rows":[{"id": "k__Archaea;p__Euryarchaeota;c__Methanobacteria;o__Methanobacteriales;f__Methanobacteriaceae", "metadata": null},
338 {"id": "k__Bacteria;p__Actinobacteria;c__Actinobacteria;o__Actinomycetales;f__Actinomycetaceae", "metadata": null}
339 ....
340
341 make look like above
342
343
344 ],*/
345
346 vector<string> results;
347 if (line == "") { return results; }
348
349 int pos = line.find_first_of(',');
350 if (pos == string::npos) { //some kind of error?? we expect at least metadata : null, just grab name
351 results.push_back(getName(line)); results.push_back("");
352 }else {
353 string value;
354 util.splitAtComma(value, line); //value hold name portion ("id":"Otu01") line holds rest
355 results.push_back(getName(value));
356
357 string taxonomy = ""; string bootstrap = "";
358 int pos = line.find("taxonomy");
359 if (pos != string::npos) { //no taxonomy info given
360 int pos2 = line.find("bootstrap");
361 if (pos2 != string::npos) { //no taxonomy info given
362 taxonomy = line.substr(pos, (pos2-pos));
363 taxonomy = taxonomy.substr(0, taxonomy.find_last_of(','));
364 bootstrap = line.substr(pos2);
365 }else {
366 taxonomy = line.substr(pos);
367 }
368 }
369
370 results.push_back(getTaxonomy(taxonomy, bootstrap));
371 }
372
373 return results;
374 }
375 catch(exception& e) {
376 m->errorOut(e, "BiomSimple", "getNamesAndTaxonomies");
377 exit(1);
378 }
379 }
380 //**********************************************************************************************************************
getName(string line)381 string BiomSimple::getName(string line) {
382 try {
383 vector<string> nameItems;
384 util.splitAtChar(line, nameItems, ':'); //split part we want containing the ids
385 string name = nameItems[1];
386
387 //remove "" if needed
388 int pos = name.find("\"");
389 if (pos != string::npos) {
390 string newName = "";
391 for (int k = 0; k < name.length(); k++) {
392 if (name[k] != '\"') { newName += name[k]; }
393 }
394 name = newName;
395 }
396
397 return name;
398 }
399 catch(exception& e) {
400 m->errorOut(e, "BiomSimple", "getName");
401 exit(1);
402 }
403 }
404 //**********************************************************************************************************************
405 //"taxonomy":"Bacteria", "Bacteroidetes", "Bacteroidia", "Bacteroidales", "Porphyromonadaceae", "unclassified",
406 //"bootstrap":100, 100, 100, 100, 100, 100
getTaxonomy(string taxonomy,string bootstrap)407 string BiomSimple::getTaxonomy(string taxonomy, string bootstrap) {
408 try {
409 vector<string> results;
410
411 if (taxonomy != "") {
412 vector<string> taxItems;
413 util.splitAtChar(taxonomy, taxItems, ':'); //split part we want containing the ids
414 string taxons = taxItems[1];
415
416 string taxon;
417 while((taxons.find_first_of(',') != -1)) {
418 if (m->getControl_pressed()) {break;}
419 util.splitAtComma(taxon, taxons);
420 results.push_back(taxon);
421 }
422 if (!util.stringBlank(taxons)) { results.push_back(taxons); }
423 }
424
425 if (bootstrap != "") {
426 vector<string> bootItems;
427 util.splitAtChar(bootstrap, bootItems, ':'); //split part we want containing the ids
428 string bootValues = bootItems[1];
429
430 string bootValue;
431 int i = 0;
432 while((bootValues.find_first_of(',') != -1)) {
433 if (m->getControl_pressed()) {break;}
434 util.splitAtComma(bootValue, bootValues);
435 results[i]+="("+bootValue+")";
436 i++;
437 }
438 if (!util.stringBlank(bootValues)) { results[i]+="("+bootValues+")"; }
439 }
440
441 string result = "";
442 for (int i = 0; i < results.size(); i++) {
443 if (m->getControl_pressed()) {result = ""; break;}
444 result += results[i] + ";";
445 }
446
447 if (results.size() > maxLevel) { maxLevel = results.size(); }
448
449 return result;
450 }
451 catch(exception& e) {
452 m->errorOut(e, "BiomSimple", "getTaxonomy");
453 exit(1);
454 }
455 }
456 //**********************************************************************************************************************
getDims(string line,int & shapeNumRows,int & shapeNumCols)457 void BiomSimple::getDims(string line, int& shapeNumRows, int& shapeNumCols) {
458 try {
459 //get shape
460 bool inBar = false;
461 string num = "";
462
463 for (int i = 0; i < line.length(); i++) {
464
465 //you want to ignore any ; until you reach the next '
466 if ((line[i] == '[') && (!inBar)) { inBar = true; i++; if (!(i < line.length())) { break; } }
467 else if ((line[i] == ']') && (inBar)) {
468 inBar= false;
469 util.mothurConvert(num, shapeNumCols);
470 break;
471 }
472
473 if (inBar) {
474 if (line[i] == ',') {
475 util.mothurConvert(num, shapeNumRows);
476 num = "";
477 }else { if (!isspace(line[i])) { num += line[i]; } }
478 }
479 }
480 }
481 catch(exception& e) {
482 m->errorOut(e, "BiomSimple", "getDims");
483 exit(1);
484 }
485 }
486 //**********************************************************************************************************************
487 //readData
extractOTUData(string line,vector<string> & groupNames,int numOTUs)488 SharedRAbundVectors* BiomSimple::extractOTUData(string line, vector<string>& groupNames, int numOTUs) {
489 try {
490 SharedRAbundVectors* lookup = new SharedRAbundVectors();
491
492 //creates new sharedRAbunds
493 for (int i = 0; i < groupNames.size(); i++) {
494 SharedRAbundVector* temp = new SharedRAbundVector(numOTUs); //sets all abunds to 0
495 temp->setLabel(label);
496 temp->setGroup(groupNames[i]);
497 lookup->push_back(temp);
498 }
499
500 if (matrixElementType == "float") {
501
502 if (sharedFloat != NULL) { delete sharedFloat; }
503 sharedFloat = new SharedRAbundFloatVectors();
504
505 //creates new sharedRAbunds
506 for (int i = 0; i < groupNames.size(); i++) {
507 SharedRAbundFloatVector* temp = new SharedRAbundFloatVector(numOTUs); //sets all abunds to 0
508 temp->setLabel(label);
509 temp->setGroup(groupNames[i]);
510 sharedFloat->push_back(temp);
511 }
512 }
513
514 bool dataStart = false;
515 bool inBrackets = false;
516 string num = "";
517 vector<int> nums;
518 vector<float> numsFloat;
519 int otuCount = 0;
520 for (int i = 0; i < line.length(); i++) {
521
522 if (m->getControl_pressed()) { return lookup; }
523
524 //look for opening [ to indicate data is starting
525 if ((line[i] == '[') && (!dataStart)) { dataStart = true; i++; if (!(i < line.length())) { break; } }
526 else if ((line[i] == ']') && dataStart && (!inBrackets)) { break; } //we are done reading data
527
528 if (dataStart) {
529 if ((line[i] == '[') && (!inBrackets)) { inBrackets = true; i++; if (!(i < line.length())) { break; } }
530 else if ((line[i] == ']') && (inBrackets)) {
531 inBrackets = false;
532 int temp;
533 float temp2;
534 if (matrixElementType == "float") {
535 util.mothurConvert(num, temp2);
536 numsFloat.push_back(temp2);
537 temp = (int)temp2;
538 }else { util.mothurConvert(num, temp); }
539
540 nums.push_back(temp);
541 num = "";
542
543 //save info to vectors
544 if (matrixFormat == "dense") {
545
546 //sanity check
547 if (nums.size() != lookup->size()) { m->mothurOut("[ERROR]: trouble parsing OTU data. OTU " + toString(otuCount) + " causing errors.\n"); m->setControl_pressed(true); }
548
549 //set abundances for this otu
550 //nums contains [abundSample0, abundSample1, abundSample2, ...] for current OTU
551 for (int j = 0; j < groupNames.size(); j++) { lookup->set(otuCount, nums[j], groupNames[j]); }
552
553
554 if (matrixElementType == "float") {
555 //sanity check
556 if (numsFloat.size() != sharedFloat->size()) { m->mothurOut("[ERROR]: trouble parsing OTU data. OTU " + toString(otuCount) + " causing errors.\n"); m->setControl_pressed(true); }
557
558 //set abundances for this otu
559 //nums contains [abundSample0, abundSample1, abundSample2, ...] for current OTU
560 for (int j = 0; j < groupNames.size(); j++) { sharedFloat->set(otuCount, numsFloat[j], groupNames[j]); }
561
562 }
563
564 otuCount++;
565
566 }else {
567 //sanity check
568 if (nums.size() != 3) { m->mothurOut("[ERROR]: trouble parsing OTU data.\n"); m->setControl_pressed(true); }
569
570 //nums contains [otuNum, sampleNum, abundance]
571 lookup->set(nums[0], nums[2], groupNames[nums[1]]);
572
573 if (matrixElementType == "float") {
574 //nums contains [otuNum, sampleNum, abundance]
575 sharedFloat->set(nums[0], numsFloat[2], groupNames[nums[1]]);
576 }
577 }
578 nums.clear(); numsFloat.clear();
579 }
580
581 if (inBrackets) {
582 if (line[i] == ',') {
583 float temp2;
584 util.mothurConvert(num, temp2);
585 numsFloat.push_back(temp2);
586 nums.push_back((int)temp2);
587 num = "";
588 }else { if (!isspace(line[i])) { num += line[i]; } }
589 }
590 }
591 }
592
593 return lookup;
594 }
595 catch(exception& e) {
596 m->errorOut(e, "BiomSimple", "extractOTUData");
597 exit(1);
598 }
599 }
600 //**********************************************************************************************************************
print(string filename,vector<string> sampleMetadata,Picrust * picrust)601 void BiomSimple::print(string filename, vector<string> sampleMetadata, Picrust* picrust) {
602 try {
603 vector<string> metadata = getMetaDataShared(picrust);
604 int numBins = shared->getNumBins();
605 int numSamples = shared->size();
606 vector<string> currentLabels = shared->getOTUNames();
607 vector<string> namesOfGroups = shared->getNamesGroups();
608
609 if (m->getControl_pressed()) { return; }
610
611 time_t rawtime; struct tm * timeinfo;
612 time ( &rawtime );
613 timeinfo = localtime ( &rawtime );
614 string dateString = asctime (timeinfo);
615 int pos = dateString.find('\n');
616 if (pos != string::npos) { dateString = dateString.substr(0, pos);}
617 string spaces = " ";
618
619 ofstream out; util.openOutputFile(filename, out);
620
621 out << "{\n" + spaces + "\"id\":\"" + util.getSimpleName(sharedFileName) + "-" + label + "\",\n" + spaces + "\"format\": \"" + version + "\",\n" + spaces + "\"format_url\": \"" + formatURL + "\",\n";
622 out << spaces + "\"type\": \"" + tableType + " \",\n" + spaces + "\"generated_by\": \"" << mothurVersion << "\",\n" + spaces + "\"date\": \"" << dateString << "\",\n";
623
624
625
626 //get row info
627 /*"rows":[
628 {"id":"GG_OTU_1", "metadata":null},
629 {"id":"GG_OTU_2", "metadata":null},
630 {"id":"GG_OTU_3", "metadata":null},
631 {"id":"GG_OTU_4", "metadata":null},
632 {"id":"GG_OTU_5", "metadata":null}
633 ],*/
634
635 out << spaces + "\"rows\":[\n";
636 string rowFront = spaces + spaces + "{\"id\":\"";
637 string rowBack = "\", \"metadata\":";
638
639 for (int i = 0; i < numBins-1; i++) {
640 if (m->getControl_pressed()) { out.close(); return; }
641 out << rowFront << currentLabels[i] << rowBack << metadata[i] << "},\n";
642 }
643 out << rowFront << currentLabels[(numBins-1)] << rowBack << metadata[(numBins-1)] << "}\n" + spaces + "],\n";
644
645 //get column info
646 /*"columns": [
647 {"id":"Sample1", "metadata":null},
648 {"id":"Sample2", "metadata":null},
649 {"id":"Sample3", "metadata":null},
650 {"id":"Sample4", "metadata":null},
651 {"id":"Sample5", "metadata":null},
652 {"id":"Sample6", "metadata":null}
653 ],*/
654
655 string colBack = "\", \"metadata\":";
656 out << spaces + "\"columns\":[\n";
657
658 for (int i = 0; i < namesOfGroups.size()-1; i++) {
659 if (m->getControl_pressed()) { out.close(); return; }
660 out << rowFront << namesOfGroups[i] << colBack << sampleMetadata[i] << "},\n";
661 }
662 out << rowFront << namesOfGroups[(namesOfGroups.size()-1)] << colBack << sampleMetadata[numSamples-1] << "}\n" + spaces + "],\n";
663
664 out << spaces + "\"matrix_type\": \"" << matrixFormat << "\",\n" + spaces + "\"matrix_element_type\": \"" + matrixElementType + "\",\n";
665 out << spaces + "\"shape\": [" << numBins << "," << numSamples << "],\n";
666 out << spaces + "\"data\": [";
667
668 vector<string> dataRows;
669 if (matrixFormat == "sparse") {
670 /*"data":[[0,2,1],
671 [1,0,5],
672 [1,1,1],
673 [1,3,2],
674 [1,4,3],
675 [1,5,1],
676 [2,2,1],
677 [2,3,4],
678 [2,4,2],
679 [3,0,2],
680 [3,1,1],
681 [3,2,1],
682 [3,5,1],
683 [4,1,1],
684 [4,2,1]
685 ]*/
686
687 if (matrixElementType == "int") {
688 for (int i = 0; i < shared->getNumBins(); i++) {
689
690 if (m->getControl_pressed()) { out.close(); return; }
691 vector<int> binAbunds = shared->getOTU(i);
692
693 for (int j = 0; j < binAbunds.size(); j++) {
694 int abund = binAbunds[j];
695 string binInfo = "[" + toString(i) + "," + toString(j) + "," + toString(abund) + "]";
696 //only print non zero values
697 if (abund != 0) { dataRows.push_back(binInfo); }
698 }
699 }
700 }else {
701 float zero = 0.0;
702 for (int i = 0; i < sharedFloat->getNumBins(); i++) {
703
704 if (m->getControl_pressed()) { out.close(); return; }
705 vector<float> binAbunds = sharedFloat->getOTU(i);
706
707 for (int j = 0; j < binAbunds.size(); j++) {
708 float abund = binAbunds[j];
709 string binInfo = "[" + toString(i) + "," + toString(j) + "," + toString(abund) + "]";
710 //only print non zero values
711 if (!util.isEqual(abund,zero)) { dataRows.push_back(binInfo); }
712 }
713 }
714 }
715 }else {
716
717 /* "matrix_type": "dense",
718 "matrix_element_type": "int",
719 "shape": [5,6],
720 "data": [[0,0,1,0,0,0],
721 [5,1,0,2,3,1],
722 [0,0,1,4,2,0],
723 [2,1,1,0,0,1],
724 [0,1,1,0,0,0]]*/
725
726 if (matrixElementType == "int") {
727 for (int i = 0; i < shared->getNumBins(); i++) {
728
729 if (m->getControl_pressed()) { out.close(); return; }
730
731 string binInfo = "[";
732 vector<int> binAbund = shared->getOTU(i);
733 for (int j = 0; j < binAbund.size()-1; j++) { binInfo += toString(binAbund[j]) + ","; }
734 binInfo += toString(binAbund[binAbund.size()-1]) + "]";
735 dataRows.push_back(binInfo);
736 }
737 }else {
738 for (int i = 0; i < sharedFloat->getNumBins(); i++) {
739
740 if (m->getControl_pressed()) { out.close(); return; }
741
742 string binInfo = "[";
743 vector<float> binAbund = sharedFloat->getOTU(i);
744 for (int j = 0; j < binAbund.size()-1; j++) { binInfo += toString(binAbund[j]) + ","; }
745 binInfo += toString(binAbund[binAbund.size()-1]) + "]";
746 dataRows.push_back(binInfo);
747 }
748 }
749 }
750
751 for (int i = 0; i < dataRows.size()-1; i++) {
752 out << dataRows[i] << ",\n" + spaces + spaces;
753 }
754 out << dataRows[dataRows.size()-1] << "]\n";
755
756 out << "}\n";
757
758 }
759 catch(exception& e) {
760 m->errorOut(e, "BiomSimple", "print");
761 exit(1);
762 }
763 }
764 //**********************************************************************************************************************
getMetaDataShared(Picrust * picrust)765 vector<string> BiomSimple::getMetaDataShared(Picrust* picrust){
766 try {
767 vector<string> metadata;
768
769 if (consTax.size() == 0) { for (int i = 0; i < shared->getNumBins(); i++) { metadata.push_back("null"); } }
770 else {
771
772 if (shared == NULL) { m->setControl_pressed(true); return metadata; }
773
774 //should the labels be Otu001 or PhyloType001
775 vector<string> otuNames = shared->getOTUNames();
776 string firstBin = otuNames[0];
777 string binTag = "Otu";
778 if ((firstBin.find("Otu")) == string::npos) { binTag = "PhyloType"; }
779
780 map<string, string> labelTaxMap;
781 string snumBins = toString(otuNames.size());
782 for (int i = 0; i < consTax.size(); i++) {
783
784 if (m->getControl_pressed()) { return metadata; }
785
786 string thisOtuLabel = consTax[i].getName();
787
788 //if there is a bin label use it otherwise make one
789 if (util.isContainingOnlyDigits(thisOtuLabel)) {
790 string binLabel = binTag;
791 string sbinNumber = thisOtuLabel;
792 if (sbinNumber.length() < snumBins.length()) {
793 int diff = snumBins.length() - sbinNumber.length();
794 for (int h = 0; h < diff; h++) { binLabel += "0"; }
795 }
796 binLabel += sbinNumber;
797 binLabel = util.getSimpleLabel(binLabel);
798 labelTaxMap[binLabel] = consTax[i].getConsTaxString();
799 }else {
800 map<string, string>::iterator it = labelTaxMap.find(util.getSimpleLabel(thisOtuLabel));
801 if (it == labelTaxMap.end()) {
802 labelTaxMap[util.getSimpleLabel(thisOtuLabel)] = consTax[i].getConsTaxString();
803 }else {
804 m->mothurOut("[ERROR]: Cannot add OTULabel " + thisOtuLabel + " because it's simple label " + util.getSimpleLabel(consTax[i].getName()) + " has already been added and will result in downstream errors. Have you mixed mothur labels and non mothur labels? To make the files work well together and backwards compatible mothur treats 1, OTU01, OTU001, OTU0001 all the same. We do this by removing any non numeric characters and leading zeros. For eaxample: Otu000018 and OtuMY18 both map to 18.\n"); m->setControl_pressed(true);
805 }
806 }
807 }
808
809 //sanity check for file issues - do you have the same number of bins in the shared and constaxonomy file
810 if (shared->getNumBins() != labelTaxMap.size()) {
811 m->mothurOut("[ERROR]: Your constaxonomy file contains " + toString(labelTaxMap.size()) + " otus and your shared file contain " + toString(shared->getNumBins()) + " otus, cannot continue.\n"); m->setControl_pressed(true); return metadata;
812 }
813
814 //merges OTUs classified to same gg otuid, sets otulabels to gg otuids, averages confidence scores of merged otus. overwritting of otulabels is fine because constaxonomy only allows for one label to be processed. If this assumption changes, could cause bug.
815 if (picrust != NULL) {
816 picrust->setGGOTUIDs(labelTaxMap, shared);
817 }
818
819 //{"taxonomy":["k__Bacteria", "p__Proteobacteria", "c__Gammaproteobacteria", "o__Enterobacteriales", "f__Enterobacteriaceae", "g__Escherichia", "s__"]}
820
821 //traverse the binLabels forming the metadata strings and saving them
822 //make sure to sanity check
823 map<string, string>::iterator it;
824 vector<string> currentLabels = shared->getOTUNames();
825 for (int i = 0; i < shared->getNumBins(); i++) {
826
827 if (m->getControl_pressed()) { return metadata; }
828
829 it = labelTaxMap.find(util.getSimpleLabel(currentLabels[i]));
830
831 if (it == labelTaxMap.end()) { m->mothurOut("[ERROR]: can't find taxonomy information for " + currentLabels[i] + ".\n"); m->setControl_pressed(true); }
832 else {
833 vector<string> bootstrapValues;
834 string data = "{\"taxonomy\":[";
835
836 vector<string> scores;
837 vector<string> taxonomies = util.parseTax(it->second, scores);
838
839 for (int j = 0; j < taxonomies.size()-1; j ++) { data += "\"" + taxonomies[j] + "\", "; }
840 data += "\"" + taxonomies[taxonomies.size()-1] + "\"]";
841
842 //add bootstrap values if available
843 if (scores[0] != "null") {
844 data += ", \"bootstrap\":[";
845
846 for (int j = 0; j < scores.size()-1; j ++) { data += scores[j] + ", "; }
847 data += scores[scores.size()-1] + "]";
848
849 }
850 data += "}";
851
852 metadata.push_back(data);
853 }
854 }
855 }
856
857 return metadata;
858
859 }
860 catch(exception& e) {
861 m->errorOut(e, "BiomSimple", "getMetadataShared");
862 exit(1);
863 }
864
865 }
866 //**********************************************************************************************************************
getMetaDataFloat(Picrust * picrust)867 vector<string> BiomSimple::getMetaDataFloat(Picrust* picrust){
868 try {
869 vector<string> metadata;
870
871 if (consTax.size() == 0) { for (int i = 0; i < sharedFloat->getNumBins(); i++) { metadata.push_back("null"); } }
872 else {
873
874 if (sharedFloat == NULL) { m->setControl_pressed(true); return metadata; }
875
876 //should the labels be Otu001 or PhyloType001
877 vector<string> otuNames = sharedFloat->getOTUNames();
878 string firstBin = otuNames[0];
879 string binTag = "Otu";
880 if ((firstBin.find("Otu")) == string::npos) { binTag = "PhyloType"; }
881
882 map<string, string> labelTaxMap;
883 string snumBins = toString(otuNames.size());
884 for (int i = 0; i < consTax.size(); i++) {
885
886 if (m->getControl_pressed()) { return metadata; }
887
888 string thisOtuLabel = consTax[i].getName();
889
890 //if there is a bin label use it otherwise make one
891 if (util.isContainingOnlyDigits(thisOtuLabel)) {
892 string binLabel = binTag;
893 string sbinNumber = thisOtuLabel;
894 if (sbinNumber.length() < snumBins.length()) {
895 int diff = snumBins.length() - sbinNumber.length();
896 for (int h = 0; h < diff; h++) { binLabel += "0"; }
897 }
898 binLabel += sbinNumber;
899 binLabel = util.getSimpleLabel(binLabel);
900 labelTaxMap[binLabel] = consTax[i].getConsTaxString();
901 }else {
902 map<string, string>::iterator it = labelTaxMap.find(util.getSimpleLabel(thisOtuLabel));
903 if (it == labelTaxMap.end()) {
904 labelTaxMap[util.getSimpleLabel(thisOtuLabel)] = consTax[i].getConsTaxString();
905 }else {
906 m->mothurOut("[ERROR]: Cannot add OTULabel " + thisOtuLabel + " because it's simple label " + util.getSimpleLabel(consTax[i].getName()) + " has already been added and will result in downstream errors. Have you mixed mothur labels and non mothur labels? To make the files work well together and backwards compatible mothur treats 1, OTU01, OTU001, OTU0001 all the same. We do this by removing any non numeric characters and leading zeros. For eaxample: Otu000018 and OtuMY18 both map to 18.\n"); m->setControl_pressed(true);
907 }
908 }
909 }
910
911 //sanity check for file issues - do you have the same number of bins in the shared and constaxonomy file
912 if (sharedFloat->getNumBins() != labelTaxMap.size()) {
913 m->mothurOut("[ERROR]: Your constaxonomy file contains " + toString(labelTaxMap.size()) + " otus and your shared file contain " + toString(sharedFloat->getNumBins()) + " otus, cannot continue.\n"); m->setControl_pressed(true); return metadata;
914 }
915
916 //merges OTUs classified to same gg otuid, sets otulabels to gg otuids, averages confidence scores of merged otus. overwritting of otulabels is fine because constaxonomy only allows for one label to be processed. If this assumption changes, could cause bug.
917 if (picrust != NULL) {
918 picrust->setGGOTUIDs(labelTaxMap, sharedFloat);
919 }
920
921 //{"taxonomy":["k__Bacteria", "p__Proteobacteria", "c__Gammaproteobacteria", "o__Enterobacteriales", "f__Enterobacteriaceae", "g__Escherichia", "s__"]}
922
923 //traverse the binLabels forming the metadata strings and saving them
924 //make sure to sanity check
925 map<string, string>::iterator it;
926 vector<string> currentLabels = sharedFloat->getOTUNames();
927 for (int i = 0; i < sharedFloat->getNumBins(); i++) {
928
929 if (m->getControl_pressed()) { return metadata; }
930
931 it = labelTaxMap.find(util.getSimpleLabel(currentLabels[i]));
932
933 if (it == labelTaxMap.end()) { m->mothurOut("[ERROR]: can't find taxonomy information for " + currentLabels[i] + ".\n"); m->setControl_pressed(true); }
934 else {
935 vector<string> bootstrapValues;
936 string data = "{\"taxonomy\":[";
937
938 vector<string> scores;
939 vector<string> taxonomies = util.parseTax(it->second, scores);
940
941 for (int j = 0; j < taxonomies.size()-1; j ++) { data += "\"" + taxonomies[j] + "\", "; }
942 data += "\"" + taxonomies[taxonomies.size()-1] + "\"]";
943
944 //add bootstrap values if available
945 if (scores[0] != "null") {
946 data += ", \"bootstrap\":[";
947
948 for (int j = 0; j < scores.size()-1; j ++) { data += scores[j] + ", "; }
949 data += scores[scores.size()-1] + "]";
950
951 }
952 data += "}";
953
954 metadata.push_back(data);
955 }
956 }
957 }
958
959 return metadata;
960
961 }
962 catch(exception& e) {
963 m->errorOut(e, "BiomSimple", "getMetadataFloat");
964 exit(1);
965 }
966
967 }
968 /**************************************************************************************************/
969