1 /*
2 * Reporter.cpp
3 *
4 * Author: Anthony San Lucas
5 * Email: sanlucas@gmail.com
6 */
7
8 #include "Reporter.h"
9
10 namespace haplohseq {
11
Reporter()12 Reporter::Reporter() {
13 // TODO Auto-generated constructor stub
14
15 }
16
~Reporter()17 Reporter::~Reporter() {
18 // TODO Auto-generated destructor stub
19 }
20
21
writeToFile(std::string & filename,std::vector<double> & values,std::string & delimiter,std::string & header)22 void Reporter::writeToFile(std::string& filename, std::vector<double>& values, std::string& delimiter, std::string& header) {
23 std::ofstream file;
24 file.open(filename.c_str());
25 if (header != "") {
26 file << header << std::endl;
27 }
28 for (unsigned int i = 0; i < values.size()-1; i++) {
29 file << values[i] << delimiter;
30 }
31 file << values[values.size()-1];
32 file.close();
33 }
34
writeToFile(std::string & filename,std::vector<double> & values,std::string & delimiter)35 void Reporter::writeToFile(std::string& filename, std::vector<double>& values, std::string& delimiter) {
36 std::string header = "";
37 this->writeToFile(filename, values, delimiter, header);
38 }
39
writeToFile(std::string & filename,std::vector<unsigned long> & values,std::string & delimiter,std::string & header)40 void Reporter::writeToFile(std::string& filename, std::vector<unsigned long>& values, std::string& delimiter, std::string& header) {
41 std::ofstream file;
42 file.open(filename.c_str());
43 if (header != "") {
44 file << header << std::endl;
45 }
46 for (unsigned int i = 0; i < values.size()-1; i++) {
47 file << values[i] << delimiter;
48 }
49 file << values[values.size()-1];
50 file.close();
51 }
52
writeToFile(std::string & filename,std::vector<unsigned long> & values,std::string & delimiter)53 void Reporter::writeToFile(std::string& filename, std::vector<unsigned long>& values, std::string& delimiter) {
54 std::string header = "";
55 this->writeToFile(filename, values, delimiter, header);
56 }
57
writeToFile(std::string & filename,std::vector<std::string> & values,std::string & delimiter,std::string & header)58 void Reporter::writeToFile(std::string& filename, std::vector<std::string>& values, std::string& delimiter, std::string& header) {
59 std::ofstream file;
60 file.open(filename.c_str());
61 if (header != "") {
62 file << header << std::endl;
63 }
64 for (unsigned int i = 0; i < values.size()-1; i++) {
65 file << values[i] << delimiter;
66 }
67 file << values[values.size()-1];
68 file.close();
69 }
70
writeToFile(std::string & filename,std::vector<std::string> & values,std::string & delimiter)71 void Reporter::writeToFile(std::string& filename, std::vector<std::string>& values, std::string& delimiter) {
72 std::string header = "";
73 this->writeToFile(filename, values, delimiter, header);
74 }
75
reportBafBasedResults(std::vector<std::string> & orderedChrs,std::map<std::string,boost::shared_ptr<Hmm>> & chrHmms,std::vector<unsigned long> & informativeIndices,std::vector<std::vector<char>> phasedAlleles,std::map<std::string,std::vector<std::string>> & bafStrFields,std::map<std::string,std::vector<unsigned int>> & bafIntFields,std::map<std::string,std::vector<double>> & bafDoubleFields,std::string & destinationDir,std::string & outputPrefix)76 void Reporter::reportBafBasedResults( std::vector<std::string>& orderedChrs,
77 std::map<std::string, boost::shared_ptr<Hmm> >& chrHmms,
78 std::vector<unsigned long>& informativeIndices,
79 std::vector< std::vector<char> > phasedAlleles,
80 std::map<std::string, std::vector<std::string> >& bafStrFields,
81 std::map<std::string, std::vector<unsigned int> >& bafIntFields,
82 std::map<std::string, std::vector<double> >& bafDoubleFields,
83 std::string& destinationDir,
84 std::string& outputPrefix) {
85
86 unsigned int numBafEntries = bafStrFields[hlsutil::InputProcessor::BAF_CHR].size();
87 std::cout << "MDF entries size = " << numBafEntries << "\n";
88 std::string SPACE = " ";
89 std::string ENDL = "\n";
90 std::string TAB = "\t";
91 std::string nullString = "NA";
92
93 if (destinationDir != " " and destinationDir[-1] != '/') {
94 destinationDir += "/";
95 }
96
97 hlsutil::StringUtil str;
98 hlsutil::VcfUtil vcf;
99
100 // INITIALIZE FILES
101 std::string posteriorFilename = destinationDir + outputPrefix + ".posterior.dat";
102 std::string hmmParamFilename = destinationDir + outputPrefix + ".param.dat";
103 std::string hmmLikelihoodsFilename = destinationDir + outputPrefix + ".likelihoods.dat";
104 std::string eventFilename = destinationDir + outputPrefix + ".events.dat";
105
106 std::ofstream posteriorFile(posteriorFilename.c_str(), std::ofstream::out);
107 std::ofstream hmmParamFile(hmmParamFilename.c_str(), std::ofstream::out);
108 std::ofstream hmmLikelihoodsFile(hmmLikelihoodsFilename.c_str(), std::ofstream::out);
109 std::ofstream eventFile(eventFilename.c_str(), std::ofstream::out);
110
111 std::string posteriorHeader = "CHR\tPOS\tREF\tALT\tINDEX\tPHASE_CONCORDANCE_SWITCH\tHAP\tGT\tRAF\tLRR";
112 std::string eventHeader = "CHR\tBEGIN\tEND\tEVENT_STATE\tNUM_INFORMATIVE_MARKERS\tMEAN_POSTPROB\tPHASE_CONCORDANCE";
113
114 // GET POSSIBLE STATES (possible states for each chr are the same)
115 std::string firstChr = orderedChrs[0];
116 Hmm &firstHmm = *chrHmms[firstChr];
117 std::vector<unsigned long> states = firstHmm.getStates();
118
119 // COLLECT POSTERIOR PROBABILITIES FOR ALL STATES AT ALL INFORMATIVE MARKERS
120 std::vector< std::vector<double> > statePosteriors;
121 for (unsigned int i=0; i < states.size(); i++) {
122 unsigned long stateId = states[i];
123 posteriorHeader += "\t" + (*chrHmms[firstChr]).getStr(stateId);
124 std::vector<double> posteriors;
125
126 for (unsigned int i = 0; i < orderedChrs.size(); i++) {
127 std::string chr = orderedChrs[i];
128 Hmm &chrHmm = *chrHmms[chr];
129 std::vector<double> chrPosteriors = chrHmm.extractPosteriors(stateId);
130 posteriors.insert(posteriors.end(), chrPosteriors.begin(), chrPosteriors.end());
131 }
132 statePosteriors.push_back(posteriors);
133 }
134
135 // COLLECT SWITCHES
136 std::vector<std::string> observations;
137 for (unsigned int i = 0; i < orderedChrs.size(); i++) {
138 std::string chr = orderedChrs[i];
139 Hmm &chrHmm = *chrHmms[chr];
140 std::vector<std::string> chrObservations = chrHmm.getObservations();
141 observations.insert(observations.end(), chrObservations.begin(), chrObservations.end());
142 }
143
144 // TRACK PUTATIVE EVENTS
145 unsigned int eventState = 0;
146 bool inEvent = false; // in possible event if in stretch where posterior >= eventPosteriorThreshold
147 bool validEvent = false; // event is valid if any posterior >= validEventPosteriorThreshold
148 unsigned int eventBegin = 0;
149 unsigned int eventEnd = 0;
150 unsigned int eventMarkerCount = 0;
151 double validEventPosteriorThreshold = 0.9;
152 double eventPosteriorThreshold = 0.5;
153 std::vector<double> eventPosteriors;
154 std::vector<int> eventSwitches;
155
156 // OUTPUT POSTERIOR AND EVENT DAT FILE (TODO: refactor this into its own method)
157 posteriorFile << posteriorHeader << std::endl;
158 eventFile << eventHeader << std::endl;
159 unsigned long posIndex = 0;
160 unsigned long informativeIndex = 0;
161 bool chrChange = false;
162 std::string previousChr = "_initChr";
163 for (unsigned int bafIndex = 0; bafIndex < numBafEntries; bafIndex++) {
164 std::string gt = bafStrFields[hlsutil::InputProcessor::BAF_GT][bafIndex];
165 std::string chr = bafStrFields[hlsutil::InputProcessor::BAF_CHR][bafIndex];
166 unsigned int pos = bafIntFields[hlsutil::InputProcessor::BAF_POS][bafIndex];
167 std::string ref = bafStrFields[hlsutil::InputProcessor::BAF_REF][bafIndex];
168 std::string alt = bafStrFields[hlsutil::InputProcessor::BAF_ALT][bafIndex];
169 double logRR = bafDoubleFields[hlsutil::InputProcessor::BAF_LOGRR][bafIndex];
170 double refFreq = bafDoubleFields[hlsutil::InputProcessor::BAF_RAF][bafIndex];
171
172 if (posIndex == informativeIndices[informativeIndex]) {
173 int haplotype = 0;
174 // Each marker is a SNP so each ref string is only 1 character long.
175 if (phasedAlleles[0][posIndex] == ref[0]) {
176 haplotype = 1;
177 } else {
178 if (phasedAlleles[1][posIndex] == ref[0]) {
179 haplotype = 2;
180 }
181 }
182
183 // There is 1 less posterior probability than het sites for phase_concordance.
184 // In the HMM, the last
185 // posterior is stored as 0 across states. For a more accurate output,
186 // we'll output a null string for this last state.
187 if (chr != previousChr) {
188 chrChange = true;
189 } else {
190 chrChange = false;
191 }
192
193 int phaseConcordanceSwitch = atoi(observations[informativeIndex].c_str());
194
195 std::string gtNum = vcf.abToNumStr(gt, nullString);
196
197 posteriorFile << chr << "\t"
198 << pos << "\t"
199 << ref << "\t"
200 << alt << "\t"
201 << posIndex << "\t"
202 << phaseConcordanceSwitch << "\t"
203 << haplotype << "\t"
204 << gt << "\t"
205 << refFreq << "\t"
206 << logRR;
207
208 for (unsigned int stateIndex = 0; stateIndex < statePosteriors.size(); stateIndex++) {
209 if (chrChange) {
210 posteriorFile << "\t" << nullString;
211
212 // UPDATE EVENT DETAILS
213 // if chrChange then check if was inEvent
214 // if inEvent, output event and reset values
215 if (inEvent && eventState == stateIndex && validEvent) {
216 double posteriorSum = 0;
217 BOOST_FOREACH(double ep, eventPosteriors) {
218 posteriorSum += ep;
219 }
220 double eventPosterior = posteriorSum / (double) eventMarkerCount;
221
222 double switchSum = 0;
223 BOOST_FOREACH(int ss, eventSwitches) {
224 switchSum += ss;
225 }
226 double eventPhaseConcordance = switchSum / (double) eventMarkerCount;
227
228 eventFile << previousChr << "\t"
229 << eventBegin << "\t"
230 << eventEnd << "\t"
231 << "S" << eventState << "\t"
232 << eventMarkerCount << "\t"
233 << eventPosterior << "\t"
234 << eventPhaseConcordance << "\n";
235
236 // reset values for chr changes
237 inEvent = false;
238 validEvent = false;
239 eventState = 0;
240 eventMarkerCount = 0;
241 eventPosteriors.clear();
242 eventSwitches.clear();
243 }
244
245 }
246 else {
247 double posterior = statePosteriors[stateIndex][informativeIndex];
248 posteriorFile << "\t" << posterior;
249
250 if (stateIndex == 0 || (inEvent && eventState != stateIndex)) {
251 continue;
252 }
253
254 // enter event (really event state shouldn't have to equal 0....if it doesn't, switching event states)
255 if (!inEvent && posterior >= eventPosteriorThreshold) {
256 inEvent = true;
257
258 eventBegin = pos;
259 eventEnd = pos;
260 eventMarkerCount += 1;
261 eventPosteriors.push_back(posterior);
262 eventSwitches.push_back(phaseConcordanceSwitch);
263 eventState = stateIndex;
264 if (posterior >= validEventPosteriorThreshold) {
265 validEvent = true;
266 }
267 } else {
268 // extend event
269 if (inEvent && eventState == stateIndex && posterior >= eventPosteriorThreshold) {
270 eventEnd = pos;
271 eventMarkerCount += 1;
272 eventPosteriors.push_back(posterior);
273 eventSwitches.push_back(phaseConcordanceSwitch);
274 if (!validEvent) {
275 if (posterior >= validEventPosteriorThreshold) {
276 validEvent = true;
277 }
278 }
279 // std::cout << "event " << stateIndex << "\t" << chr << "\t" << eventBegin << "\t" << eventEnd << "\t" << eventMarkerCount << "\n";
280 } else {
281 // leave event
282 if (inEvent && eventState == stateIndex && posterior < eventPosteriorThreshold) {
283
284 if (validEvent) {
285 double posteriorSum = 0;
286 BOOST_FOREACH(double ep, eventPosteriors) {
287 posteriorSum += ep;
288 }
289 double eventPosterior = posteriorSum / (double) eventMarkerCount;
290
291 double switchSum = 0;
292 BOOST_FOREACH(int ss, eventSwitches) {
293 switchSum += ss;
294 }
295 double eventPhaseConcordance = switchSum / (double) eventMarkerCount;
296
297 eventFile << chr << "\t"
298 << eventBegin << "\t"
299 << eventEnd << "\t"
300 << "S" << eventState << "\t"
301 << eventMarkerCount << "\t"
302 << eventPosterior << "\t"
303 << eventPhaseConcordance << "\n";
304 }
305
306 inEvent = false;
307 validEvent = false;
308 eventState = 0;
309 eventMarkerCount = 0;
310 eventPosteriors.clear();
311 eventSwitches.clear();
312 }
313 }
314 }
315 }
316 }
317 posteriorFile << std::endl;
318 informativeIndex += 1;
319
320 if (chrChange) {
321 previousChr = chr;
322 }
323 }
324 posIndex += 1;
325 }
326 posteriorFile.close();
327
328 // EXPORT CHROMOSOME BASED REPORTS
329 std::cout << "Reporting data for chromosomes.\n";
330 for (unsigned int i = 0; i < orderedChrs.size(); i++) {
331 std::string chr = orderedChrs[i];
332 std::cout << "Reporting data for chromosome " << chr << "\n";
333 Hmm &chrHmm = *chrHmms[chr];
334 chrHmm.calcLogProbObs();
335 hmmLikelihoodsFile << "> " << chr << "\t(" << chrHmm.getLogProbObs() << ")\n";
336 std::vector<double> likelihoodProfile = chrHmm.getLikelihoodProfile();
337 for (unsigned int iteration = 0; iteration < likelihoodProfile.size(); iteration++) {
338 hmmLikelihoodsFile << likelihoodProfile[iteration] << "\t";
339 }
340 hmmLikelihoodsFile << "\n";
341 // allow writing of some type of header for each chr and matrix type
342 hmmParamFile << "> " << chr << "\t(from\tto\tprob)\n";
343 chrHmm.writeTrans(hmmParamFile);
344 chrHmm.writeEmit(hmmParamFile);
345 }
346 hmmParamFile.close();
347 }
348
reportVcfBasedResults(std::vector<std::string> & orderedChrs,std::map<std::string,boost::shared_ptr<Hmm>> & chrHmms,std::vector<unsigned long> & informativeIndices,std::vector<std::vector<char>> phasedAlleles,std::map<std::string,std::vector<std::string>> & vcfStrFields,std::map<std::string,std::vector<unsigned int>> & vcfIntFields,std::map<std::string,std::vector<double>> & vcfDoubleFields,std::string & destinationDir,std::string & outputPrefix)349 void Reporter::reportVcfBasedResults( std::vector<std::string>& orderedChrs,
350 std::map<std::string, boost::shared_ptr<Hmm> >& chrHmms,
351 std::vector<unsigned long>& informativeIndices,
352 std::vector< std::vector<char> > phasedAlleles,
353 std::map<std::string, std::vector<std::string> >& vcfStrFields,
354 std::map<std::string, std::vector<unsigned int> >& vcfIntFields,
355 std::map<std::string, std::vector<double> >& vcfDoubleFields,
356 std::string& destinationDir,
357 std::string& outputPrefix) {
358
359 unsigned int numVcfEntries = vcfStrFields[hlsutil::InputProcessor::VCF_CHR].size();
360 // std::cout << "VCF entries size = " << numVcfEntries << "\n";
361 std::string SPACE = " ";
362 std::string ENDL = "\n";
363 std::string TAB = "\t";
364 std::string nullString = "NA";
365
366 if (destinationDir != " " and destinationDir[-1] != '/') {
367 destinationDir += "/";
368 }
369
370 hlsutil::StringUtil str;
371 hlsutil::VcfUtil vcf;
372
373 // INITIALIZE FILES
374 std::string posteriorFilename = destinationDir + outputPrefix + ".posterior.dat";
375 std::string hmmParamFilename = destinationDir + outputPrefix + ".param.dat";
376 std::string hmmLikelihoodsFilename = destinationDir + outputPrefix + ".likelihoods.dat";
377 std::string eventFilename = destinationDir + outputPrefix + ".events.dat";
378
379 std::ofstream posteriorFile(posteriorFilename.c_str(), std::ofstream::out);
380 std::ofstream hmmParamFile(hmmParamFilename.c_str(), std::ofstream::out);
381 std::ofstream hmmLikelihoodsFile(hmmLikelihoodsFilename.c_str(), std::ofstream::out);
382 std::ofstream eventFile(eventFilename.c_str(), std::ofstream::out);
383
384 std::string posteriorHeader = "CHR\tPOS\tREF\tALT\tINDEX\tPHASE_CONCORDANCE_SWITCH\tHAP\tGT\tRAF\tDP";
385 std::string eventHeader = "CHR\tBEGIN\tEND\tEVENT_STATE\tNUM_INFORMATIVE_MARKERS\tMEAN_POSTPROB\tPHASE_CONCORDANCE";
386
387 // GET POSSIBLE STATES (possible states for each chr are the same)
388 std::string firstChr = orderedChrs[0];
389 Hmm &firstHmm = *chrHmms[firstChr];
390 std::vector<unsigned long> states = firstHmm.getStates();
391
392 // COLLECT POSTERIOR PROBABILITIES FOR ALL STATES AT ALL INFORMATIVE MARKERS
393 std::vector< std::vector<double> > statePosteriors;
394 for (unsigned int i=0; i < states.size(); i++) {
395 unsigned long stateId = states[i];
396 posteriorHeader += "\t" + (*chrHmms[firstChr]).getStr(stateId);
397 std::vector<double> posteriors;
398
399 for (unsigned int i = 0; i < orderedChrs.size(); i++) {
400 std::string chr = orderedChrs[i];
401 Hmm &chrHmm = *chrHmms[chr];
402 std::vector<double> chrPosteriors = chrHmm.extractPosteriors(stateId);
403 posteriors.insert(posteriors.end(), chrPosteriors.begin(), chrPosteriors.end());
404 }
405 statePosteriors.push_back(posteriors);
406 }
407
408 // COLLECT SWITCHES
409 std::vector<std::string> observations;
410 for (unsigned int i = 0; i < orderedChrs.size(); i++) {
411 std::string chr = orderedChrs[i];
412 Hmm &chrHmm = *chrHmms[chr];
413 std::vector<std::string> chrObservations = chrHmm.getObservations();
414 observations.insert(observations.end(), chrObservations.begin(), chrObservations.end());
415 }
416
417 // TRACK PUTATIVE EVENTS
418 unsigned int eventState = 0;
419 bool inEvent = false; // in possible event if in stretch where posterior >= eventPosteriorThreshold
420 bool validEvent = false; // event is valid if any posterior >= validEventPosteriorThreshold
421 unsigned int eventBegin = 0;
422 unsigned int eventEnd = 0;
423 unsigned int eventMarkerCount = 0;
424 double validEventPosteriorThreshold = 0.9;
425 double eventPosteriorThreshold = 0.5;
426 std::vector<double> eventPosteriors;
427 std::vector<int> eventSwitches;
428
429 // OUTPUT POSTERIOR AND EVENT DAT FILE (TODO: refactor this into its own method)
430 posteriorFile << posteriorHeader << std::endl;
431 eventFile << eventHeader << std::endl;
432 unsigned long posIndex = 0;
433 unsigned long informativeIndex = 0;
434 bool chrChange = false;
435 std::string previousChr = "_initChr";
436 for (unsigned int vcfIndex = 0; vcfIndex < numVcfEntries; vcfIndex++) {
437
438 std::string gt = vcfStrFields[hlsutil::InputProcessor::VCF_GT][vcfIndex];
439 std::string chr = vcfStrFields[hlsutil::InputProcessor::VCF_CHR][vcfIndex];
440 unsigned int pos = vcfIntFields[hlsutil::InputProcessor::VCF_POS][vcfIndex];
441 std::string ref = vcfStrFields[hlsutil::InputProcessor::VCF_REF][vcfIndex];
442 std::string alt = vcfStrFields[hlsutil::InputProcessor::VCF_ALT][vcfIndex];
443 unsigned int depth = vcfIntFields[hlsutil::InputProcessor::VCF_DP][vcfIndex];
444 double refFreq = vcfDoubleFields[hlsutil::InputProcessor::VCF_REF_FREQ][vcfIndex];
445
446 std::string gtNum = vcf.gtToNumStr(gt, nullString);
447
448 if (posIndex == informativeIndices[informativeIndex]) {
449 int haplotype = 0;
450 // Each marker is a SNP so each ref string is only 1 character long.
451 if (phasedAlleles[0][posIndex] == ref[0]) { //bAllele[0]) {
452 haplotype = 1;
453 } else {
454 if (phasedAlleles[1][posIndex] == ref[0]) { //bAllele[0]) {
455 haplotype = 2;
456 }
457 }
458
459 // There is 1 less posterior probability than het sites for phase_concordance.
460 // In the HMM, the last
461 // posterior is stored as 0 across states. For a more accurate output,
462 // we'll output a null string for this last state.
463 if (chr != previousChr) {
464 chrChange = true;
465 } else {
466 chrChange = false;
467 }
468
469 int phaseConcordanceSwitch = atoi(observations[informativeIndex].c_str());
470
471 std::string gtNum = vcf.abToNumStr(gt, nullString);
472
473 posteriorFile << chr << "\t"
474 << pos << "\t"
475 << ref << "\t"
476 << alt << "\t"
477 << posIndex << "\t"
478 << phaseConcordanceSwitch << "\t"
479 << haplotype << "\t"
480 << gt << "\t"
481 << refFreq << "\t"
482 << depth;
483
484 for (unsigned int stateIndex = 0; stateIndex < statePosteriors.size(); stateIndex++) {
485 if (chrChange) {
486 posteriorFile << "\t" << nullString;
487
488 // UPDATE EVENT DETAILS
489 // if chrChange then check if was inEvent
490 // if inEvent, output event and reset values
491 if (inEvent && eventState == stateIndex && validEvent) {
492 double posteriorSum = 0;
493 BOOST_FOREACH(double ep, eventPosteriors) {
494 posteriorSum += ep;
495 }
496 double eventPosterior = posteriorSum / (double) eventMarkerCount;
497
498 double switchSum = 0;
499 BOOST_FOREACH(int ss, eventSwitches) {
500 switchSum += ss;
501 }
502 double eventPhaseConcordance = switchSum / (double) eventMarkerCount;
503
504 eventFile << previousChr << "\t"
505 << eventBegin << "\t"
506 << eventEnd << "\t"
507 << "S" << eventState << "\t"
508 << eventMarkerCount << "\t"
509 << eventPosterior << "\t"
510 << eventPhaseConcordance << "\n";
511
512 // reset values for chr changes
513 inEvent = false;
514 validEvent = false;
515 eventState = 0;
516 eventMarkerCount = 0;
517 eventPosteriors.clear();
518 eventSwitches.clear();
519 }
520
521 }
522 else {
523 double posterior = statePosteriors[stateIndex][informativeIndex];
524 posteriorFile << "\t" << posterior;
525
526 if (stateIndex == 0 || (inEvent && eventState != stateIndex)) {
527 continue;
528 }
529
530 // enter event (really event state shouldn't have to equal 0....if it doesn't, switching event states)
531 if (!inEvent && posterior >= eventPosteriorThreshold) {
532 inEvent = true;
533
534 eventBegin = pos;
535 eventEnd = pos;
536 eventMarkerCount += 1;
537 eventPosteriors.push_back(posterior);
538 eventSwitches.push_back(phaseConcordanceSwitch);
539 eventState = stateIndex;
540 if (posterior >= validEventPosteriorThreshold) {
541 validEvent = true;
542 }
543 } else {
544 // extend event unless we are at the last informative index (then go to else)
545 if (inEvent && eventState == stateIndex && posterior >= eventPosteriorThreshold && informativeIndex != informativeIndices.size() - 2) {
546 eventEnd = pos;
547 eventMarkerCount += 1;
548 eventPosteriors.push_back(posterior);
549 eventSwitches.push_back(phaseConcordanceSwitch);
550 if (!validEvent) {
551 if (posterior >= validEventPosteriorThreshold) {
552 validEvent = true;
553 }
554 }
555 // std::cout << "event " << stateIndex << "\t" << chr << "\t" << eventBegin << "\t" << eventEnd << "\t" << eventMarkerCount << "\n";
556 } else {
557 // leave event if in event and posterior drops below minimum posterior threshold for an event OR if we are at the last posterior in our dataset and in an event
558 if (inEvent && eventState == stateIndex && (posterior < eventPosteriorThreshold || informativeIndex == informativeIndices.size() - 2)) {
559
560 if (validEvent) {
561 double posteriorSum = 0;
562 BOOST_FOREACH(double ep, eventPosteriors) {
563 posteriorSum += ep;
564 }
565 double eventPosterior = posteriorSum / (double) eventMarkerCount;
566
567 double switchSum = 0;
568 BOOST_FOREACH(int ss, eventSwitches) {
569 switchSum += ss;
570 }
571 double eventPhaseConcordance = switchSum / (double) eventMarkerCount;
572
573 eventFile << chr << "\t"
574 << eventBegin << "\t"
575 << eventEnd << "\t"
576 << "S" << eventState << "\t"
577 << eventMarkerCount << "\t"
578 << eventPosterior << "\t"
579 << eventPhaseConcordance << "\n";
580 }
581
582 inEvent = false;
583 validEvent = false;
584 eventState = 0;
585 eventMarkerCount = 0;
586 eventPosteriors.clear();
587 eventSwitches.clear();
588 }
589 }
590 }
591 }
592 }
593 posteriorFile << std::endl;
594 informativeIndex += 1;
595
596 if (chrChange) {
597 previousChr = chr;
598 }
599 }
600 posIndex += 1;
601 }
602 posteriorFile.close();
603
604 // EXPORT CHROMOSOME BASED REPORTS
605 for (unsigned int i = 0; i < orderedChrs.size(); i++) {
606 std::string chr = orderedChrs[i];
607 Hmm &chrHmm = *chrHmms[chr];
608 chrHmm.calcLogProbObs();
609 hmmLikelihoodsFile << "> " << chr << "\t(" << chrHmm.getLogProbObs() << ")\n";
610 std::vector<double> likelihoodProfile = chrHmm.getLikelihoodProfile();
611 for (unsigned int iteration = 0; iteration < likelihoodProfile.size(); iteration++) {
612 hmmLikelihoodsFile << likelihoodProfile[iteration] << "\t";
613 }
614 hmmLikelihoodsFile << "\n";
615 // allow writing of some type of header for each chr and matrix type
616 hmmParamFile << "> " << chr << "\t(from\tto\tprob)\n";
617 chrHmm.writeTrans(hmmParamFile);
618 chrHmm.writeEmit(hmmParamFile);
619 }
620 hmmParamFile.close();
621 }
622
623 //void Reporter::reportVcfBasedResults( std::vector<std::string>& orderedChrs,
624 // std::map<std::string, boost::shared_ptr<Hmm> >& chrHmms,
625 // std::vector<unsigned long>& informativeIndices,
626 // std::vector< std::vector<char> > phasedAlleles,
627 // std::map<std::string, std::vector<std::string> >& vcfStrFields,
628 // std::map<std::string, std::vector<unsigned int> >& vcfIntFields,
629 // std::map<std::string, std::vector<double> >& vcfDoubleFields,
630 // std::string& destinationDir,
631 // std::string& outputPrefix) {
632 //
633 // unsigned int numVcfEntries = vcfStrFields[hlsutil::InputProcessor::VCF_CHR].size();
634 // std::cout << "vcf entries size = " << numVcfEntries << "\n";
635 // std::string SPACE = " ";
636 // std::string ENDL = "\n";
637 // std::string TAB = "\t";
638 // std::string nullString = "NA";
639 //
640 // if (destinationDir != " " and destinationDir[-1] != '/') {
641 // destinationDir += "/";
642 // }
643 //
644 // hlsutil::StringUtil str;
645 // hlsutil::VcfUtil vcf;
646 //
647 //// std::string datFilename = destinationDir + outputPrefix + ".dat";
648 // std::string posteriorDatFilename = destinationDir + outputPrefix + ".posterior.dat";
649 // std::string hmmParamFilename = destinationDir + outputPrefix + ".param.dat";
650 //// std::string hmmLikelihoodsFilename = destinationDir + outputPrefix + ".likelihoods.dat";
651 // //std::string likelihoodsFilename = destinationDir + outputPrefix + ".likelihoods.dat";
652 //
653 //// std::ifstream infile(vcfFilename.c_str(), std::ifstream::in);
654 //
655 //// std::ofstream datFile(datFilename.c_str(), std::ofstream::out);
656 // std::ofstream posteriorDatFile(posteriorDatFilename.c_str(), std::ofstream::out);
657 //// std::ofstream hmmLikelihoodsFile(hmmLikelihoodsFilename.c_str(), std::ofstream::out);
658 //
659 // std::string datHeader = "chr\tpos\tref\talt\tgt\tcov\trefFreq\tindex";
660 //// datFile << datHeader << std::endl;
661 //
662 // // get posteriors for each state
663 // std::string posteriorDatHeader = datHeader + "\tref_hap";
664 //
665 // // possible states for each chr are the same
666 // std::string firstChr = orderedChrs[0];
667 // Hmm &firstHmm = *chrHmms[firstChr];
668 // std::vector<unsigned long> states = firstHmm.getStates();
669 //// std::cout << "states size " << firstHmm.getStates().size() << "\n";
670 //
671 // std::vector< std::vector<double> > statePosteriors;
672 // for (unsigned int i=0; i < states.size(); i++) {
673 // unsigned long stateId = states[i];
674 //// std::cout << "About to access state " << firstChr << "\t" << stateId << "\n";
675 // posteriorDatHeader += "\t" + (*chrHmms[firstChr]).getStr(stateId);
676 // std::vector<double> posteriors;
677 //
678 // for (unsigned int i = 0; i < orderedChrs.size(); i++) {
679 // std::string chr = orderedChrs[i];
680 // Hmm &chrHmm = *chrHmms[chr];
681 //// std::cout << "accessing " << chr << "\n";
682 // std::vector<double> chrPosteriors = chrHmm.extractPosteriors(stateId);
683 // posteriors.insert(posteriors.end(), chrPosteriors.begin(), chrPosteriors.end());
684 // }
685 //// std::cerr << "Num posteriors " << stateId << ": " << posteriors.size() << std::endl;
686 // statePosteriors.push_back(posteriors);
687 // }
688 // posteriorDatFile << posteriorDatHeader << std::endl;
689 //
690 // unsigned long posIndex = 0;
691 // unsigned long informativeIndex = 0;
692 // bool chrChange = false;
693 // std::string previousChr = "_initChr";
694 // for (unsigned int vcfIndex = 0; vcfIndex < numVcfEntries; vcfIndex++) {
695 //
696 // std::string gt = vcfStrFields[hlsutil::InputProcessor::VCF_GT][vcfIndex];
697 // std::string chr = vcfStrFields[hlsutil::InputProcessor::VCF_CHR][vcfIndex];
698 // unsigned int pos = vcfIntFields[hlsutil::InputProcessor::VCF_POS][vcfIndex];
699 // std::string ref = vcfStrFields[hlsutil::InputProcessor::VCF_REF][vcfIndex];
700 // std::string alt = vcfStrFields[hlsutil::InputProcessor::VCF_ALT][vcfIndex];
701 // unsigned int depth = vcfIntFields[hlsutil::InputProcessor::VCF_DP][vcfIndex];
702 // double refFreq = vcfDoubleFields[hlsutil::InputProcessor::VCF_REF_FREQ][vcfIndex];
703 //
704 // std::string gtNum = vcf.gtToNumStr(gt, nullString);
705 //
706 //
707 // if (posIndex == informativeIndices[informativeIndex]) {
708 // int haplotype = 0;
709 // // Each marker is a SNP so each ref string is only 1 character long.
710 // if (phasedAlleles[0][posIndex] == ref[0]) {
711 // haplotype = 1;
712 // } else {
713 // if (phasedAlleles[1][posIndex] == ref[0]) {
714 // haplotype = 2;
715 // }
716 // }
717 // posteriorDatFile << chr << "\t"
718 // << pos << "\t"
719 // << ref << "\t"
720 // << alt << "\t"
721 // << gtNum << "\t"
722 // << depth << "\t"
723 // << refFreq << "\t"
724 // << posIndex << "\t"
725 // << haplotype;
726 //
727 // // There is 1 less posterior probability than het sites for phase_concordance.
728 // // In the HMM, the last
729 // // posterior is stored as 0 across states. For a more accurate output,
730 // // we'll output a null string for this last state.
731 // if (chr != previousChr) {
732 // chrChange = true;
733 // previousChr = chr;
734 // } else {
735 // chrChange = false;
736 // }
737 //
738 // for (unsigned int stateIndex = 0; stateIndex < statePosteriors.size(); stateIndex++) {
739 // if (chrChange == true) {
740 // posteriorDatFile << "\t" << nullString;
741 // }
742 // else {
743 // double posterior = statePosteriors[stateIndex][informativeIndex];
744 // posteriorDatFile << "\t" << posterior;
745 // }
746 // }
747 //
748 // posteriorDatFile << std::endl;
749 //
750 // informativeIndex += 1;
751 // }
752 //
753 // posIndex += 1;
754 //
755 // }
756 //
757 // posteriorDatFile.close();
758 //// datFile.close();
759 //
760 //// std::ofstream hmmParamFile(hmmParamFilename.c_str(), std::ofstream::out);
761 //// for (unsigned int i = 0; i < orderedChrs.size(); i++) {
762 //// std::string chr = orderedChrs[i];
763 //// Hmm &chrHmm = *chrHmms[chr];
764 //// chrHmm.calcLogProbObs();
765 //// hmmLikelihoodsFile << chr << "\t" << chrHmm.getLogProbObs() << "\n";
766 //// // allow writing of some type of header for each chr and matrix type
767 //// chrHmm.writeTrans(hmmParamFile);
768 //// chrHmm.writeEmit(hmmParamFile);
769 //// }
770 //}
771
772 } /* namespace haplohseq */
773