1 /*
2  * Reporter.cpp
3  *
4  * Author: Anthony San Lucas
5  * Email:  sanlucas@gmail.com
6  */
7 
8 #include "Reporter.h"
9 
10 namespace haplohseq {
11 
Reporter()12 Reporter::Reporter() {
13 	// TODO Auto-generated constructor stub
14 
15 }
16 
~Reporter()17 Reporter::~Reporter() {
18 	// TODO Auto-generated destructor stub
19 }
20 
21 
writeToFile(std::string & filename,std::vector<double> & values,std::string & delimiter,std::string & header)22 void Reporter::writeToFile(std::string& filename, std::vector<double>& values, std::string& delimiter, std::string& header) {
23 	std::ofstream file;
24 	file.open(filename.c_str());
25 	if (header != "") {
26 		file << header << std::endl;
27 	}
28 	for (unsigned int i = 0; i < values.size()-1; i++) {
29 		file << values[i] << delimiter;
30 	}
31 	file << values[values.size()-1];
32 	file.close();
33 }
34 
writeToFile(std::string & filename,std::vector<double> & values,std::string & delimiter)35 void Reporter::writeToFile(std::string& filename, std::vector<double>& values, std::string& delimiter) {
36 	std::string header = "";
37 	this->writeToFile(filename, values, delimiter, header);
38 }
39 
writeToFile(std::string & filename,std::vector<unsigned long> & values,std::string & delimiter,std::string & header)40 void Reporter::writeToFile(std::string& filename, std::vector<unsigned long>& values, std::string& delimiter, std::string& header) {
41 	std::ofstream file;
42 	file.open(filename.c_str());
43 	if (header != "") {
44 		file << header << std::endl;
45 	}
46 	for (unsigned int i = 0; i < values.size()-1; i++) {
47 		file << values[i] << delimiter;
48 	}
49 	file << values[values.size()-1];
50 	file.close();
51 }
52 
writeToFile(std::string & filename,std::vector<unsigned long> & values,std::string & delimiter)53 void Reporter::writeToFile(std::string& filename, std::vector<unsigned long>& values, std::string& delimiter) {
54 	std::string header = "";
55 	this->writeToFile(filename, values, delimiter, header);
56 }
57 
writeToFile(std::string & filename,std::vector<std::string> & values,std::string & delimiter,std::string & header)58 void Reporter::writeToFile(std::string& filename, std::vector<std::string>& values, std::string& delimiter, std::string& header) {
59 	std::ofstream file;
60 	file.open(filename.c_str());
61 	if (header != "") {
62 		file << header << std::endl;
63 	}
64 	for (unsigned int i = 0; i < values.size()-1; i++) {
65 		file << values[i] << delimiter;
66 	}
67 	file << values[values.size()-1];
68 	file.close();
69 }
70 
writeToFile(std::string & filename,std::vector<std::string> & values,std::string & delimiter)71 void Reporter::writeToFile(std::string& filename, std::vector<std::string>& values, std::string& delimiter) {
72 	std::string header = "";
73 	this->writeToFile(filename, values, delimiter, header);
74 }
75 
reportBafBasedResults(std::vector<std::string> & orderedChrs,std::map<std::string,boost::shared_ptr<Hmm>> & chrHmms,std::vector<unsigned long> & informativeIndices,std::vector<std::vector<char>> phasedAlleles,std::map<std::string,std::vector<std::string>> & bafStrFields,std::map<std::string,std::vector<unsigned int>> & bafIntFields,std::map<std::string,std::vector<double>> & bafDoubleFields,std::string & destinationDir,std::string & outputPrefix)76 void Reporter::reportBafBasedResults(	std::vector<std::string>& orderedChrs,
77 						std::map<std::string, boost::shared_ptr<Hmm> >& chrHmms,
78 						std::vector<unsigned long>& informativeIndices,
79 						std::vector< std::vector<char> > phasedAlleles,
80 						std::map<std::string, std::vector<std::string> >& bafStrFields,
81 						std::map<std::string, std::vector<unsigned int> >& bafIntFields,
82 						std::map<std::string, std::vector<double> >& bafDoubleFields,
83 						std::string& destinationDir,
84 						std::string& outputPrefix) {
85 
86 	unsigned int numBafEntries = bafStrFields[hlsutil::InputProcessor::BAF_CHR].size();
87 	std::cout << "MDF entries size = " << numBafEntries << "\n";
88 	std::string SPACE = " ";
89 	std::string ENDL = "\n";
90 	std::string TAB = "\t";
91 	std::string nullString = "NA";
92 
93 	if (destinationDir != " " and destinationDir[-1] != '/') {
94 		destinationDir += "/";
95 	}
96 
97 	hlsutil::StringUtil str;
98 	hlsutil::VcfUtil vcf;
99 
100 	// INITIALIZE FILES
101 	std::string posteriorFilename = destinationDir + outputPrefix + ".posterior.dat";
102 	std::string hmmParamFilename = destinationDir + outputPrefix + ".param.dat";
103 	std::string hmmLikelihoodsFilename = destinationDir + outputPrefix + ".likelihoods.dat";
104 	std::string eventFilename = destinationDir + outputPrefix + ".events.dat";
105 
106 	std::ofstream posteriorFile(posteriorFilename.c_str(), std::ofstream::out);
107 	std::ofstream hmmParamFile(hmmParamFilename.c_str(), std::ofstream::out);
108 	std::ofstream hmmLikelihoodsFile(hmmLikelihoodsFilename.c_str(), std::ofstream::out);
109 	std::ofstream eventFile(eventFilename.c_str(), std::ofstream::out);
110 
111 	std::string posteriorHeader = "CHR\tPOS\tREF\tALT\tINDEX\tPHASE_CONCORDANCE_SWITCH\tHAP\tGT\tRAF\tLRR";
112 	std::string eventHeader = "CHR\tBEGIN\tEND\tEVENT_STATE\tNUM_INFORMATIVE_MARKERS\tMEAN_POSTPROB\tPHASE_CONCORDANCE";
113 
114 	// GET POSSIBLE STATES (possible states for each chr are the same)
115 	std::string firstChr = orderedChrs[0];
116 	Hmm &firstHmm = *chrHmms[firstChr];
117 	std::vector<unsigned long> states = firstHmm.getStates();
118 
119 	// COLLECT POSTERIOR PROBABILITIES FOR ALL STATES AT ALL INFORMATIVE MARKERS
120 	std::vector< std::vector<double> > statePosteriors;
121 	for (unsigned int i=0; i < states.size(); i++) {
122 		unsigned long stateId = states[i];
123 		posteriorHeader += "\t" + (*chrHmms[firstChr]).getStr(stateId);
124 		std::vector<double> posteriors;
125 
126 		for (unsigned int i = 0; i < orderedChrs.size(); i++) {
127 			std::string chr = orderedChrs[i];
128 			Hmm &chrHmm = *chrHmms[chr];
129 			std::vector<double> chrPosteriors = chrHmm.extractPosteriors(stateId);
130 			posteriors.insert(posteriors.end(), chrPosteriors.begin(), chrPosteriors.end());
131 		}
132 		statePosteriors.push_back(posteriors);
133 	}
134 
135 	// COLLECT SWITCHES
136 	std::vector<std::string> observations;
137 	for (unsigned int i = 0; i < orderedChrs.size(); i++) {
138 		std::string chr = orderedChrs[i];
139 		Hmm &chrHmm = *chrHmms[chr];
140 		std::vector<std::string> chrObservations = chrHmm.getObservations();
141 		observations.insert(observations.end(), chrObservations.begin(), chrObservations.end());
142 	}
143 
144 	// TRACK PUTATIVE EVENTS
145 	unsigned int eventState = 0;
146 	bool inEvent = false;							// in possible event if in stretch where posterior >= eventPosteriorThreshold
147 	bool validEvent = false;						// event is valid if any posterior >= validEventPosteriorThreshold
148 	unsigned int eventBegin = 0;
149 	unsigned int eventEnd = 0;
150 	unsigned int eventMarkerCount = 0;
151 	double validEventPosteriorThreshold = 0.9;
152 	double eventPosteriorThreshold = 0.5;
153 	std::vector<double> eventPosteriors;
154 	std::vector<int> eventSwitches;
155 
156 	// OUTPUT POSTERIOR AND EVENT DAT FILE (TODO: refactor this into its own method)
157 	posteriorFile << posteriorHeader << std::endl;
158 	eventFile << eventHeader << std::endl;
159 	unsigned long posIndex = 0;
160 	unsigned long informativeIndex = 0;
161 	bool chrChange = false;
162 	std::string previousChr = "_initChr";
163 	for (unsigned int bafIndex = 0; bafIndex < numBafEntries; bafIndex++) {
164 		std::string gt = bafStrFields[hlsutil::InputProcessor::BAF_GT][bafIndex];
165 		std::string chr = bafStrFields[hlsutil::InputProcessor::BAF_CHR][bafIndex];
166 		unsigned int pos = bafIntFields[hlsutil::InputProcessor::BAF_POS][bafIndex];
167 		std::string ref = bafStrFields[hlsutil::InputProcessor::BAF_REF][bafIndex];
168 		std::string alt = bafStrFields[hlsutil::InputProcessor::BAF_ALT][bafIndex];
169 		double logRR = bafDoubleFields[hlsutil::InputProcessor::BAF_LOGRR][bafIndex];
170 		double refFreq = bafDoubleFields[hlsutil::InputProcessor::BAF_RAF][bafIndex];
171 
172 		if (posIndex == informativeIndices[informativeIndex]) {
173 			int haplotype = 0;
174 			// Each marker is a SNP so each ref string is only 1 character long.
175 			if (phasedAlleles[0][posIndex] == ref[0]) {
176 				haplotype = 1;
177 			} else {
178 				if (phasedAlleles[1][posIndex] == ref[0]) {
179 					haplotype = 2;
180 				}
181 			}
182 
183 			// There is 1 less posterior probability than het sites for phase_concordance.
184 			// In the HMM, the last
185 			// posterior is stored as 0 across states.  For a more accurate output,
186 			// we'll output a null string for this last state.
187 			if (chr != previousChr) {
188 				chrChange = true;
189 			} else {
190 				chrChange = false;
191 			}
192 
193 			int phaseConcordanceSwitch = atoi(observations[informativeIndex].c_str());
194 
195 			std::string gtNum = vcf.abToNumStr(gt, nullString);
196 
197 			posteriorFile 	<< chr << "\t"
198 							<< pos << "\t"
199 							<< ref << "\t"
200 							<< alt << "\t"
201 							<< posIndex << "\t"
202 							<< phaseConcordanceSwitch << "\t"
203 							<< haplotype << "\t"
204 							<< gt << "\t"
205 							<< refFreq << "\t"
206 							<< logRR;
207 
208 			for (unsigned int stateIndex = 0; stateIndex < statePosteriors.size(); stateIndex++) {
209 				if (chrChange) {
210 					posteriorFile << "\t" << nullString;
211 
212 					// UPDATE EVENT DETAILS
213 					// if chrChange then check if was inEvent
214 					// if inEvent, output event and reset values
215 					if (inEvent && eventState == stateIndex && validEvent) {
216 						double posteriorSum = 0;
217 						BOOST_FOREACH(double ep, eventPosteriors) {
218 							posteriorSum += ep;
219 						}
220 						double eventPosterior = posteriorSum / (double) eventMarkerCount;
221 
222 						double switchSum = 0;
223 						BOOST_FOREACH(int ss, eventSwitches) {
224 							switchSum += ss;
225 						}
226 						double eventPhaseConcordance = switchSum / (double) eventMarkerCount;
227 
228 						eventFile 	<< previousChr << "\t"
229 									<< eventBegin << "\t"
230 									<< eventEnd << "\t"
231 									<< "S" << eventState << "\t"
232 									<< eventMarkerCount << "\t"
233 									<< eventPosterior << "\t"
234 									<< eventPhaseConcordance << "\n";
235 
236 						// reset values for chr changes
237 						inEvent = false;
238 						validEvent = false;
239 						eventState = 0;
240 						eventMarkerCount = 0;
241 						eventPosteriors.clear();
242 						eventSwitches.clear();
243 					}
244 
245 				}
246 				else {
247 					double posterior = statePosteriors[stateIndex][informativeIndex];
248 					posteriorFile << "\t" << posterior;
249 
250 					if (stateIndex == 0 || (inEvent && eventState != stateIndex)) {
251 						continue;
252 					}
253 
254 					// enter event (really event state shouldn't have to equal 0....if it doesn't, switching event states)
255 					if (!inEvent && posterior >= eventPosteriorThreshold) {
256 						inEvent = true;
257 
258 						eventBegin = pos;
259 						eventEnd = pos;
260 						eventMarkerCount += 1;
261 						eventPosteriors.push_back(posterior);
262 						eventSwitches.push_back(phaseConcordanceSwitch);
263 						eventState = stateIndex;
264 						if (posterior >= validEventPosteriorThreshold) {
265 							validEvent = true;
266 						}
267 					} else {
268 						// extend event
269 						if (inEvent && eventState == stateIndex && posterior >= eventPosteriorThreshold) {
270 							eventEnd = pos;
271 							eventMarkerCount += 1;
272 							eventPosteriors.push_back(posterior);
273 							eventSwitches.push_back(phaseConcordanceSwitch);
274 							if (!validEvent) {
275 								if (posterior >= validEventPosteriorThreshold) {
276 									validEvent = true;
277 								}
278 							}
279 //							std::cout << "event " << stateIndex << "\t" << chr << "\t" << eventBegin << "\t" << eventEnd << "\t" << eventMarkerCount << "\n";
280 						} else {
281 							// leave event
282 							if (inEvent && eventState == stateIndex && posterior < eventPosteriorThreshold) {
283 
284 								if (validEvent) {
285 									double posteriorSum = 0;
286 									BOOST_FOREACH(double ep, eventPosteriors) {
287 										posteriorSum += ep;
288 									}
289 									double eventPosterior = posteriorSum / (double) eventMarkerCount;
290 
291 									double switchSum = 0;
292 									BOOST_FOREACH(int ss, eventSwitches) {
293 										switchSum += ss;
294 									}
295 									double eventPhaseConcordance = switchSum / (double) eventMarkerCount;
296 
297 									eventFile 	<< chr << "\t"
298 												<< eventBegin << "\t"
299 												<< eventEnd << "\t"
300 												<< "S" << eventState << "\t"
301 												<< eventMarkerCount << "\t"
302 												<< eventPosterior << "\t"
303 												<< eventPhaseConcordance << "\n";
304 								}
305 
306 								inEvent = false;
307 								validEvent = false;
308 								eventState = 0;
309 								eventMarkerCount = 0;
310 								eventPosteriors.clear();
311 								eventSwitches.clear();
312 							}
313 						}
314 					}
315 				}
316 			}
317 			posteriorFile << std::endl;
318 			informativeIndex += 1;
319 
320 			if (chrChange) {
321 				previousChr = chr;
322 			}
323 		}
324 		posIndex += 1;
325 	}
326 	posteriorFile.close();
327 
328 	// EXPORT CHROMOSOME BASED REPORTS
329 	std::cout << "Reporting data for chromosomes.\n";
330 	for (unsigned int i = 0; i < orderedChrs.size(); i++) {
331 		std::string chr = orderedChrs[i];
332 		std::cout << "Reporting data for chromosome " << chr << "\n";
333 		Hmm &chrHmm = *chrHmms[chr];
334 		chrHmm.calcLogProbObs();
335 		hmmLikelihoodsFile << "> " << chr << "\t(" << chrHmm.getLogProbObs() << ")\n";
336 		std::vector<double> likelihoodProfile = chrHmm.getLikelihoodProfile();
337 		for (unsigned int iteration = 0; iteration < likelihoodProfile.size(); iteration++) {
338 			hmmLikelihoodsFile << likelihoodProfile[iteration] << "\t";
339 		}
340 		hmmLikelihoodsFile << "\n";
341 		// allow writing of some type of header for each chr and matrix type
342 		hmmParamFile << "> " << chr << "\t(from\tto\tprob)\n";
343 		chrHmm.writeTrans(hmmParamFile);
344 		chrHmm.writeEmit(hmmParamFile);
345 	}
346 	hmmParamFile.close();
347 }
348 
reportVcfBasedResults(std::vector<std::string> & orderedChrs,std::map<std::string,boost::shared_ptr<Hmm>> & chrHmms,std::vector<unsigned long> & informativeIndices,std::vector<std::vector<char>> phasedAlleles,std::map<std::string,std::vector<std::string>> & vcfStrFields,std::map<std::string,std::vector<unsigned int>> & vcfIntFields,std::map<std::string,std::vector<double>> & vcfDoubleFields,std::string & destinationDir,std::string & outputPrefix)349 void Reporter::reportVcfBasedResults(	std::vector<std::string>& orderedChrs,
350 						std::map<std::string, boost::shared_ptr<Hmm> >& chrHmms,
351 						std::vector<unsigned long>& informativeIndices,
352 						std::vector< std::vector<char> > phasedAlleles,
353 						std::map<std::string, std::vector<std::string> >& vcfStrFields,
354 						std::map<std::string, std::vector<unsigned int> >& vcfIntFields,
355 						std::map<std::string, std::vector<double> >& vcfDoubleFields,
356 						std::string& destinationDir,
357 						std::string& outputPrefix) {
358 
359 	unsigned int numVcfEntries = vcfStrFields[hlsutil::InputProcessor::VCF_CHR].size();
360 //	std::cout << "VCF entries size = " << numVcfEntries << "\n";
361 	std::string SPACE = " ";
362 	std::string ENDL = "\n";
363 	std::string TAB = "\t";
364 	std::string nullString = "NA";
365 
366 	if (destinationDir != " " and destinationDir[-1] != '/') {
367 		destinationDir += "/";
368 	}
369 
370 	hlsutil::StringUtil str;
371 	hlsutil::VcfUtil vcf;
372 
373 	// INITIALIZE FILES
374 	std::string posteriorFilename = destinationDir + outputPrefix + ".posterior.dat";
375 	std::string hmmParamFilename = destinationDir + outputPrefix + ".param.dat";
376 	std::string hmmLikelihoodsFilename = destinationDir + outputPrefix + ".likelihoods.dat";
377 	std::string eventFilename = destinationDir + outputPrefix + ".events.dat";
378 
379 	std::ofstream posteriorFile(posteriorFilename.c_str(), std::ofstream::out);
380 	std::ofstream hmmParamFile(hmmParamFilename.c_str(), std::ofstream::out);
381 	std::ofstream hmmLikelihoodsFile(hmmLikelihoodsFilename.c_str(), std::ofstream::out);
382 	std::ofstream eventFile(eventFilename.c_str(), std::ofstream::out);
383 
384 	std::string posteriorHeader = "CHR\tPOS\tREF\tALT\tINDEX\tPHASE_CONCORDANCE_SWITCH\tHAP\tGT\tRAF\tDP";
385 	std::string eventHeader = "CHR\tBEGIN\tEND\tEVENT_STATE\tNUM_INFORMATIVE_MARKERS\tMEAN_POSTPROB\tPHASE_CONCORDANCE";
386 
387 	// GET POSSIBLE STATES (possible states for each chr are the same)
388 	std::string firstChr = orderedChrs[0];
389 	Hmm &firstHmm = *chrHmms[firstChr];
390 	std::vector<unsigned long> states = firstHmm.getStates();
391 
392 	// COLLECT POSTERIOR PROBABILITIES FOR ALL STATES AT ALL INFORMATIVE MARKERS
393 	std::vector< std::vector<double> > statePosteriors;
394 	for (unsigned int i=0; i < states.size(); i++) {
395 		unsigned long stateId = states[i];
396 		posteriorHeader += "\t" + (*chrHmms[firstChr]).getStr(stateId);
397 		std::vector<double> posteriors;
398 
399 		for (unsigned int i = 0; i < orderedChrs.size(); i++) {
400 			std::string chr = orderedChrs[i];
401 			Hmm &chrHmm = *chrHmms[chr];
402 			std::vector<double> chrPosteriors = chrHmm.extractPosteriors(stateId);
403 			posteriors.insert(posteriors.end(), chrPosteriors.begin(), chrPosteriors.end());
404 		}
405 		statePosteriors.push_back(posteriors);
406 	}
407 
408 	// COLLECT SWITCHES
409 	std::vector<std::string> observations;
410 	for (unsigned int i = 0; i < orderedChrs.size(); i++) {
411 		std::string chr = orderedChrs[i];
412 		Hmm &chrHmm = *chrHmms[chr];
413 		std::vector<std::string> chrObservations = chrHmm.getObservations();
414 		observations.insert(observations.end(), chrObservations.begin(), chrObservations.end());
415 	}
416 
417 	// TRACK PUTATIVE EVENTS
418 	unsigned int eventState = 0;
419 	bool inEvent = false;							// in possible event if in stretch where posterior >= eventPosteriorThreshold
420 	bool validEvent = false;						// event is valid if any posterior >= validEventPosteriorThreshold
421 	unsigned int eventBegin = 0;
422 	unsigned int eventEnd = 0;
423 	unsigned int eventMarkerCount = 0;
424 	double validEventPosteriorThreshold = 0.9;
425 	double eventPosteriorThreshold = 0.5;
426 	std::vector<double> eventPosteriors;
427 	std::vector<int> eventSwitches;
428 
429 	// OUTPUT POSTERIOR AND EVENT DAT FILE (TODO: refactor this into its own method)
430 	posteriorFile << posteriorHeader << std::endl;
431 	eventFile << eventHeader << std::endl;
432 	unsigned long posIndex = 0;
433 	unsigned long informativeIndex = 0;
434 	bool chrChange = false;
435 	std::string previousChr = "_initChr";
436 	for (unsigned int vcfIndex = 0; vcfIndex < numVcfEntries; vcfIndex++) {
437 
438 		std::string gt = vcfStrFields[hlsutil::InputProcessor::VCF_GT][vcfIndex];
439 		std::string chr = vcfStrFields[hlsutil::InputProcessor::VCF_CHR][vcfIndex];
440 		unsigned int pos = vcfIntFields[hlsutil::InputProcessor::VCF_POS][vcfIndex];
441 		std::string ref = vcfStrFields[hlsutil::InputProcessor::VCF_REF][vcfIndex];
442 		std::string alt = vcfStrFields[hlsutil::InputProcessor::VCF_ALT][vcfIndex];
443 		unsigned int depth = vcfIntFields[hlsutil::InputProcessor::VCF_DP][vcfIndex];
444 		double refFreq = vcfDoubleFields[hlsutil::InputProcessor::VCF_REF_FREQ][vcfIndex];
445 
446 		std::string gtNum = vcf.gtToNumStr(gt, nullString);
447 
448 		if (posIndex == informativeIndices[informativeIndex]) {
449 			int haplotype = 0;
450 			// Each marker is a SNP so each ref string is only 1 character long.
451 			if (phasedAlleles[0][posIndex] == ref[0]) { //bAllele[0]) {
452 				haplotype = 1;
453 			} else {
454 				if (phasedAlleles[1][posIndex] == ref[0]) { //bAllele[0]) {
455 					haplotype = 2;
456 				}
457 			}
458 
459 			// There is 1 less posterior probability than het sites for phase_concordance.
460 			// In the HMM, the last
461 			// posterior is stored as 0 across states.  For a more accurate output,
462 			// we'll output a null string for this last state.
463 			if (chr != previousChr) {
464 				chrChange = true;
465 			} else {
466 				chrChange = false;
467 			}
468 
469 			int phaseConcordanceSwitch = atoi(observations[informativeIndex].c_str());
470 
471 			std::string gtNum = vcf.abToNumStr(gt, nullString);
472 
473 			posteriorFile 	<< chr << "\t"
474 								<< pos << "\t"
475 								<< ref << "\t"
476 								<< alt << "\t"
477 								<< posIndex << "\t"
478 								<< phaseConcordanceSwitch << "\t"
479 								<< haplotype << "\t"
480 								<< gt << "\t"
481 								<< refFreq << "\t"
482 								<< depth;
483 
484 			for (unsigned int stateIndex = 0; stateIndex < statePosteriors.size(); stateIndex++) {
485 				if (chrChange) {
486 					posteriorFile << "\t" << nullString;
487 
488 					// UPDATE EVENT DETAILS
489 					// if chrChange then check if was inEvent
490 					// if inEvent, output event and reset values
491 					if (inEvent && eventState == stateIndex && validEvent) {
492 						double posteriorSum = 0;
493 						BOOST_FOREACH(double ep, eventPosteriors) {
494 							posteriorSum += ep;
495 						}
496 						double eventPosterior = posteriorSum / (double) eventMarkerCount;
497 
498 						double switchSum = 0;
499 						BOOST_FOREACH(int ss, eventSwitches) {
500 							switchSum += ss;
501 						}
502 						double eventPhaseConcordance = switchSum / (double) eventMarkerCount;
503 
504 						eventFile 	<< previousChr << "\t"
505 									<< eventBegin << "\t"
506 									<< eventEnd << "\t"
507 									<< "S" << eventState << "\t"
508 									<< eventMarkerCount << "\t"
509 									<< eventPosterior << "\t"
510 									<< eventPhaseConcordance << "\n";
511 
512 						// reset values for chr changes
513 						inEvent = false;
514 						validEvent = false;
515 						eventState = 0;
516 						eventMarkerCount = 0;
517 						eventPosteriors.clear();
518 						eventSwitches.clear();
519 					}
520 
521 				}
522 				else {
523 					double posterior = statePosteriors[stateIndex][informativeIndex];
524 					posteriorFile << "\t" << posterior;
525 
526 					if (stateIndex == 0 || (inEvent && eventState != stateIndex)) {
527 						continue;
528 					}
529 
530 					// enter event (really event state shouldn't have to equal 0....if it doesn't, switching event states)
531 					if (!inEvent && posterior >= eventPosteriorThreshold) {
532 						inEvent = true;
533 
534 						eventBegin = pos;
535 						eventEnd = pos;
536 						eventMarkerCount += 1;
537 						eventPosteriors.push_back(posterior);
538 						eventSwitches.push_back(phaseConcordanceSwitch);
539 						eventState = stateIndex;
540 						if (posterior >= validEventPosteriorThreshold) {
541 							validEvent = true;
542 						}
543 					} else {
544 						// extend event unless we are at the last informative index (then go to else)
545 						if (inEvent && eventState == stateIndex && posterior >= eventPosteriorThreshold && informativeIndex != informativeIndices.size() - 2) {
546 							eventEnd = pos;
547 							eventMarkerCount += 1;
548 							eventPosteriors.push_back(posterior);
549 							eventSwitches.push_back(phaseConcordanceSwitch);
550 							if (!validEvent) {
551 								if (posterior >= validEventPosteriorThreshold) {
552 									validEvent = true;
553 								}
554 							}
555 //							std::cout << "event " << stateIndex << "\t" << chr << "\t" << eventBegin << "\t" << eventEnd << "\t" << eventMarkerCount << "\n";
556 						} else {
557 							// leave event if in event and posterior drops below minimum posterior threshold for an event OR if we are at the last posterior in our dataset and in an event
558 							if (inEvent && eventState == stateIndex && (posterior < eventPosteriorThreshold || informativeIndex == informativeIndices.size() - 2)) {
559 
560 								if (validEvent) {
561 									double posteriorSum = 0;
562 									BOOST_FOREACH(double ep, eventPosteriors) {
563 										posteriorSum += ep;
564 									}
565 									double eventPosterior = posteriorSum / (double) eventMarkerCount;
566 
567 									double switchSum = 0;
568 									BOOST_FOREACH(int ss, eventSwitches) {
569 										switchSum += ss;
570 									}
571 									double eventPhaseConcordance = switchSum / (double) eventMarkerCount;
572 
573 									eventFile 	<< chr << "\t"
574 												<< eventBegin << "\t"
575 												<< eventEnd << "\t"
576 												<< "S" << eventState << "\t"
577 												<< eventMarkerCount << "\t"
578 												<< eventPosterior << "\t"
579 												<< eventPhaseConcordance << "\n";
580 								}
581 
582 								inEvent = false;
583 								validEvent = false;
584 								eventState = 0;
585 								eventMarkerCount = 0;
586 								eventPosteriors.clear();
587 								eventSwitches.clear();
588 							}
589 						}
590 					}
591 				}
592 			}
593 			posteriorFile << std::endl;
594 			informativeIndex += 1;
595 
596 			if (chrChange) {
597 				previousChr = chr;
598 			}
599 		}
600 		posIndex += 1;
601 	}
602 	posteriorFile.close();
603 
604 	// EXPORT CHROMOSOME BASED REPORTS
605 	for (unsigned int i = 0; i < orderedChrs.size(); i++) {
606 		std::string chr = orderedChrs[i];
607 		Hmm &chrHmm = *chrHmms[chr];
608 		chrHmm.calcLogProbObs();
609 		hmmLikelihoodsFile << "> " << chr << "\t(" << chrHmm.getLogProbObs() << ")\n";
610 		std::vector<double> likelihoodProfile = chrHmm.getLikelihoodProfile();
611 		for (unsigned int iteration = 0; iteration < likelihoodProfile.size(); iteration++) {
612 			hmmLikelihoodsFile << likelihoodProfile[iteration] << "\t";
613 		}
614 		hmmLikelihoodsFile << "\n";
615 		// allow writing of some type of header for each chr and matrix type
616 		hmmParamFile << "> " << chr << "\t(from\tto\tprob)\n";
617 		chrHmm.writeTrans(hmmParamFile);
618 		chrHmm.writeEmit(hmmParamFile);
619 	}
620 	hmmParamFile.close();
621 }
622 
623 //void Reporter::reportVcfBasedResults(	std::vector<std::string>& orderedChrs,
624 //						std::map<std::string, boost::shared_ptr<Hmm> >& chrHmms,
625 //						std::vector<unsigned long>& informativeIndices,
626 //						std::vector< std::vector<char> > phasedAlleles,
627 //						std::map<std::string, std::vector<std::string> >& vcfStrFields,
628 //						std::map<std::string, std::vector<unsigned int> >& vcfIntFields,
629 //						std::map<std::string, std::vector<double> >& vcfDoubleFields,
630 //						std::string& destinationDir,
631 //						std::string& outputPrefix) {
632 //
633 //	unsigned int numVcfEntries = vcfStrFields[hlsutil::InputProcessor::VCF_CHR].size();
634 //	std::cout << "vcf entries size = " << numVcfEntries << "\n";
635 //	std::string SPACE = " ";
636 //	std::string ENDL = "\n";
637 //	std::string TAB = "\t";
638 //	std::string nullString = "NA";
639 //
640 //	if (destinationDir != " " and destinationDir[-1] != '/') {
641 //		destinationDir += "/";
642 //	}
643 //
644 //	hlsutil::StringUtil str;
645 //	hlsutil::VcfUtil vcf;
646 //
647 ////	std::string datFilename = destinationDir + outputPrefix + ".dat";
648 //	std::string posteriorDatFilename = destinationDir + outputPrefix + ".posterior.dat";
649 //	std::string hmmParamFilename = destinationDir + outputPrefix + ".param.dat";
650 ////	std::string hmmLikelihoodsFilename = destinationDir + outputPrefix + ".likelihoods.dat";
651 //	//std::string likelihoodsFilename = destinationDir + outputPrefix + ".likelihoods.dat";
652 //
653 ////	std::ifstream infile(vcfFilename.c_str(), std::ifstream::in);
654 //
655 ////	std::ofstream datFile(datFilename.c_str(), std::ofstream::out);
656 //	std::ofstream posteriorDatFile(posteriorDatFilename.c_str(), std::ofstream::out);
657 ////	std::ofstream hmmLikelihoodsFile(hmmLikelihoodsFilename.c_str(), std::ofstream::out);
658 //
659 //	std::string datHeader = "chr\tpos\tref\talt\tgt\tcov\trefFreq\tindex";
660 ////	datFile << datHeader << std::endl;
661 //
662 //	// get posteriors for each state
663 //	std::string posteriorDatHeader = datHeader + "\tref_hap";
664 //
665 //	// possible states for each chr are the same
666 //	std::string firstChr = orderedChrs[0];
667 //	Hmm &firstHmm = *chrHmms[firstChr];
668 //	std::vector<unsigned long> states = firstHmm.getStates();
669 ////	std::cout << "states size " << firstHmm.getStates().size()  << "\n";
670 //
671 //	std::vector< std::vector<double> > statePosteriors;
672 //	for (unsigned int i=0; i < states.size(); i++) {
673 //		unsigned long stateId = states[i];
674 ////		std::cout << "About to access state " << firstChr << "\t" << stateId << "\n";
675 //		posteriorDatHeader += "\t" + (*chrHmms[firstChr]).getStr(stateId);
676 //		std::vector<double> posteriors;
677 //
678 //		for (unsigned int i = 0; i < orderedChrs.size(); i++) {
679 //			std::string chr = orderedChrs[i];
680 //			Hmm &chrHmm = *chrHmms[chr];
681 ////			std::cout << "accessing " << chr << "\n";
682 //			std::vector<double> chrPosteriors = chrHmm.extractPosteriors(stateId);
683 //			posteriors.insert(posteriors.end(), chrPosteriors.begin(), chrPosteriors.end());
684 //		}
685 ////		std::cerr << "Num posteriors " << stateId << ": " << posteriors.size() << std::endl;
686 //		statePosteriors.push_back(posteriors);
687 //	}
688 //	posteriorDatFile << posteriorDatHeader << std::endl;
689 //
690 //	unsigned long posIndex = 0;
691 //	unsigned long informativeIndex = 0;
692 //	bool chrChange = false;
693 //	std::string previousChr = "_initChr";
694 //	for (unsigned int vcfIndex = 0; vcfIndex < numVcfEntries; vcfIndex++) {
695 //
696 //		std::string gt = vcfStrFields[hlsutil::InputProcessor::VCF_GT][vcfIndex];
697 //		std::string chr = vcfStrFields[hlsutil::InputProcessor::VCF_CHR][vcfIndex];
698 //		unsigned int pos = vcfIntFields[hlsutil::InputProcessor::VCF_POS][vcfIndex];
699 //		std::string ref = vcfStrFields[hlsutil::InputProcessor::VCF_REF][vcfIndex];
700 //		std::string alt = vcfStrFields[hlsutil::InputProcessor::VCF_ALT][vcfIndex];
701 //		unsigned int depth = vcfIntFields[hlsutil::InputProcessor::VCF_DP][vcfIndex];
702 //		double refFreq = vcfDoubleFields[hlsutil::InputProcessor::VCF_REF_FREQ][vcfIndex];
703 //
704 //		std::string gtNum = vcf.gtToNumStr(gt, nullString);
705 //
706 //
707 //		if (posIndex == informativeIndices[informativeIndex]) {
708 //			int haplotype = 0;
709 //			// Each marker is a SNP so each ref string is only 1 character long.
710 //			if (phasedAlleles[0][posIndex] == ref[0]) {
711 //				haplotype = 1;
712 //			} else {
713 //				if (phasedAlleles[1][posIndex] == ref[0]) {
714 //					haplotype = 2;
715 //				}
716 //			}
717 //			posteriorDatFile 	<< chr << "\t"
718 //								<< pos << "\t"
719 //								<< ref << "\t"
720 //								<< alt << "\t"
721 //								<< gtNum << "\t"
722 //								<< depth << "\t"
723 //								<< refFreq << "\t"
724 //								<< posIndex << "\t"
725 //								<< haplotype;
726 //
727 //			// There is 1 less posterior probability than het sites for phase_concordance.
728 //			// In the HMM, the last
729 //			// posterior is stored as 0 across states.  For a more accurate output,
730 //			// we'll output a null string for this last state.
731 //			if (chr != previousChr) {
732 //				chrChange = true;
733 //				previousChr = chr;
734 //			} else {
735 //				chrChange = false;
736 //			}
737 //
738 //			for (unsigned int stateIndex = 0; stateIndex < statePosteriors.size(); stateIndex++) {
739 //				if (chrChange == true) {
740 //					posteriorDatFile << "\t" << nullString;
741 //				}
742 //				else {
743 //					double posterior = statePosteriors[stateIndex][informativeIndex];
744 //					posteriorDatFile << "\t" << posterior;
745 //				}
746 //			}
747 //
748 //			posteriorDatFile << std::endl;
749 //
750 //			informativeIndex += 1;
751 //		}
752 //
753 //		posIndex += 1;
754 //
755 //	}
756 //
757 //	posteriorDatFile.close();
758 ////	datFile.close();
759 //
760 ////	std::ofstream hmmParamFile(hmmParamFilename.c_str(), std::ofstream::out);
761 ////	for (unsigned int i = 0; i < orderedChrs.size(); i++) {
762 ////		std::string chr = orderedChrs[i];
763 ////		Hmm &chrHmm = *chrHmms[chr];
764 ////		chrHmm.calcLogProbObs();
765 ////		hmmLikelihoodsFile << chr << "\t" << chrHmm.getLogProbObs() << "\n";
766 ////		// allow writing of some type of header for each chr and matrix type
767 ////		chrHmm.writeTrans(hmmParamFile);
768 ////		chrHmm.writeEmit(hmmParamFile);
769 ////	}
770 //}
771 
772 } /* namespace haplohseq */
773