1 /* The MIT License
2
3 Copyright (c) 2013 Adrian Tan <atks@umich.edu>
4
5 Permission is hereby granted, free of charge, to any person obtaining a copy
6 of this software and associated documentation files (the "Software"), to deal
7 in the Software without restriction, including without limitation the rights
8 to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9 copies of the Software, and to permit persons to whom the Software is
10 furnished to do so, subject to the following conditions:
11
12 The above copyright notice and this permission notice shall be included in
13 all copies or substantial portions of the Software.
14
15 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
21 THE SOFTWARE.
22 */
23
24 #include "lhmm.h"
25
26 #define MAXLEN 250
27 #define S 0
28 #define X 1
29 #define Y 2
30 #define M 3
31 #define I 4
32 #define D 5
33 #define W 6
34 #define Z 7
35 #define E 8
36
37 /**
38 * Constructor.
39 */
LHMM()40 LHMM::LHMM()
41 {
42 initialize();
43 };
44
45 /**
46 * Destructor.
47 */
~LHMM()48 LHMM::~LHMM()
49 {
50 delete scoreX;
51 delete scoreY;
52 delete scoreM;
53 delete scoreI;
54 delete scoreD;
55 delete scoreW;
56 delete scoreZ;
57
58 delete pathX;
59 delete pathY;
60 delete pathM;
61 delete pathI;
62 delete pathD;
63 delete pathW;
64 delete pathZ;
65 };
66
67 /**
68 * Initializes object, helper function for constructor.
69 */
initialize()70 void LHMM::initialize()
71 {
72 delta = 0.001;
73 epsilon = 0.5;
74 tau = 0.1;
75 eta = 0.001;
76
77 logOneSixteenth = log10(1.0/16.0);
78
79 transition[X][X] = 0; //log10((1-eta)/(1-eta));
80 transition[X][Y] = 0; //log10((eta*(1-eta))/(eta*(1-eta)));
81 transition[Y][Y] = 0; //log10((1-eta)/(1-eta));
82
83 transition[X][M] = log10(1/((1-eta)*(1-eta))); //log10((eta*eta)/(eta*eta*(1-eta)*(1-eta)));
84 transition[Y][M] = transition[X][M]; //log10((eta)/(eta*(1-eta)*(1-eta)));
85 transition[S][M] = log10(1/(eta*(1-eta)*(1-eta))); //log10((eta*eta)/(eta*eta*eta*(1-eta)*(1-eta)));
86 transition[M][M] = log10((1-2*delta-tau)/((1-eta)*(1-eta)));
87 transition[I][M] = log10((1-epsilon)/(1-eta));
88 transition[D][M] = transition[I][M]; //log10((1-epsilon)/(1-eta));
89
90 transition[M][D] = log10(delta/(1-eta));
91 transition[D][D] = log10(epsilon/(1-eta));
92
93 transition[M][I] = transition[M][D]; //log10(delta::/(1-eta));
94 transition[I][I] = transition[D][D]; //log10(epsilon/(1-eta));
95
96 transition[M][W] = log10(tau/eta); //log10((tau*(1-eta))/(eta*(1-eta)));
97 transition[W][W] = 0; //log10((1-eta)/(1-eta));
98
99 transition[M][Z] = transition[M][W]; //log10((tau*eta*(1-eta))/(eta*eta*(1-eta)));
100 transition[W][Z] = 0; //log10((eta*(1-eta))/(eta*(1-eta)));
101 transition[Z][Z] = 0; //log10((1-eta)/(1-eta));
102
103 scoreX = new double[MAXLEN*MAXLEN];
104 scoreY = new double[MAXLEN*MAXLEN];
105 scoreM = new double[MAXLEN*MAXLEN];
106 scoreI = new double[MAXLEN*MAXLEN];
107 scoreD = new double[MAXLEN*MAXLEN];
108 scoreW = new double[MAXLEN*MAXLEN];
109 scoreZ = new double[MAXLEN*MAXLEN];
110
111 pathX = new char[MAXLEN*MAXLEN];
112 pathY = new char[MAXLEN*MAXLEN];
113 pathM = new char[MAXLEN*MAXLEN];
114 pathI = new char[MAXLEN*MAXLEN];
115 pathD = new char[MAXLEN*MAXLEN];
116 pathW = new char[MAXLEN*MAXLEN];
117 pathZ = new char[MAXLEN*MAXLEN];
118
119 //assume alignments can't possibly be maxLength bases or more
120 for (int32_t i=0; i<MAXLEN; ++i)
121 {
122 for (int32_t j=0; j<MAXLEN; ++j)
123 {
124 scoreX[i*MAXLEN+j] = -DBL_MAX;
125 scoreY[i*MAXLEN+j] = -DBL_MAX;
126 scoreM[i*MAXLEN+j] = -DBL_MAX;
127 scoreI[i*MAXLEN+j] = -DBL_MAX;
128 scoreD[i*MAXLEN+j] = -DBL_MAX;
129 scoreW[i*MAXLEN+j] = -DBL_MAX;
130 scoreZ[i*MAXLEN+j] = -DBL_MAX;
131
132 if (j)
133 {
134 pathX[i*MAXLEN+j] = 'Y';
135 pathY[i*MAXLEN+j] = 'Y';
136 pathM[i*MAXLEN+j] = 'Y';
137 pathI[i*MAXLEN+j] = 'Y';
138 pathD[i*MAXLEN+j] = 'Y';
139 pathW[i*MAXLEN+j] = 'Y';
140 pathZ[i*MAXLEN+j] = 'Y';
141 }
142 else
143 {
144 pathX[i*MAXLEN+j] = 'X';
145 pathY[i*MAXLEN+j] = 'X';
146 pathM[i*MAXLEN+j] = 'X';
147 pathI[i*MAXLEN+j] = 'X';
148 pathD[i*MAXLEN+j] = 'X';
149 pathW[i*MAXLEN+j] = 'X';
150 pathZ[i*MAXLEN+j] = 'X';
151 }
152 }
153 }
154
155 logEta = log10(eta);
156 logTau = log10(tau);
157
158 scoreX[0*MAXLEN+0] = 0;
159 scoreY[0*MAXLEN+0] = 0;
160 scoreM[0*MAXLEN+0] = 0;
161 scoreW[0*MAXLEN+0] = 0;
162 scoreZ[0*MAXLEN+0] = 0;
163 pathX[0*MAXLEN+0] = 'N';
164 pathX[1*MAXLEN+0] = 'S';
165 pathY[0*MAXLEN+0] = 'N';
166 pathY[0*MAXLEN+1] = 'S';
167 pathM[0*MAXLEN+0] = 'N';
168 pathM[1*MAXLEN+1] = 'S';
169
170 for (uint32_t k=1; k<MAXLEN; ++k)
171 {
172 scoreX[k*MAXLEN+0] = scoreX[(k-1)*MAXLEN+0] + transition[X][X];
173 scoreX[0*MAXLEN+k] = -DBL_MAX;
174 scoreY[k*MAXLEN+0] = -DBL_MAX;
175 scoreY[0*MAXLEN+k] = scoreY[0*MAXLEN+(k-1)] + transition[Y][Y];
176 scoreW[k*MAXLEN+0] = scoreW[(k-1)*MAXLEN+0] + transition[W][W];
177 scoreW[0*MAXLEN+k] = -DBL_MAX;
178 scoreZ[k*MAXLEN+0] = -DBL_MAX;
179 scoreZ[0*MAXLEN+k] = scoreZ[0*MAXLEN+(k-1)] + transition[Z][Z];
180 }
181
182 scoreX[0*MAXLEN+0] = -DBL_MAX;
183 scoreY[0*MAXLEN+0] = -DBL_MAX;
184 scoreW[0*MAXLEN+0] = -DBL_MAX;
185 scoreZ[0*MAXLEN+0] = -DBL_MAX;
186 };
187
188 /**
189 * Align y against x.
190 */
align(double & llk,const char * x,const char * y,const char * qual,bool debug)191 void LHMM::align(double& llk, const char* x, const char* y, const char* qual, bool debug)
192 {
193 //std::cerr << "Running this\n" ;
194 this->x = x;
195 this->y = y;
196 this->qual = qual;
197
198 //adds a starting character at the fron of each string that must be matched
199 xlen = strlen(x);
200 ylen = strlen(y);
201 double max = 0;
202 char maxPath = 'X';
203
204 //construct possible solutions
205 for (uint32_t i=1; i<=xlen; ++i)
206 {
207 for (uint32_t j=1; j<=ylen; ++j)
208 {
209 //X
210 double xx = scoreX[(i-1)*MAXLEN+j] + transition[X][X];
211
212 max = xx;
213 maxPath = 'X';
214
215 scoreX[i*MAXLEN+j] = max;
216 pathX[i*MAXLEN+j] = maxPath;
217
218 //Y
219 double xy = scoreX[i*MAXLEN+(j-1)] + transition[X][Y];
220 double yy = scoreY[i*MAXLEN+(j-1)] + transition[Y][Y];
221
222 max = xy;
223 maxPath = 'X';
224
225 if (yy>max)
226 {
227 max = yy;
228 maxPath = 'Y';
229 }
230
231 scoreY[i*MAXLEN+j] = max;
232 pathY[i*MAXLEN+j] = maxPath;
233
234 //M
235 double xm = scoreX[(i-1)*MAXLEN+(j-1)] + transition[X][M];
236 double ym = scoreY[(i-1)*MAXLEN+(j-1)] + transition[Y][M];
237 double mm = scoreM[(i-1)*MAXLEN+(j-1)] + ((i==1&&j==1) ? transition[S][M] : transition[M][M]);
238 double im = scoreI[(i-1)*MAXLEN+(j-1)] + transition[I][M];
239 double dm = scoreD[(i-1)*MAXLEN+(j-1)] + transition[D][M];
240
241 max = xm;
242 maxPath = 'X';
243
244 if (ym>max) //special case
245 {
246 max = ym;
247 maxPath = 'Y';
248 }
249 if (mm>max)
250 {
251 max = mm;
252 maxPath = (i==1&&j==1) ? 'S' : 'M';
253 }
254 if (im>max)
255 {
256 max = im;
257 maxPath = 'I';
258 }
259 if (dm>max)
260 {
261 max = dm;
262 maxPath = 'D';
263 }
264
265 scoreM[i*MAXLEN+j] = max + log10_emission_odds(x[i-1], y[j-1], LogTool::pl2prob((uint32_t) qual[j-1]-33));
266 pathM[i*MAXLEN+j] = maxPath;
267
268 //D
269 double md = scoreM[(i-1)*MAXLEN+j] + transition[M][D];
270 double dd = scoreD[(i-1)*MAXLEN+j] + transition[D][D];
271
272 max = md;
273 maxPath = 'M';
274
275 if (dd>max)
276 {
277 max = dd;
278 maxPath = 'D';
279 }
280
281 scoreD[i*MAXLEN+j] = max;
282 pathD[i*MAXLEN+j] = maxPath;
283
284 //I
285 double mi = scoreM[i*MAXLEN+(j-1)] + transition[M][I];
286 double ii = scoreI[i*MAXLEN+(j-1)] + transition[I][I];
287
288 max = mi;
289 maxPath = 'M';
290
291 if (ii>max)
292 {
293 max = ii;
294 maxPath = 'I';
295 }
296
297 scoreI[i*MAXLEN+j] = max;
298 pathI[i*MAXLEN+j] = maxPath;
299
300 //W
301 double mw = scoreM[(i-1)*MAXLEN+j] + transition[M][W];
302 double ww = scoreW[(i-1)*MAXLEN+j] + transition[W][W];
303
304 max = mw;
305 maxPath = 'M';
306
307 if (ww>max)
308 {
309 max = ww;
310 maxPath = 'W';
311 }
312
313 scoreW[i*MAXLEN+j] = max;
314 pathW[i*MAXLEN+j] = maxPath;
315
316 //Z
317 double mz = scoreM[i*MAXLEN+(j-1)] + transition[M][Z];
318 double wz = scoreW[i*MAXLEN+(j-1)] + transition[W][Z];
319 double zz = scoreZ[i*MAXLEN+(j-1)] + transition[Z][Z];
320
321 max = mz;
322 maxPath = 'M';
323
324 if (wz>max)
325 {
326 max = wz;
327 maxPath = 'W';
328 }
329 if (zz>max)
330 {
331 max = zz;
332 maxPath = 'Z';
333 }
334
335 scoreZ[i*MAXLEN+j] = max;
336 pathZ[i*MAXLEN+j] = maxPath;
337 }
338
339 scoreM[xlen*MAXLEN+ylen] += logTau-logEta;
340 }
341
342 if (debug)
343 {
344 std::cerr << "\n=X=\n";
345 print(scoreX, xlen+1, ylen+1);
346 std::cerr << "\n=Y=\n";
347 print(scoreY, xlen+1, ylen+1);
348 std::cerr << "\n=M=\n";
349 print(scoreM, xlen+1, ylen+1);
350 std::cerr << "\n=D=\n";
351 print(scoreD, xlen+1, ylen+1);
352 std::cerr << "\n=I=\n";
353 print(scoreI, xlen+1, ylen+1);
354 std::cerr << "\n=W=\n";
355 print(scoreW, xlen+1, ylen+1);
356 std::cerr << "\n=Z=\n";
357 print(scoreZ, xlen+1, ylen+1);
358 std::cerr << "\n=Path X=\n";
359 print(pathX, xlen+1, ylen+1);
360 std::cerr << "\n=Path Y=\n";
361 print(pathY, xlen+1, ylen+1);
362 std::cerr << "\n=Path M=\n";
363 print(pathM, xlen+1, ylen+1);
364 std::cerr << "\n=Path D=\n";
365 print(pathD, xlen+1, ylen+1);
366 std::cerr << "\n=Path I=\n";
367 print(pathI, xlen+1, ylen+1);
368 std::cerr << "\n=Path W=\n";
369 print(pathW, xlen+1, ylen+1);
370 std::cerr << "\n=Path Z=\n";
371 print(pathZ, xlen+1, ylen+1);
372 }
373
374 trace_path();
375 };
376
377 /**
378 * Updates matchStart, matchEnd, globalMaxPath and path.
379 */
trace_path()380 void LHMM::trace_path()
381 {
382 double globalMax = scoreM[xlen*MAXLEN+ylen];
383 char globalMaxPath = 'M';
384 if (scoreW[xlen*MAXLEN+ylen]>globalMax)
385 {
386 globalMax = scoreW[xlen*MAXLEN+ylen];
387 globalMaxPath = 'W';
388 }
389 if (scoreZ[xlen*MAXLEN+ylen]>globalMax)
390 {
391 globalMax = scoreZ[xlen*MAXLEN+ylen];
392 globalMaxPath = 'Z';
393 }
394
395 matchStartX = -1;
396 matchEndX = -1;
397 matchStartY = -1;
398 matchEndY = -1;
399 noBasesAligned = 0;
400 if (globalMaxPath == 'M')
401 {
402 matchEndX = xlen;
403 matchEndY = ylen;
404 ++noBasesAligned;
405 }
406
407 maxLogOdds = globalMax;
408
409 ss.str("");
410 ss << globalMaxPath;
411
412 //recursively trace path
413 trace_path(globalMaxPath, xlen, ylen);
414
415 path = reverse(ss.str());
416
417 bool inI = false;
418 bool inD = false;
419
420 //records appearance of Indels in probe and read
421 indelStartsInX.clear();
422 indelEndsInX.clear();
423 indelStartsInY.clear();
424 indelEndsInY.clear();
425 indelStartsInPath.clear();
426 indelEndsInPath.clear();
427 indelStatusInPath.clear();
428
429 matchedBases = 0;
430 mismatchedBases = 0;
431
432 uint32_t x_index=0, y_index=0;
433 for (uint32_t i=0; i<path.size(); ++i)
434 {
435 if (!inI && path[i]=='I')
436 {
437 inI = true;
438 indelStartsInPath.push_back(i);
439 indelStatusInPath.push_back('I');
440 indelStartsInX.push_back(x_index);
441 indelStartsInY.push_back(y_index);
442 }
443
444 if (inI && path[i]!='I')
445 {
446 inI = false;
447 indelEndsInPath.push_back(i);
448 indelEndsInX.push_back(x_index);
449 indelEndsInY.push_back(y_index);
450 }
451
452 if (!inD && path[i]=='D')
453 {
454 inD = true;
455 indelStartsInPath.push_back(i);
456 indelStatusInPath.push_back('D');
457 indelStartsInX.push_back(x_index);
458 indelStartsInY.push_back(y_index);
459 }
460
461 if (inD && path[i]!='D')
462 {
463 inD = false;
464 indelEndsInPath.push_back(i);
465 indelEndsInX.push_back(x_index);
466 indelEndsInY.push_back(y_index);
467 }
468
469 if (path[i]=='M')
470 {
471 if (x[x_index]== y[y_index])
472 {
473 ++matchedBases;
474 }
475 else
476 {
477 ++mismatchedBases;
478 }
479
480 ++x_index;
481 ++y_index;
482 }
483
484 if (path[i]=='I' || path[i]=='Y' || path[i]=='Z')
485 {
486 ++y_index;
487 }
488
489 if (path[i]=='D' || path[i]=='X' || path[i]=='W')
490 {
491 ++x_index;
492 }
493 }
494
495 left_align();
496 }
497
498 /**
499 * Recursive call for trace_path
500 */
trace_path(char state,uint32_t i,uint32_t j)501 void LHMM::trace_path(char state, uint32_t i, uint32_t j)
502 {
503 if (i>0 && j>0)
504 {
505 if (state=='X')
506 {
507 ss << pathX[i*MAXLEN+j];
508 trace_path(pathX[i*MAXLEN+j], i-1, j);
509 }
510 else if (state=='Y')
511 {
512 ss << pathY[i*MAXLEN+j];
513 trace_path(pathY[i*MAXLEN+j], i, j-1);
514 }
515 else if (state=='M')
516 {
517 if (matchStartX==-1 && (pathM[i*MAXLEN+j] =='X' || pathM[i*MAXLEN+j]=='Y'))
518 {
519 matchStartX = i;
520 matchStartY = j;
521 }
522
523 ss << pathM[i*MAXLEN+j];
524 trace_path(pathM[i*MAXLEN+j], i-1, j-1);
525 ++noBasesAligned;
526 }
527 else if (state=='I')
528 {
529 ss << pathI[i*MAXLEN+j];
530 trace_path(pathI[i*MAXLEN+j], i, j-1);
531 }
532 else if (state=='D')
533 {
534 ss << pathD[i*MAXLEN+j];
535 trace_path(pathD[i*MAXLEN+j], i-1, j);
536 ++noBasesAligned;
537 }
538 else if (state=='W')
539 {
540 if (matchEndX==-1 && pathW[i*MAXLEN+j] =='M')
541 {
542 matchEndX = i-1;
543 matchEndY = j;
544 }
545
546 ss << pathW[i*MAXLEN+j];
547 trace_path(pathW[i*MAXLEN+j], i-1, j);
548 }
549 else if (state=='Z')
550 {
551 if (matchEndX==-1 && pathZ[i*MAXLEN+j] =='M')
552 {
553 matchEndX = i;
554 matchEndY = j-1;
555 }
556
557 ss << pathZ[i*MAXLEN+j];
558 trace_path(pathZ[i*MAXLEN+j], i, j-1);
559 }
560 else if (state=='S')
561 {
562 if (matchStartX==-1)
563 {
564 matchStartX = i+1;
565 }
566
567 if (matchStartY==-1)
568 {
569 matchStartY = j+1;
570 }
571 }
572 }
573 else if (i==0 && j>0)
574 {
575 if (matchStartY==-1)
576 {
577 matchStartY = j+1;
578 }
579
580 ss << pathY[i*MAXLEN+j];
581 trace_path(pathY[i*MAXLEN+j], i, j-1);
582 }
583 else if (i>0 && j==0)
584 {
585 if (matchStartY==-1)
586 {
587 matchStartY = j+1;
588 }
589
590 ss << pathX[i*MAXLEN+j];
591 trace_path(pathX[i*MAXLEN+j], i-1, j);
592 }
593 else
594 {
595 }
596 }
597
598 /**
599 * Left align gaps in an alignment
600 */
left_align()601 void LHMM::left_align()
602 {
603 for (uint32_t i=0; i<indelStatusInPath.size(); ++i)
604 {
605 if (indelStatusInPath[i]=='I')
606 {
607 while (indelStartsInX[i]>1 && indelStartsInY[i]>1)
608 {
609 if (//(indelStartsInY[i]!=indelEndsInY[i]-1) && //single indel need not be shifted
610 x[indelStartsInX[i]-1]==y[indelStartsInY[i]-1] &&
611 y[indelStartsInY[i]-1]==y[indelEndsInY[i]-1])
612 {
613 path[indelStartsInPath[i]-1] = 'I';
614 path[indelEndsInPath[i]-1] = 'M';
615
616 --indelStartsInX[i];
617 --indelEndsInX[i];
618 --indelStartsInY[i];
619 --indelEndsInY[i];
620 --indelStartsInPath[i];
621 --indelEndsInPath[i];
622 }
623 else
624 {
625 break;
626 }
627 }
628 }
629 else
630 {
631 while (indelStartsInX[i]>1 && indelStartsInY[i]>1)
632 {
633 if (//(indelStartsInX[i]!=indelEndsInX[i]-1) && //single indel need not be shifted
634 x[indelStartsInX[i]-1]==y[indelStartsInY[i]-1] &&
635 x[indelStartsInX[i]-1]==x[indelEndsInX[i]-1])
636 {
637 path[indelStartsInPath[i]-1] = 'D';
638 path[indelEndsInPath[i]-1] = 'M';
639
640 --indelStartsInX[i];
641 --indelEndsInX[i];
642 --indelStartsInY[i];
643 --indelEndsInY[i];
644 --indelStartsInPath[i];
645 --indelEndsInPath[i];
646 }
647 else
648 {
649 break;
650 }
651 }
652
653 }
654 }
655 }
656
657 /**
658 * Compute log10 emission odds based on equal error probability distribution.
659 * Substracting log10(1/16).
660 */
log10_emission_odds(char readBase,char probeBase,double e)661 double LHMM::log10_emission_odds(char readBase, char probeBase, double e)
662 {
663 //4 encodes for N
664 if (readBase=='N' || probeBase=='N')
665 {
666 //silent match
667 return 0;
668 }
669
670 if (readBase!=probeBase)
671 {
672 return log10(e/3)-logOneSixteenth;
673 }
674 else
675 {
676 return log10(1-e)-logOneSixteenth;
677 }
678 };
679
680 /**
681 * Reverses a string.
682 */
reverse(std::string s)683 std::string LHMM::reverse(std::string s)
684 {
685 std::string rs;
686 for (std::string::reverse_iterator rit=s.rbegin() ; rit < s.rend(); rit++ )
687 {
688 rs.push_back(*rit);
689 }
690
691 return rs;
692 };
693
694 /**
695 * Checks if insertion exists in alignment.
696 */
insertion_start_exists(uint32_t pos,uint32_t & rpos)697 bool LHMM::insertion_start_exists(uint32_t pos, uint32_t& rpos)
698 {
699 rpos = 0;
700 for (uint32_t i=0; i<indelStatusInPath.size(); ++i)
701 {
702 if (indelStatusInPath[i]=='I' &&
703 indelStartsInX[i]==pos)
704 {
705 rpos = indelStartsInY[i];
706 return true;
707 }
708 }
709
710 return false;
711 }
712
713 /**
714 * Checks if deletion exists in alignment.
715 */
deletion_start_exists(uint32_t pos,uint32_t & rpos)716 bool LHMM::deletion_start_exists(uint32_t pos, uint32_t& rpos)
717 {
718 rpos = 0;
719 for (uint32_t i=0; i<indelStatusInPath.size(); ++i)
720 {
721 if (indelStatusInPath[i]=='D' &&
722 indelStartsInX[i]==pos)
723 {
724 rpos = indelStartsInY[i];
725 return true;
726 }
727 }
728
729 return false;
730 }
731
732 /**
733 * Prints an alignment.
734 */
print_alignment()735 void LHMM::print_alignment()
736 {
737 std::string pad = "\t";
738 print_alignment(pad);
739 };
740
741 /**
742 * Prints an alignment with padding.
743 */
print_alignment(std::string & pad)744 void LHMM::print_alignment(std::string& pad)
745 {
746 std::stringstream xAligned;
747 std::stringstream yAligned;
748 std::stringstream qualAligned;
749 uint32_t xIndex=0, yIndex=0;
750 for (uint32_t i=0; i<path.size(); ++i)
751 {
752 char state = path.at(i);
753 if (state=='S' || state=='E')
754 {
755 xAligned << state;
756 yAligned << state;
757 qualAligned << state;
758 }
759 else if (state=='X'||state=='W')
760 {
761 xAligned << x[xIndex++];
762 yAligned << '.';
763 qualAligned << '.';
764 }
765 else if (state=='M')
766 {
767 xAligned << x[xIndex++];
768 yAligned << y[yIndex];
769 qualAligned << qual[yIndex++];
770 }
771 else if (state=='D')
772 {
773 xAligned << x[xIndex++];
774 yAligned << '-';
775 qualAligned << '-';
776 }
777 else if (state=='I')
778 {
779 xAligned << '-';
780 yAligned << y[yIndex];
781 qualAligned << qual[yIndex++];
782 }
783 else if (state=='Y'||state=='Z')
784 {
785 xAligned << '.';
786 yAligned << y[yIndex];
787 qualAligned << qual[yIndex++];
788 }
789 }
790
791 std::cerr << pad << "X " << xAligned.str() << "E\n";
792 std::cerr << pad << "Path " << path << "E\n";
793 std::cerr << pad << "Y " << yAligned.str() << "E\n";
794 std::cerr << pad << "Qual " << qualAligned.str() << "E\n\n";
795
796 std::cerr << pad << "Alignment in Probe : [" << matchStartX << "," << matchEndX<< "]\n";
797 std::cerr << pad << "Alignment in Read : [" << matchStartY << "," << matchEndY<< "]\n";
798 std::cerr << pad << "# Aligned Bases : " << noBasesAligned << "\n";
799 std::cerr << pad << "# Matched Bases : " << matchedBases << "\n";
800 std::cerr << pad << "# Mismatched Bases : " << mismatchedBases << "\n";
801
802 std::cerr << pad << "# INS in Probe : ";
803 for (uint32_t i=0; i<indelStatusInPath.size(); ++i)
804 {
805 if (indelStatusInPath[i]=='I')
806 {
807 std::cerr << "[" << indelStartsInX[i] << "," << indelEndsInX[i] <<"] ";
808 }
809 }
810 std::cerr << "\n";
811
812 std::cerr << pad << "# DELS in Probe : ";
813 for (uint32_t i=0; i<indelStatusInPath.size(); ++i)
814 {
815 if (indelStatusInPath[i]=='D')
816 {
817 std::cerr << "[" << indelStartsInX[i] << "," << indelEndsInX[i] <<"] ";
818 }
819 }
820 std::cerr << "\n";
821
822 std::cerr << pad << "# INS in Read : ";
823 for (uint32_t i=0; i<indelStatusInPath.size(); ++i)
824 {
825 if (indelStatusInPath[i]=='I')
826 {
827 std::cerr << "[" << indelStartsInY[i] << "," << indelEndsInY[i] <<"] ";
828 }
829 }
830 std::cerr << "\n";
831
832 std::cerr << pad << "# DELS in Read : ";
833 for (uint32_t i=0; i<indelStatusInPath.size(); ++i)
834 {
835 if (indelStatusInPath[i]=='D')
836 {
837 std::cerr << "[" << indelStartsInY[i] << "," << indelEndsInY[i] <<"] ";
838 }
839 }
840 std::cerr << "\n";
841
842 std::cerr << pad << "Max Log odds : " << maxLogOdds << "\n";
843 };
844
845 /**
846 * Prints a double matrix.
847 */
print(double * v,uint32_t xlen,uint32_t ylen)848 void LHMM::print(double *v, uint32_t xlen, uint32_t ylen)
849 {
850 for (uint32_t i=0; i<xlen; ++i)
851 {
852 for (uint32_t j=0; j<ylen; ++j)
853 {
854 std::cerr << (v[i*MAXLEN+j]==-DBL_MAX?-1000:v[i*MAXLEN+j]) << "\t";
855 }
856
857 std::cerr << "\n";
858 }
859 };
860
861 /**
862 * Prints a char matrix.
863 */
print(char * v,uint32_t xlen,uint32_t ylen)864 void LHMM::print(char *v, uint32_t xlen, uint32_t ylen)
865 {
866 for (uint32_t i=0; i<xlen; ++i)
867 {
868 for (uint32_t j=0; j<ylen; ++j)
869 {
870 std::cerr << v[i*MAXLEN+j] << "\t";
871 }
872
873 std::cerr << "\n";
874 }
875 };
876
877 #undef MAXLEN
878 #undef S
879 #undef X
880 #undef Y
881 #undef M
882 #undef I
883 #undef D
884 #undef W
885 #undef Z
886 #undef E