1 /* The MIT License
2 
3    Copyright (c) 2013 Adrian Tan <atks@umich.edu>
4 
5    Permission is hereby granted, free of charge, to any person obtaining a copy
6    of this software and associated documentation files (the "Software"), to deal
7    in the Software without restriction, including without limitation the rights
8    to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9    copies of the Software, and to permit persons to whom the Software is
10    furnished to do so, subject to the following conditions:
11 
12    The above copyright notice and this permission notice shall be included in
13    all copies or substantial portions of the Software.
14 
15    THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16    IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17    FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18    AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19    LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20    OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
21    THE SOFTWARE.
22 */
23 
24 #include "lhmm.h"
25 
26 #define MAXLEN 250
27 #define S 0
28 #define X 1
29 #define Y 2
30 #define M 3
31 #define I 4
32 #define D 5
33 #define W 6
34 #define Z 7
35 #define E 8
36 
37 /**
38  * Constructor.
39  */
LHMM()40 LHMM::LHMM()
41 {
42     initialize();
43 };
44 
45 /**
46  * Destructor.
47  */
~LHMM()48 LHMM::~LHMM()
49 {
50     delete scoreX;
51     delete scoreY;
52     delete scoreM;
53     delete scoreI;
54     delete scoreD;
55     delete scoreW;
56     delete scoreZ;
57 
58     delete pathX;
59     delete pathY;
60     delete pathM;
61     delete pathI;
62     delete pathD;
63     delete pathW;
64     delete pathZ;
65 };
66 
67 /**
68  * Initializes object, helper function for constructor.
69  */
initialize()70 void LHMM::initialize()
71 {
72     delta = 0.001;
73     epsilon = 0.5;
74     tau = 0.1;
75     eta = 0.001;
76 
77     logOneSixteenth = log10(1.0/16.0);
78 
79     transition[X][X] = 0; //log10((1-eta)/(1-eta));
80     transition[X][Y] = 0; //log10((eta*(1-eta))/(eta*(1-eta)));
81     transition[Y][Y] = 0; //log10((1-eta)/(1-eta));
82 
83     transition[X][M] = log10(1/((1-eta)*(1-eta))); //log10((eta*eta)/(eta*eta*(1-eta)*(1-eta)));
84     transition[Y][M] = transition[X][M]; //log10((eta)/(eta*(1-eta)*(1-eta)));
85     transition[S][M] = log10(1/(eta*(1-eta)*(1-eta))); //log10((eta*eta)/(eta*eta*eta*(1-eta)*(1-eta)));
86     transition[M][M] = log10((1-2*delta-tau)/((1-eta)*(1-eta)));
87     transition[I][M] = log10((1-epsilon)/(1-eta));
88     transition[D][M] = transition[I][M]; //log10((1-epsilon)/(1-eta));
89 
90     transition[M][D] = log10(delta/(1-eta));
91     transition[D][D] = log10(epsilon/(1-eta));
92 
93     transition[M][I] = transition[M][D]; //log10(delta::/(1-eta));
94     transition[I][I] = transition[D][D]; //log10(epsilon/(1-eta));
95 
96     transition[M][W] = log10(tau/eta); //log10((tau*(1-eta))/(eta*(1-eta)));
97     transition[W][W] = 0; //log10((1-eta)/(1-eta));
98 
99     transition[M][Z] = transition[M][W]; //log10((tau*eta*(1-eta))/(eta*eta*(1-eta)));
100     transition[W][Z] = 0; //log10((eta*(1-eta))/(eta*(1-eta)));
101     transition[Z][Z] = 0; //log10((1-eta)/(1-eta));
102 
103     scoreX = new double[MAXLEN*MAXLEN];
104     scoreY = new double[MAXLEN*MAXLEN];
105     scoreM = new double[MAXLEN*MAXLEN];
106     scoreI = new double[MAXLEN*MAXLEN];
107     scoreD = new double[MAXLEN*MAXLEN];
108     scoreW = new double[MAXLEN*MAXLEN];
109     scoreZ = new double[MAXLEN*MAXLEN];
110 
111     pathX = new char[MAXLEN*MAXLEN];
112     pathY = new char[MAXLEN*MAXLEN];
113     pathM = new char[MAXLEN*MAXLEN];
114     pathI = new char[MAXLEN*MAXLEN];
115     pathD = new char[MAXLEN*MAXLEN];
116     pathW = new char[MAXLEN*MAXLEN];
117     pathZ = new char[MAXLEN*MAXLEN];
118 
119     //assume alignments can't possibly be maxLength bases or more
120     for (int32_t i=0; i<MAXLEN; ++i)
121     {
122         for (int32_t j=0; j<MAXLEN; ++j)
123         {
124             scoreX[i*MAXLEN+j] = -DBL_MAX;
125             scoreY[i*MAXLEN+j] = -DBL_MAX;
126             scoreM[i*MAXLEN+j] = -DBL_MAX;
127             scoreI[i*MAXLEN+j] = -DBL_MAX;
128             scoreD[i*MAXLEN+j] = -DBL_MAX;
129             scoreW[i*MAXLEN+j] = -DBL_MAX;
130             scoreZ[i*MAXLEN+j] = -DBL_MAX;
131 
132             if (j)
133             {
134                 pathX[i*MAXLEN+j] = 'Y';
135                 pathY[i*MAXLEN+j] = 'Y';
136                 pathM[i*MAXLEN+j] = 'Y';
137                 pathI[i*MAXLEN+j] = 'Y';
138                 pathD[i*MAXLEN+j] = 'Y';
139                 pathW[i*MAXLEN+j] = 'Y';
140                 pathZ[i*MAXLEN+j] = 'Y';
141             }
142             else
143             {
144                 pathX[i*MAXLEN+j] = 'X';
145                 pathY[i*MAXLEN+j] = 'X';
146                 pathM[i*MAXLEN+j] = 'X';
147                 pathI[i*MAXLEN+j] = 'X';
148                 pathD[i*MAXLEN+j] = 'X';
149                 pathW[i*MAXLEN+j] = 'X';
150                 pathZ[i*MAXLEN+j] = 'X';
151             }
152         }
153     }
154 
155     logEta = log10(eta);
156     logTau = log10(tau);
157 
158     scoreX[0*MAXLEN+0] = 0;
159     scoreY[0*MAXLEN+0] = 0;
160     scoreM[0*MAXLEN+0] = 0;
161     scoreW[0*MAXLEN+0] = 0;
162     scoreZ[0*MAXLEN+0] = 0;
163     pathX[0*MAXLEN+0] = 'N';
164     pathX[1*MAXLEN+0] = 'S';
165     pathY[0*MAXLEN+0] = 'N';
166     pathY[0*MAXLEN+1] = 'S';
167     pathM[0*MAXLEN+0] = 'N';
168     pathM[1*MAXLEN+1] = 'S';
169 
170     for (uint32_t k=1; k<MAXLEN; ++k)
171     {
172         scoreX[k*MAXLEN+0] = scoreX[(k-1)*MAXLEN+0] + transition[X][X];
173         scoreX[0*MAXLEN+k] = -DBL_MAX;
174         scoreY[k*MAXLEN+0] = -DBL_MAX;
175         scoreY[0*MAXLEN+k] = scoreY[0*MAXLEN+(k-1)] + transition[Y][Y];
176         scoreW[k*MAXLEN+0] = scoreW[(k-1)*MAXLEN+0] + transition[W][W];
177         scoreW[0*MAXLEN+k] = -DBL_MAX;
178         scoreZ[k*MAXLEN+0] = -DBL_MAX;
179         scoreZ[0*MAXLEN+k] = scoreZ[0*MAXLEN+(k-1)] + transition[Z][Z];
180     }
181 
182     scoreX[0*MAXLEN+0] = -DBL_MAX;
183     scoreY[0*MAXLEN+0] = -DBL_MAX;
184     scoreW[0*MAXLEN+0] = -DBL_MAX;
185     scoreZ[0*MAXLEN+0] = -DBL_MAX;
186 };
187 
188 /**
189  * Align y against x.
190  */
align(double & llk,const char * x,const char * y,const char * qual,bool debug)191 void LHMM::align(double& llk, const char* x, const char* y, const char* qual, bool debug)
192 {
193     //std::cerr << "Running this\n" ;
194     this->x = x;
195     this->y = y;
196     this->qual = qual;
197 
198     //adds a starting character at the fron of each string that must be matched
199     xlen = strlen(x);
200     ylen = strlen(y);
201     double max = 0;
202     char maxPath = 'X';
203 
204     //construct possible solutions
205     for (uint32_t i=1; i<=xlen; ++i)
206     {
207         for (uint32_t j=1; j<=ylen; ++j)
208         {
209             //X
210             double xx = scoreX[(i-1)*MAXLEN+j] + transition[X][X];
211 
212             max = xx;
213             maxPath = 'X';
214 
215             scoreX[i*MAXLEN+j] = max;
216             pathX[i*MAXLEN+j] = maxPath;
217 
218             //Y
219             double xy = scoreX[i*MAXLEN+(j-1)] + transition[X][Y];
220             double yy = scoreY[i*MAXLEN+(j-1)] + transition[Y][Y];
221 
222             max = xy;
223             maxPath = 'X';
224 
225             if (yy>max)
226             {
227                 max = yy;
228                 maxPath = 'Y';
229             }
230 
231             scoreY[i*MAXLEN+j] = max;
232             pathY[i*MAXLEN+j] = maxPath;
233 
234             //M
235             double xm = scoreX[(i-1)*MAXLEN+(j-1)] + transition[X][M];
236             double ym = scoreY[(i-1)*MAXLEN+(j-1)] + transition[Y][M];
237             double mm = scoreM[(i-1)*MAXLEN+(j-1)] + ((i==1&&j==1) ? transition[S][M] : transition[M][M]);
238             double im = scoreI[(i-1)*MAXLEN+(j-1)] + transition[I][M];
239             double dm = scoreD[(i-1)*MAXLEN+(j-1)] + transition[D][M];
240 
241             max = xm;
242             maxPath = 'X';
243 
244             if (ym>max) //special case
245             {
246                 max = ym;
247                 maxPath = 'Y';
248             }
249             if (mm>max)
250             {
251                 max = mm;
252                 maxPath = (i==1&&j==1) ? 'S' : 'M';
253             }
254             if (im>max)
255             {
256                 max = im;
257                 maxPath = 'I';
258             }
259             if (dm>max)
260             {
261                 max = dm;
262                 maxPath = 'D';
263             }
264 
265             scoreM[i*MAXLEN+j] = max + log10_emission_odds(x[i-1], y[j-1], LogTool::pl2prob((uint32_t) qual[j-1]-33));
266             pathM[i*MAXLEN+j] = maxPath;
267 
268             //D
269             double md = scoreM[(i-1)*MAXLEN+j] + transition[M][D];
270             double dd = scoreD[(i-1)*MAXLEN+j] + transition[D][D];
271 
272             max = md;
273             maxPath = 'M';
274 
275             if (dd>max)
276             {
277                 max = dd;
278                 maxPath = 'D';
279             }
280 
281             scoreD[i*MAXLEN+j] = max;
282             pathD[i*MAXLEN+j] = maxPath;
283 
284             //I
285             double mi = scoreM[i*MAXLEN+(j-1)] + transition[M][I];
286             double ii = scoreI[i*MAXLEN+(j-1)] + transition[I][I];
287 
288             max = mi;
289             maxPath = 'M';
290 
291             if (ii>max)
292             {
293                 max = ii;
294                 maxPath = 'I';
295             }
296 
297             scoreI[i*MAXLEN+j] = max;
298             pathI[i*MAXLEN+j] = maxPath;
299 
300             //W
301             double mw = scoreM[(i-1)*MAXLEN+j] + transition[M][W];
302             double ww = scoreW[(i-1)*MAXLEN+j] + transition[W][W];
303 
304             max = mw;
305             maxPath = 'M';
306 
307             if (ww>max)
308             {
309                 max = ww;
310                 maxPath = 'W';
311             }
312 
313             scoreW[i*MAXLEN+j] = max;
314             pathW[i*MAXLEN+j] = maxPath;
315 
316             //Z
317             double mz = scoreM[i*MAXLEN+(j-1)] + transition[M][Z];
318             double wz = scoreW[i*MAXLEN+(j-1)] + transition[W][Z];
319             double zz = scoreZ[i*MAXLEN+(j-1)] + transition[Z][Z];
320 
321             max = mz;
322             maxPath = 'M';
323 
324             if (wz>max)
325             {
326                 max = wz;
327                 maxPath = 'W';
328             }
329             if (zz>max)
330             {
331                 max = zz;
332                 maxPath = 'Z';
333             }
334 
335             scoreZ[i*MAXLEN+j] = max;
336             pathZ[i*MAXLEN+j] = maxPath;
337         }
338 
339         scoreM[xlen*MAXLEN+ylen] += logTau-logEta;
340     }
341 
342     if (debug)
343     {
344         std::cerr << "\n=X=\n";
345         print(scoreX, xlen+1, ylen+1);
346         std::cerr << "\n=Y=\n";
347         print(scoreY, xlen+1, ylen+1);
348         std::cerr << "\n=M=\n";
349         print(scoreM, xlen+1, ylen+1);
350         std::cerr << "\n=D=\n";
351         print(scoreD, xlen+1, ylen+1);
352         std::cerr << "\n=I=\n";
353         print(scoreI, xlen+1, ylen+1);
354         std::cerr << "\n=W=\n";
355         print(scoreW, xlen+1, ylen+1);
356         std::cerr << "\n=Z=\n";
357         print(scoreZ, xlen+1, ylen+1);
358         std::cerr << "\n=Path X=\n";
359         print(pathX, xlen+1, ylen+1);
360         std::cerr << "\n=Path Y=\n";
361         print(pathY, xlen+1, ylen+1);
362         std::cerr << "\n=Path M=\n";
363         print(pathM, xlen+1, ylen+1);
364         std::cerr << "\n=Path D=\n";
365         print(pathD, xlen+1, ylen+1);
366         std::cerr << "\n=Path I=\n";
367         print(pathI, xlen+1, ylen+1);
368         std::cerr << "\n=Path W=\n";
369         print(pathW, xlen+1, ylen+1);
370         std::cerr << "\n=Path Z=\n";
371         print(pathZ, xlen+1, ylen+1);
372     }
373 
374     trace_path();
375 };
376 
377 /**
378  * Updates matchStart, matchEnd, globalMaxPath and path.
379  */
trace_path()380 void LHMM::trace_path()
381 {
382     double globalMax = scoreM[xlen*MAXLEN+ylen];
383     char globalMaxPath = 'M';
384     if (scoreW[xlen*MAXLEN+ylen]>globalMax)
385     {
386         globalMax = scoreW[xlen*MAXLEN+ylen];
387         globalMaxPath = 'W';
388     }
389     if (scoreZ[xlen*MAXLEN+ylen]>globalMax)
390     {
391         globalMax = scoreZ[xlen*MAXLEN+ylen];
392         globalMaxPath = 'Z';
393     }
394 
395     matchStartX = -1;
396     matchEndX = -1;
397     matchStartY = -1;
398     matchEndY = -1;
399     noBasesAligned = 0;
400     if (globalMaxPath == 'M')
401     {
402         matchEndX = xlen;
403         matchEndY = ylen;
404         ++noBasesAligned;
405     }
406 
407     maxLogOdds = globalMax;
408 
409     ss.str("");
410     ss << globalMaxPath;
411 
412     //recursively trace path
413     trace_path(globalMaxPath, xlen, ylen);
414 
415     path = reverse(ss.str());
416 
417     bool inI = false;
418     bool inD = false;
419 
420     //records appearance of Indels in probe and read
421     indelStartsInX.clear();
422     indelEndsInX.clear();
423     indelStartsInY.clear();
424     indelEndsInY.clear();
425     indelStartsInPath.clear();
426     indelEndsInPath.clear();
427     indelStatusInPath.clear();
428 
429     matchedBases = 0;
430     mismatchedBases = 0;
431 
432     uint32_t x_index=0, y_index=0;
433     for (uint32_t i=0; i<path.size(); ++i)
434     {
435         if (!inI && path[i]=='I')
436         {
437             inI = true;
438             indelStartsInPath.push_back(i);
439             indelStatusInPath.push_back('I');
440             indelStartsInX.push_back(x_index);
441             indelStartsInY.push_back(y_index);
442         }
443 
444         if (inI && path[i]!='I')
445         {
446             inI = false;
447             indelEndsInPath.push_back(i);
448             indelEndsInX.push_back(x_index);
449             indelEndsInY.push_back(y_index);
450         }
451 
452         if (!inD && path[i]=='D')
453         {
454             inD = true;
455             indelStartsInPath.push_back(i);
456             indelStatusInPath.push_back('D');
457             indelStartsInX.push_back(x_index);
458             indelStartsInY.push_back(y_index);
459         }
460 
461         if (inD && path[i]!='D')
462         {
463             inD = false;
464             indelEndsInPath.push_back(i);
465             indelEndsInX.push_back(x_index);
466             indelEndsInY.push_back(y_index);
467         }
468 
469         if (path[i]=='M')
470         {
471             if (x[x_index]== y[y_index])
472             {
473                 ++matchedBases;
474             }
475             else
476             {
477                 ++mismatchedBases;
478             }
479 
480             ++x_index;
481             ++y_index;
482         }
483 
484         if (path[i]=='I' || path[i]=='Y' || path[i]=='Z')
485         {
486             ++y_index;
487         }
488 
489         if (path[i]=='D' || path[i]=='X' || path[i]=='W')
490         {
491             ++x_index;
492         }
493     }
494 
495     left_align();
496 }
497 
498 /**
499  * Recursive call for trace_path
500  */
trace_path(char state,uint32_t i,uint32_t j)501 void LHMM::trace_path(char state, uint32_t i, uint32_t j)
502 {
503     if (i>0 && j>0)
504     {
505         if (state=='X')
506         {
507             ss << pathX[i*MAXLEN+j];
508             trace_path(pathX[i*MAXLEN+j], i-1, j);
509         }
510         else if (state=='Y')
511         {
512             ss << pathY[i*MAXLEN+j];
513             trace_path(pathY[i*MAXLEN+j], i, j-1);
514         }
515         else if (state=='M')
516         {
517             if (matchStartX==-1 && (pathM[i*MAXLEN+j] =='X' || pathM[i*MAXLEN+j]=='Y'))
518             {
519                matchStartX = i;
520                matchStartY = j;
521             }
522 
523             ss << pathM[i*MAXLEN+j];
524             trace_path(pathM[i*MAXLEN+j], i-1, j-1);
525             ++noBasesAligned;
526         }
527         else if (state=='I')
528         {
529             ss << pathI[i*MAXLEN+j];
530             trace_path(pathI[i*MAXLEN+j], i, j-1);
531         }
532         else if (state=='D')
533         {
534             ss << pathD[i*MAXLEN+j];
535             trace_path(pathD[i*MAXLEN+j], i-1, j);
536             ++noBasesAligned;
537         }
538         else if (state=='W')
539         {
540             if (matchEndX==-1 && pathW[i*MAXLEN+j] =='M')
541             {
542                 matchEndX = i-1;
543                 matchEndY = j;
544             }
545 
546             ss << pathW[i*MAXLEN+j];
547             trace_path(pathW[i*MAXLEN+j], i-1, j);
548         }
549         else if (state=='Z')
550         {
551             if (matchEndX==-1 && pathZ[i*MAXLEN+j] =='M')
552             {
553                 matchEndX = i;
554                 matchEndY = j-1;
555             }
556 
557             ss << pathZ[i*MAXLEN+j];
558             trace_path(pathZ[i*MAXLEN+j], i, j-1);
559         }
560         else if (state=='S')
561         {
562             if (matchStartX==-1)
563             {
564                matchStartX = i+1;
565             }
566 
567             if (matchStartY==-1)
568             {
569                matchStartY = j+1;
570             }
571         }
572     }
573     else if (i==0 && j>0)
574     {
575         if (matchStartY==-1)
576         {
577            matchStartY = j+1;
578         }
579 
580         ss << pathY[i*MAXLEN+j];
581         trace_path(pathY[i*MAXLEN+j], i, j-1);
582     }
583     else if (i>0 && j==0)
584     {
585         if (matchStartY==-1)
586         {
587            matchStartY = j+1;
588         }
589 
590         ss << pathX[i*MAXLEN+j];
591         trace_path(pathX[i*MAXLEN+j], i-1, j);
592     }
593     else
594     {
595     }
596 }
597 
598 /**
599  * Left align gaps in an alignment
600  */
left_align()601 void LHMM::left_align()
602 {
603     for (uint32_t i=0; i<indelStatusInPath.size(); ++i)
604     {
605         if (indelStatusInPath[i]=='I')
606         {
607             while (indelStartsInX[i]>1 && indelStartsInY[i]>1)
608             {
609                 if (//(indelStartsInY[i]!=indelEndsInY[i]-1) && //single indel need not be shifted
610                     x[indelStartsInX[i]-1]==y[indelStartsInY[i]-1] &&
611                     y[indelStartsInY[i]-1]==y[indelEndsInY[i]-1])
612                 {
613                     path[indelStartsInPath[i]-1] = 'I';
614                     path[indelEndsInPath[i]-1] = 'M';
615 
616                     --indelStartsInX[i];
617                     --indelEndsInX[i];
618                     --indelStartsInY[i];
619                     --indelEndsInY[i];
620                     --indelStartsInPath[i];
621                     --indelEndsInPath[i];
622                 }
623                 else
624                 {
625                     break;
626                 }
627             }
628         }
629         else
630         {
631             while (indelStartsInX[i]>1 && indelStartsInY[i]>1)
632             {
633                 if (//(indelStartsInX[i]!=indelEndsInX[i]-1) && //single indel need not be shifted
634                     x[indelStartsInX[i]-1]==y[indelStartsInY[i]-1] &&
635                     x[indelStartsInX[i]-1]==x[indelEndsInX[i]-1])
636                 {
637                     path[indelStartsInPath[i]-1] = 'D';
638                     path[indelEndsInPath[i]-1] = 'M';
639 
640                     --indelStartsInX[i];
641                     --indelEndsInX[i];
642                     --indelStartsInY[i];
643                     --indelEndsInY[i];
644                     --indelStartsInPath[i];
645                     --indelEndsInPath[i];
646                 }
647                 else
648                 {
649                     break;
650                 }
651             }
652 
653         }
654     }
655 }
656 
657 /**
658  * Compute log10 emission odds based on equal error probability distribution.
659  * Substracting log10(1/16).
660  */
log10_emission_odds(char readBase,char probeBase,double e)661 double LHMM::log10_emission_odds(char readBase, char probeBase, double e)
662 {
663     //4 encodes for N
664     if (readBase=='N' || probeBase=='N')
665     {
666         //silent match
667         return 0;
668     }
669 
670     if (readBase!=probeBase)
671     {
672         return log10(e/3)-logOneSixteenth;
673     }
674     else
675     {
676         return log10(1-e)-logOneSixteenth;
677     }
678 };
679 
680 /**
681  * Reverses a string.
682  */
reverse(std::string s)683 std::string LHMM::reverse(std::string s)
684 {
685     std::string rs;
686     for (std::string::reverse_iterator rit=s.rbegin() ; rit < s.rend(); rit++ )
687     {
688         rs.push_back(*rit);
689     }
690 
691     return rs;
692 };
693 
694 /**
695  * Checks if insertion exists in alignment.
696  */
insertion_start_exists(uint32_t pos,uint32_t & rpos)697 bool LHMM::insertion_start_exists(uint32_t pos, uint32_t& rpos)
698 {
699     rpos = 0;
700     for (uint32_t i=0; i<indelStatusInPath.size(); ++i)
701     {
702         if (indelStatusInPath[i]=='I' &&
703             indelStartsInX[i]==pos)
704         {
705             rpos = indelStartsInY[i];
706             return true;
707         }
708     }
709 
710     return false;
711 }
712 
713 /**
714  * Checks if deletion exists in alignment.
715  */
deletion_start_exists(uint32_t pos,uint32_t & rpos)716 bool LHMM::deletion_start_exists(uint32_t pos, uint32_t& rpos)
717 {
718     rpos = 0;
719     for (uint32_t i=0; i<indelStatusInPath.size(); ++i)
720     {
721         if (indelStatusInPath[i]=='D' &&
722             indelStartsInX[i]==pos)
723         {
724             rpos = indelStartsInY[i];
725             return true;
726         }
727     }
728 
729     return false;
730 }
731 
732 /**
733  * Prints an alignment.
734  */
print_alignment()735 void LHMM::print_alignment()
736 {
737     std::string pad = "\t";
738     print_alignment(pad);
739 };
740 
741 /**
742  * Prints an alignment with padding.
743  */
print_alignment(std::string & pad)744 void LHMM::print_alignment(std::string& pad)
745 {
746     std::stringstream xAligned;
747     std::stringstream yAligned;
748     std::stringstream qualAligned;
749     uint32_t xIndex=0, yIndex=0;
750     for (uint32_t i=0; i<path.size(); ++i)
751     {
752         char state = path.at(i);
753         if (state=='S' || state=='E')
754         {
755             xAligned << state;
756             yAligned << state;
757             qualAligned << state;
758         }
759         else if (state=='X'||state=='W')
760         {
761             xAligned << x[xIndex++];
762             yAligned << '.';
763             qualAligned << '.';
764         }
765         else if (state=='M')
766         {
767             xAligned << x[xIndex++];
768             yAligned << y[yIndex];
769             qualAligned << qual[yIndex++];
770         }
771         else if (state=='D')
772         {
773             xAligned << x[xIndex++];
774             yAligned << '-';
775             qualAligned << '-';
776         }
777         else if (state=='I')
778         {
779             xAligned << '-';
780             yAligned << y[yIndex];
781             qualAligned << qual[yIndex++];
782         }
783         else if (state=='Y'||state=='Z')
784         {
785             xAligned << '.';
786             yAligned << y[yIndex];
787             qualAligned << qual[yIndex++];
788         }
789     }
790 
791     std::cerr << pad << "X    " << xAligned.str() << "E\n";
792     std::cerr << pad << "Path " << path << "E\n";
793     std::cerr << pad << "Y    " << yAligned.str() << "E\n";
794     std::cerr << pad << "Qual " << qualAligned.str() << "E\n\n";
795 
796     std::cerr << pad << "Alignment in Probe : [" << matchStartX << "," << matchEndX<< "]\n";
797     std::cerr << pad << "Alignment in Read  : [" << matchStartY << "," << matchEndY<< "]\n";
798     std::cerr << pad << "# Aligned Bases : " << noBasesAligned << "\n";
799     std::cerr << pad << "# Matched Bases : " << matchedBases << "\n";
800     std::cerr << pad << "# Mismatched Bases : " << mismatchedBases << "\n";
801 
802     std::cerr << pad << "# INS in Probe : ";
803     for (uint32_t i=0; i<indelStatusInPath.size(); ++i)
804     {
805         if (indelStatusInPath[i]=='I')
806         {
807             std::cerr << "[" << indelStartsInX[i] << "," << indelEndsInX[i] <<"] ";
808         }
809     }
810     std::cerr << "\n";
811 
812     std::cerr << pad << "# DELS in Probe : ";
813     for (uint32_t i=0; i<indelStatusInPath.size(); ++i)
814     {
815         if (indelStatusInPath[i]=='D')
816         {
817             std::cerr << "[" << indelStartsInX[i] << "," << indelEndsInX[i] <<"] ";
818         }
819     }
820     std::cerr << "\n";
821 
822     std::cerr << pad << "# INS in Read : ";
823     for (uint32_t i=0; i<indelStatusInPath.size(); ++i)
824     {
825         if (indelStatusInPath[i]=='I')
826         {
827             std::cerr << "[" << indelStartsInY[i] << "," << indelEndsInY[i] <<"] ";
828         }
829     }
830     std::cerr << "\n";
831 
832     std::cerr << pad << "# DELS in Read : ";
833     for (uint32_t i=0; i<indelStatusInPath.size(); ++i)
834     {
835         if (indelStatusInPath[i]=='D')
836         {
837             std::cerr << "[" << indelStartsInY[i] << "," << indelEndsInY[i] <<"] ";
838         }
839     }
840     std::cerr << "\n";
841 
842     std::cerr << pad << "Max Log odds    : " << maxLogOdds << "\n";
843 };
844 
845 /**
846  * Prints a double matrix.
847  */
print(double * v,uint32_t xlen,uint32_t ylen)848 void LHMM::print(double *v, uint32_t xlen, uint32_t ylen)
849 {
850     for (uint32_t i=0; i<xlen; ++i)
851     {
852         for (uint32_t j=0; j<ylen; ++j)
853         {
854             std::cerr << (v[i*MAXLEN+j]==-DBL_MAX?-1000:v[i*MAXLEN+j]) << "\t";
855         }
856 
857         std::cerr << "\n";
858     }
859 };
860 
861 /**
862  * Prints a char matrix.
863  */
print(char * v,uint32_t xlen,uint32_t ylen)864 void LHMM::print(char *v, uint32_t xlen, uint32_t ylen)
865 {
866     for (uint32_t i=0; i<xlen; ++i)
867     {
868         for (uint32_t j=0; j<ylen; ++j)
869         {
870           std::cerr << v[i*MAXLEN+j] << "\t";
871         }
872 
873         std::cerr << "\n";
874     }
875 };
876 
877 #undef MAXLEN
878 #undef S
879 #undef X
880 #undef Y
881 #undef M
882 #undef I
883 #undef D
884 #undef W
885 #undef Z
886 #undef E