1 /*
2 * myPerseus.cpp
3 *
4 *
5 * Created by Pat Schloss on 9/5/11.
6 * Copyright 2011 Patrick D. Schloss. All rights reserved.
7 *
8 */
9
10 #include "myPerseus.h"
11
12 /**************************************************************************************************/
13 int PERSEUSMAXINT = numeric_limits<int>::max();
14 /**************************************************************************************************/
15
binomial(int maxOrder)16 vector<vector<double> > Perseus::binomial(int maxOrder){
17 try {
18 vector<vector<double> > binomial(maxOrder+1);
19
20 for(int i=0;i<=maxOrder;i++){
21 binomial[i].resize(maxOrder+1);
22 binomial[i][0]=1;
23 binomial[0][i]=0;
24 }
25 binomial[0][0]=1;
26
27 binomial[1][0]=1;
28 binomial[1][1]=1;
29
30 for(int i=2;i<=maxOrder;i++){
31 binomial[1][i]=0;
32 }
33
34 for(int i=2;i<=maxOrder;i++){
35 for(int j=1;j<=maxOrder;j++){
36 if(i==j){ binomial[i][j]=1; }
37 if(j>i) { binomial[i][j]=0; }
38 else { binomial[i][j]=binomial[i-1][j-1]+binomial[i-1][j]; }
39 }
40 }
41
42 return binomial;
43 }
44 catch(exception& e) {
45 m->errorOut(e, "Perseus", "binomial");
46 exit(1);
47 }
48 }
49
50 /**************************************************************************************************/
basicPairwiseAlignSeqs(string query,string reference,string & qAlign,string & rAlign,pwModel model)51 double Perseus::basicPairwiseAlignSeqs(string query, string reference, string& qAlign, string& rAlign, pwModel model){
52 try {
53 double GAP = model.GAP_OPEN;
54 double MATCH = model.MATCH;
55 double MISMATCH = model.MISMATCH;
56
57 int queryLength = query.size();
58 int refLength = reference.size();
59
60 vector<vector<double> > alignMatrix(queryLength + 1);
61 vector<vector<char> > alignMoves(queryLength + 1);
62
63 for(int i=0;i<=queryLength;i++){
64 if (m->getControl_pressed()) { return 0; }
65 alignMatrix[i].resize(refLength + 1, 0);
66 alignMoves[i].resize(refLength + 1, 'x');
67 }
68
69 for(int i=0;i<=queryLength;i++){
70 if (m->getControl_pressed()) { return 0; }
71 alignMatrix[i][0] = GAP * i;
72 alignMoves[i][0] = 'u';
73 }
74
75 for(int i=0;i<=refLength;i++){
76 if (m->getControl_pressed()) { return 0; }
77 alignMatrix[0][i] = GAP * i;
78 alignMoves[0][i] = 'l';
79 }
80
81 for(int i=1;i<=queryLength;i++){
82
83 if (m->getControl_pressed()) { return 0; }
84
85 for(int j=1;j<=refLength;j++){
86
87 double nogapScore;
88 if(query[i-1] == reference[j-1]){ nogapScore = alignMatrix[i-1][j-1] + MATCH; }
89 else { nogapScore = alignMatrix[i-1][j-1] + MISMATCH; }
90
91 double leftScore;
92 if(i == queryLength) { leftScore = alignMatrix[i][j-1]; }
93 else { leftScore = alignMatrix[i][j-1] + GAP; }
94
95
96 double upScore;
97 if(j == refLength) { upScore = alignMatrix[i-1][j]; }
98 else { upScore = alignMatrix[i-1][j] + GAP; }
99
100 if(nogapScore > leftScore){
101 if(nogapScore > upScore){
102 alignMoves[i][j] = 'd';
103 alignMatrix[i][j] = nogapScore;
104 }
105 else{
106 alignMoves[i][j] = 'u';
107 alignMatrix[i][j] = upScore;
108 }
109 }
110 else{
111 if(leftScore > upScore){
112 alignMoves[i][j] = 'l';
113 alignMatrix[i][j] = leftScore;
114 }
115 else{
116 alignMoves[i][j] = 'u';
117 alignMatrix[i][j] = upScore;
118 }
119 }
120 }
121 }
122
123 int i = queryLength;
124 int j = refLength;
125
126 qAlign = "";
127 rAlign = "";
128
129 int diffs = 0;
130 int length = 0;
131
132 while(i > 0 && j > 0){
133
134 if (m->getControl_pressed()) { return 0; }
135
136 if(alignMoves[i][j] == 'd'){
137 qAlign = query[i-1] + qAlign;
138 rAlign = reference[j-1] + rAlign;
139
140 if(query[i-1] != reference[j-1]){ diffs++; }
141 length++;
142
143 i--;
144 j--;
145 }
146 else if(alignMoves[i][j] == 'u'){
147 qAlign = query[i-1] + qAlign;
148
149 if(j != refLength) { rAlign = '-' + rAlign; diffs++; length++; }
150 else { rAlign = '.' + rAlign; }
151 i--;
152 }
153 else if(alignMoves[i][j] == 'l'){
154 rAlign = reference[j-1] + rAlign;
155
156 if(i != queryLength){ qAlign = '-' + qAlign; diffs++; length++; }
157 else { qAlign = '.' + qAlign; }
158 j--;
159 }
160 }
161
162 while(i>0){
163
164 if (m->getControl_pressed()) { return 0; }
165
166 rAlign = '.' + rAlign;
167 qAlign = query[i-1] + qAlign;
168 i--;
169 }
170
171 while(j>0){
172
173 if (m->getControl_pressed()) { return 0; }
174
175 rAlign = reference[j-1] + rAlign;
176 qAlign = '.' + qAlign;
177 j--;
178 }
179
180
181
182 return double(diffs)/double(length);
183 }
184 catch(exception& e) {
185 m->errorOut(e, "Perseus", "basicPairwiseAlignSeqs");
186 exit(1);
187 }
188
189 }
190 /**************************************************************************************************/
getDiffs(string qAlign,string rAlign,vector<int> & leftDiffs,vector<int> & leftMap,vector<int> & rightDiffs,vector<int> & rightMap)191 int Perseus::getDiffs(string qAlign, string rAlign, vector<int>& leftDiffs, vector<int>& leftMap, vector<int>& rightDiffs, vector<int>& rightMap){
192 try {
193 int alignLength = qAlign.length();
194
195 int lDiffs = 0;
196 int lCount = 0;
197 for(int l=0;l<alignLength;l++){
198
199 if (m->getControl_pressed()) { return 0; }
200
201 if(qAlign[l] == '-'){
202 lDiffs++;
203 }
204 else if(qAlign[l] != '.'){
205
206 if(rAlign[l] == '-'){
207 lDiffs++;
208 }
209 else if(qAlign[l] != rAlign[l] && rAlign[l] != '.'){
210 lDiffs++;
211 }
212 leftDiffs[lCount] = lDiffs;
213 leftMap[lCount] = l;
214
215 lCount++;
216 }
217 }
218
219 int rDiffs = 0;
220 int rCount = 0;
221 for(int l=alignLength-1;l>=0;l--){
222
223 if (m->getControl_pressed()) { return 0; }
224
225 if(qAlign[l] == '-'){
226 rDiffs++;
227 }
228 else if(qAlign[l] != '.'){
229
230 if(rAlign[l] == '-'){
231 rDiffs++;
232 }
233 else if(qAlign[l] != rAlign[l] && rAlign[l] != '.'){
234 rDiffs++;
235 }
236
237 rightDiffs[rCount] = rDiffs;
238 rightMap[rCount] = l;
239 rCount++;
240
241 }
242
243 }
244
245 return 0;
246 }
247 catch(exception& e) {
248 m->errorOut(e, "Perseus", "getDiffs");
249 exit(1);
250 }
251 }
252 /**************************************************************************************************/
getLastMatch(char direction,vector<vector<char>> & alignMoves,int i,int j,string & seqA,string & seqB)253 int Perseus::getLastMatch(char direction, vector<vector<char> >& alignMoves, int i, int j, string& seqA, string& seqB){
254 try {
255 char nullReturn = -1;
256
257 while(i>=1 && j>=1){
258
259 if (m->getControl_pressed()) { return 0; }
260
261 if(direction == 'd'){
262 if(seqA[i-1] == seqB[j-1]) { return seqA[i-1]; }
263 else { return nullReturn; }
264 }
265
266 else if(direction == 'l') { j--; }
267 else { i--; }
268
269 direction = alignMoves[i][j];
270 }
271
272 return nullReturn;
273 }
274 catch(exception& e) {
275 m->errorOut(e, "Perseus", "getLastMatch");
276 exit(1);
277 }
278 }
279
280 /**************************************************************************************************/
281
toInt(char b)282 int Perseus::toInt(char b){
283 try {
284 if(b == 'A') { return 0; }
285 else if(b == 'C') { return 1; }
286 else if(b == 'T') { return 2; }
287 else if(b == 'G') { return 3; }
288 else { m->mothurOut("[ERROR]: " + toString(b) + " is not ATGC.\n"); return -1; }
289 }
290 catch(exception& e) {
291 m->errorOut(e, "Perseus", "toInt");
292 exit(1);
293 }
294 }
295
296 /**************************************************************************************************/
297
modeledPairwiseAlignSeqs(string query,string reference,string & qAlign,string & rAlign,vector<vector<double>> & correctMatrix)298 double Perseus::modeledPairwiseAlignSeqs(string query, string reference, string& qAlign, string& rAlign, vector<vector<double> >& correctMatrix){
299 try {
300 int queryLength = query.size();
301 int refLength = reference.size();
302
303 vector<vector<double> > alignMatrix(queryLength + 1);
304 vector<vector<char> > alignMoves(queryLength + 1);
305
306 for(int i=0;i<=queryLength;i++){
307 if (m->getControl_pressed()) { return 0; }
308 alignMatrix[i].resize(refLength + 1, 0);
309 alignMoves[i].resize(refLength + 1, 'x');
310 }
311
312 for(int i=0;i<=queryLength;i++){
313 if (m->getControl_pressed()) { return 0; }
314 alignMatrix[i][0] = 15.0 * i;
315 alignMoves[i][0] = 'u';
316 }
317
318 for(int i=0;i<=refLength;i++){
319 if (m->getControl_pressed()) { return 0; }
320 alignMatrix[0][i] = 15.0 * i;
321 alignMoves[0][i] = 'l';
322 }
323
324 for(int i=1;i<=queryLength;i++){
325
326 if (m->getControl_pressed()) { return 0; }
327
328 for(int j=1;j<=refLength;j++){
329
330 double nogap;
331 nogap = alignMatrix[i-1][j-1] + correctMatrix[toInt(query[i-1])][toInt(reference[j-1])];
332
333 double gap;
334
335 double left;
336 if(i == queryLength){ //terminal gap
337 left = alignMatrix[i][j-1];
338 }
339 else{
340 if(reference[j-1] == getLastMatch('l', alignMoves, i, j, query, reference)){
341 gap = 4.0;
342 }
343 else{
344 gap = 15.0;
345 }
346
347 left = alignMatrix[i][j-1] + gap;
348 }
349
350 double up;
351 if(j == refLength){ //terminal gap
352 up = alignMatrix[i-1][j];
353 }
354 else{
355
356 if(query[i-1] == getLastMatch('u', alignMoves, i, j, query, reference)){
357 gap = 4.0;
358 }
359 else{
360 gap = 15.0;
361 }
362
363 up = alignMatrix[i-1][j] + gap;
364 }
365
366
367 if(nogap < left){
368 if(nogap < up){
369 alignMoves[i][j] = 'd';
370 alignMatrix[i][j] = nogap;
371 }
372 else{
373 alignMoves[i][j] = 'u';
374 alignMatrix[i][j] = up;
375 }
376 }
377 else{
378 if(left < up){
379 alignMoves[i][j] = 'l';
380 alignMatrix[i][j] = left;
381 }
382 else{
383 alignMoves[i][j] = 'u';
384 alignMatrix[i][j] = up;
385 }
386 }
387 }
388 }
389
390 int i = queryLength;
391 int j = refLength;
392
393 int alignLength = 0;
394
395 while(i > 0 && j > 0){
396
397 if (m->getControl_pressed()) { return 0; }
398
399 if(alignMoves[i][j] == 'd'){
400 qAlign = query[i-1] + qAlign;
401 rAlign = reference[j-1] + rAlign;
402 alignLength++;
403 i--;
404 j--;
405 }
406 else if(alignMoves[i][j] == 'u'){
407 if(j != refLength){
408 qAlign = query[i-1] + qAlign;
409 rAlign = '-' + rAlign;
410 alignLength++;
411 }
412
413 i--;
414 }
415 else if(alignMoves[i][j] == 'l'){
416 if(i != queryLength){
417 qAlign = '-' + qAlign;
418 rAlign = reference[j-1] + rAlign;
419 alignLength++;
420 }
421
422 j--;
423 }
424 }
425
426 return alignMatrix[queryLength][refLength] / (double)alignLength;
427 }
428 catch(exception& e) {
429 m->errorOut(e, "Perseus", "modeledPairwiseAlignSeqs");
430 exit(1);
431 }
432 }
433
434 /**************************************************************************************************/
getAlignments(int curSequenceIndex,vector<seqData> sequences,vector<pwAlign> & alignments,vector<vector<int>> & leftDiffs,vector<vector<int>> & leftMaps,vector<vector<int>> & rightDiffs,vector<vector<int>> & rightMaps,int & bestRefSeq,int & bestRefDiff,vector<bool> & restricted)435 int Perseus::getAlignments(int curSequenceIndex, vector<seqData> sequences, vector<pwAlign>& alignments, vector<vector<int> >& leftDiffs, vector<vector<int> >& leftMaps, vector<vector<int> >& rightDiffs, vector<vector<int> >& rightMaps, int& bestRefSeq, int& bestRefDiff, vector<bool>& restricted){
436 try {
437 int numSeqs = sequences.size();
438 //int bestSequenceMismatch = PERSEUSMAXINT;
439
440 string curSequence = sequences[curSequenceIndex].sequence;
441 int curFrequency = sequences[curSequenceIndex].frequency;
442
443 bestRefSeq = -1;
444
445 int bestIndex = -1;
446 int bestDiffs = PERSEUSMAXINT;
447 int comparisons = 0;
448
449 pwModel model(0, -1, -1.5);
450
451 for(int i=0;i<numSeqs;i++){
452
453 if (m->getControl_pressed()) { return 0; }
454
455 if(i != curSequenceIndex && restricted[i] != 1 && sequences[i].frequency >= 2 * curFrequency){
456 string refSequence = sequences[i].sequence;
457
458 leftDiffs[i].assign(curSequence.length(), 0);
459 leftMaps[i].assign(curSequence.length(), 0);
460 rightDiffs[i].assign(curSequence.length(), 0);
461 rightMaps[i].assign(curSequence.length(), 0);
462
463 basicPairwiseAlignSeqs(curSequence, refSequence, alignments[i].query, alignments[i].reference, model);
464
465
466 getDiffs(alignments[i].query, alignments[i].reference, leftDiffs[i], leftMaps[i], rightDiffs[i], rightMaps[i]);
467
468 int diffs = rightDiffs[i][curSequence.length()-1];
469
470 if(diffs < bestDiffs){
471 bestDiffs = diffs;
472 bestIndex = i;
473 }
474 comparisons++;
475 restricted[i] = 0;
476 }
477 else{
478 restricted[i] = 1;
479 }
480 }
481
482 bestRefSeq = bestIndex;
483 bestRefDiff = bestDiffs;
484
485 return comparisons;
486 }
487 catch(exception& e) {
488 m->errorOut(e, "Perseus", "getAlignments");
489 exit(1);
490 }
491 }
492 /**************************************************************************************************/
getChimera(vector<seqData> sequences,vector<vector<int>> & leftDiffs,vector<vector<int>> & rightDiffs,int & leftParent,int & rightParent,int & breakPoint,vector<int> & singleLeft,vector<int> & bestLeft,vector<int> & singleRight,vector<int> & bestRight,vector<bool> restricted)493 int Perseus::getChimera(vector<seqData> sequences,
494 vector<vector<int> >& leftDiffs,
495 vector<vector<int> >& rightDiffs,
496 int& leftParent,
497 int& rightParent,
498 int& breakPoint,
499 vector<int>& singleLeft,
500 vector<int>& bestLeft,
501 vector<int>& singleRight,
502 vector<int>& bestRight,
503 vector<bool> restricted){
504 try {
505 int numRefSeqs = restricted.size();
506 int seqLength = leftDiffs[0].size();
507
508 singleLeft.resize(seqLength, PERSEUSMAXINT);
509 bestLeft.resize(seqLength, -1);
510
511 for(int l=0;l<seqLength;l++){
512
513 if (m->getControl_pressed()) { return 0; }
514
515 for(int i=0;i<numRefSeqs;i++){
516
517 if(!restricted[i]){
518 if(((leftDiffs[i][l] < singleLeft[l]) && sequences[i].frequency) || ((leftDiffs[i][l] == singleLeft[l]) && (sequences[i].frequency > sequences[bestLeft[l]].frequency))){
519 singleLeft[l] = leftDiffs[i][l];
520 bestLeft[l] = i;
521 }
522 }
523 }
524 }
525
526 singleRight.resize(seqLength, PERSEUSMAXINT);
527 bestRight.resize(seqLength, -1);
528
529 for(int l=0;l<seqLength;l++){
530
531 if (m->getControl_pressed()) { return 0; }
532
533 for(int i=0;i<numRefSeqs;i++){
534
535 if(!restricted[i]){
536 if((rightDiffs[i][l] < singleRight[l] && sequences[i].frequency) || ((rightDiffs[i][l] == singleRight[l] && sequences[i].frequency > sequences[bestRight[l]].frequency))){
537 singleRight[l] = rightDiffs[i][l];
538 bestRight[l] = i;
539 }
540 }
541 }
542 }
543
544
545
546 int bestChimeraMismatches = PERSEUSMAXINT;
547 leftParent = -1;
548 rightParent = -1;
549 breakPoint = -1;
550
551 for(int l=0;l<seqLength-1;l++){
552
553 if (m->getControl_pressed()) { return 0; }
554
555 int chimera = singleLeft[l] + singleRight[seqLength - l - 2];
556 if(chimera < bestChimeraMismatches){
557 bestChimeraMismatches = chimera;
558 breakPoint = l;
559 leftParent = bestLeft[l];
560 rightParent = bestRight[seqLength - l - 2];
561 }
562 }
563
564 return bestChimeraMismatches;
565 }
566 catch(exception& e) {
567 m->errorOut(e, "Perseus", "getChimera");
568 exit(1);
569 }
570 }
571
572 /**************************************************************************************************/
573
stitchBimera(vector<pwAlign> & alignments,int leftParent,int rightParent,int breakPoint,vector<vector<int>> & leftMaps,vector<vector<int>> & rightMaps)574 string Perseus::stitchBimera(vector<pwAlign>& alignments, int leftParent, int rightParent, int breakPoint, vector<vector<int> >& leftMaps, vector<vector<int> >& rightMaps){
575 try {
576 int breakLeft = leftMaps[leftParent][breakPoint];
577 int breakRight = rightMaps[rightParent][rightMaps[rightParent].size() - breakPoint - 2];
578
579 string left = alignments[leftParent].reference;
580 string right = alignments[rightParent].reference;
581 string chimera = "";
582
583 for(int i=0;i<=breakLeft;i++){
584
585 if (m->getControl_pressed()) { return 0; }
586
587 if(left[i] != '-' && left[i] != '.'){
588 chimera += left[i];
589 }
590 }
591
592
593 for(int i=breakRight;i<right.length();i++){
594
595 if (m->getControl_pressed()) { return 0; }
596
597 if(right[i] != '-' && right[i] != '.'){
598 chimera += right[i];
599 }
600 }
601
602 return chimera;
603 }
604 catch(exception& e) {
605 m->errorOut(e, "Perseus", "stitchBimera");
606 exit(1);
607 }
608 }
609 /**************************************************************************************************/
getTrimera(vector<seqData> & sequences,vector<vector<int>> & leftDiffs,int & leftParent,int & middleParent,int & rightParent,int & breakPointA,int & breakPointB,vector<int> & singleLeft,vector<int> & bestLeft,vector<int> & singleRight,vector<int> & bestRight,vector<bool> restricted)610 int Perseus::getTrimera(vector<seqData>& sequences,
611 vector<vector<int> >& leftDiffs,
612 int& leftParent,
613 int& middleParent,
614 int& rightParent,
615 int& breakPointA,
616 int& breakPointB,
617 vector<int>& singleLeft,
618 vector<int>& bestLeft,
619 vector<int>& singleRight,
620 vector<int>& bestRight,
621 vector<bool> restricted){
622 try {
623 int numRefSeqs = leftDiffs.size();
624 int alignLength = leftDiffs[0].size();
625 int bestTrimeraMismatches = PERSEUSMAXINT;
626
627 leftParent = -1;
628 middleParent = -1;
629 rightParent = -1;
630
631 breakPointA = -1;
632 breakPointB = -1;
633
634 vector<vector<int> > minDelta(alignLength);
635 vector<vector<int> > minDeltaSeq(alignLength);
636
637 for(int i=0;i<alignLength;i++){
638 if (m->getControl_pressed()) { return 0; }
639 minDelta[i].assign(alignLength, PERSEUSMAXINT);
640 minDeltaSeq[i].assign(alignLength, -1);
641 }
642
643 for(int x=0;x<alignLength;x++){
644 for(int y=x;y<alignLength-1;y++){
645 for(int i=0;i<numRefSeqs;i++){
646
647 if (m->getControl_pressed()) { return 0; }
648
649 if(!restricted[i]){
650 int delta = leftDiffs[i][y] - leftDiffs[i][x];
651
652 if(delta < minDelta[x][y] || (delta == minDelta[x][y] && sequences[i].frequency > sequences[minDeltaSeq[x][y]].frequency)){
653 minDelta[x][y] = delta;
654 minDeltaSeq[x][y] = i;
655 }
656 }
657 }
658 minDelta[x][y] += singleLeft[x] + singleRight[alignLength - y - 2];
659
660 if(minDelta[x][y] < bestTrimeraMismatches){
661 bestTrimeraMismatches = minDelta[x][y];
662
663 breakPointA = x;
664 breakPointB = y;
665
666 leftParent = bestLeft[x];
667 middleParent = minDeltaSeq[x][y];
668 rightParent = bestRight[alignLength - y - 2];
669 }
670 }
671 }
672
673 return bestTrimeraMismatches;
674 }
675 catch(exception& e) {
676 m->errorOut(e, "Perseus", "getTrimera");
677 exit(1);
678 }
679 }
680
681 /**************************************************************************************************/
682
stitchTrimera(vector<pwAlign> alignments,int leftParent,int middleParent,int rightParent,int breakPointA,int breakPointB,vector<vector<int>> & leftMaps,vector<vector<int>> & rightMaps)683 string Perseus::stitchTrimera(vector<pwAlign> alignments, int leftParent, int middleParent, int rightParent, int breakPointA, int breakPointB, vector<vector<int> >& leftMaps, vector<vector<int> >& rightMaps){
684 try {
685 int p1SplitPoint = leftMaps[leftParent][breakPointA];
686 int p2SplitPoint = leftMaps[middleParent][breakPointB];
687 int p3SplitPoint = rightMaps[rightParent][rightMaps[rightParent].size() - breakPointB - 2];
688
689 string chimeraRefSeq;
690 for(int i=0;i<=p1SplitPoint;i++){
691 if (m->getControl_pressed()) { return chimeraRefSeq; }
692 if(alignments[leftParent].reference[i] != '-' && alignments[leftParent].reference[i] != '.'){
693 chimeraRefSeq += alignments[leftParent].reference[i];
694 }
695 }
696
697 for(int i=p1SplitPoint+1;i<=p2SplitPoint;i++){
698 if (m->getControl_pressed()) { return chimeraRefSeq; }
699 if(alignments[middleParent].reference[i] != '-' && alignments[middleParent].reference[i] != '.'){
700 chimeraRefSeq += alignments[middleParent].reference[i];
701 }
702 }
703
704 for(int i=p3SplitPoint;i<alignments[rightParent].reference.length();i++){
705 if (m->getControl_pressed()) { return chimeraRefSeq; }
706 if(alignments[rightParent].reference[i] != '-' && alignments[rightParent].reference[i] != '.'){
707 chimeraRefSeq += alignments[rightParent].reference[i];
708 }
709 }
710
711 return chimeraRefSeq;
712 }
713 catch(exception& e) {
714 m->errorOut(e, "Perseus", "stitchTrimera");
715 exit(1);
716 }
717 }
718
719 /**************************************************************************************************/
720
threeWayAlign(string query,string parent1,string parent2,string & qAlign,string & aAlign,string & bAlign)721 int Perseus::threeWayAlign(string query, string parent1, string parent2, string& qAlign, string& aAlign, string& bAlign){
722 try {
723 pwModel model(1.0, -1.0, -5.0);
724
725 string qL, rL;
726 string qR, rR;
727
728 basicPairwiseAlignSeqs(query, parent1, qL, rL, model);
729 basicPairwiseAlignSeqs(query, parent2, qR, rR, model);
730
731 int lLength = qL.length();
732 int rLength = qR.length();
733
734 string qLNew, rLNew;
735 string qRNew, rRNew;
736
737 int lIndex = 0;
738 int rIndex = 0;
739
740 while(lIndex<lLength || rIndex<rLength){
741
742 if (m->getControl_pressed()) { return 0; }
743
744 if(qL[lIndex] == qR[rIndex]){
745 qLNew += qL[lIndex];
746 rLNew += rL[lIndex];
747 lIndex++;
748
749 qRNew += qR[rIndex];
750 rRNew += rR[rIndex];
751 rIndex++;
752 }
753 else if(qL[lIndex] == '-' || qL[lIndex] == '.'){
754 //insert a gap into the right sequences
755 qLNew += qL[lIndex];
756 rLNew += rL[lIndex];
757 lIndex++;
758
759 if(rIndex != rLength){
760 qRNew += '-';
761 rRNew += '-';
762 }
763 else{
764 qRNew += '.';
765 rRNew += '.';
766 }
767 }
768 else if(qR[rIndex] == '-' || qR[rIndex] == '.'){
769 //insert a gap into the left sequences
770 qRNew += qR[rIndex];
771 rRNew += rR[rIndex];
772 rIndex++;
773
774
775 if(lIndex != lLength){
776 qLNew += '-';
777 rLNew += '-';
778 }
779 else{
780 qLNew += '.';
781 rLNew += '.';
782 }
783
784 }
785 }
786
787 qAlign = qLNew;
788 aAlign = rLNew;
789 bAlign = rRNew;
790
791 bool qStart = 0;
792 bool aStart = 0;
793 bool bStart = 0;
794
795 for(int i=0;i<qAlign.length();i++){
796
797 if (m->getControl_pressed()) { return 0; }
798
799 if(qStart == 0){
800 if(qAlign[i] == '-') { qAlign[i] = '.'; }
801 else { qStart = 1; }
802 }
803 if(aStart == 0){
804 if(aAlign[i] == '-') { aAlign[i] = '.'; }
805 else { aStart = 1; }
806 }
807 if(bStart == 0){
808 if(bAlign[i] == '-') { bAlign[i] = '.'; }
809 else { bStart = 1; }
810 }
811 if(aStart == 1 && bStart == 1 && qStart == 1){
812 break;
813 }
814 }
815 return 0;
816 }
817 catch(exception& e) {
818 m->errorOut(e, "Perseus", "threeWayAlign");
819 exit(1);
820 }
821 }
822
823 /**************************************************************************************************/
824
calcLoonIndex(string query,string parent1,string parent2,int breakPoint,vector<vector<double>> & binMatrix)825 double Perseus::calcLoonIndex(string query, string parent1, string parent2, int breakPoint, vector<vector<double> >& binMatrix){
826 try {
827 string queryAln, leftParentAln, rightParentAln;
828 threeWayAlign(query, parent1, parent2, queryAln, leftParentAln, rightParentAln);
829
830 int alignLength = queryAln.length();
831
832 int endPos = alignLength;
833 for(int i=alignLength-1;i>=0; i--){
834 if(queryAln[i] != '.' && leftParentAln[i] != '.' && rightParentAln[i] != '.'){
835 endPos = i + 1;
836 break;
837 }
838 }
839
840 int diffToLeftCount = 0;
841 vector<int> diffToLeftMap(alignLength, 0);
842
843 int diffToRightCount = 0;
844 vector<int> diffToRightMap(alignLength, 0);
845
846 for(int i=0;i<endPos;i++){
847
848 if (m->getControl_pressed()) { return 0; }
849
850 if(queryAln[i] != leftParentAln[i]){
851 diffToLeftMap[diffToLeftCount] = i;
852 diffToLeftCount++;
853 }
854
855 if(queryAln[i] != rightParentAln[i]){
856 diffToRightMap[diffToRightCount] = i;
857 diffToRightCount++;
858 }
859 }
860
861
862
863 diffToLeftMap[diffToLeftCount] = endPos;
864 diffToRightMap[diffToRightCount] = endPos;
865
866 int indexL = 0;
867 int indexR = 0;
868 int indexS = 0;
869
870 vector<int> diffs;
871 vector<int> splits;
872
873 splits.push_back(-1);
874 diffs.push_back(diffToRightCount);
875 indexS++;
876
877 while(indexL < diffToLeftCount || indexR < diffToRightCount){
878
879 if (m->getControl_pressed()) { return 0; }
880
881 if(diffToLeftMap[indexL] <= diffToRightMap[indexR]){
882 diffs.push_back(diffs[indexS - 1] + 1);
883 splits.push_back(diffToLeftMap[indexL]);
884
885 indexL++;
886 indexS++;
887 }
888 else if(diffToLeftMap[indexL] > diffToRightMap[indexR]) {
889 diffs.push_back(diffs[indexS - 1] - 1);
890 splits.push_back(diffToRightMap[indexR]);
891
892 indexR++;
893 indexS++;
894 }
895 }
896
897 int minDiff = PERSEUSMAXINT;
898 int minIndex = -1;
899 for(int i=0;i<indexS;i++){
900
901 if (m->getControl_pressed()) { return 0; }
902
903 if(diffs[i] < minDiff){
904 minDiff = diffs[i];
905 minIndex = i;
906 }
907 }
908
909 int splitPos = endPos;
910 if(minIndex < indexS - 1){
911 splitPos = (splits[minIndex]+splits[minIndex+1]) / 2;
912 }
913
914 int diffToChimera = 0;
915 int leftDiffToP1 = 0;
916 int rightDiffToP1 = 0;
917 int leftDiffToP2 = 0;
918 int rightDiffToP2 = 0;
919
920 for(int i=0;i<endPos;i++){
921
922 if (m->getControl_pressed()) { return 0; }
923
924 char bQuery = queryAln[i];
925 char bP1 = leftParentAln[i];
926 char bP2 = rightParentAln[i];
927
928 char bConsensus = bQuery;
929 if(bP1 == bP2){ bConsensus = bP1; }
930
931 if(bConsensus != bQuery){
932 diffToChimera++;
933 }
934
935 if(bConsensus != bP1){
936 if(i <= splitPos){
937 leftDiffToP1++;
938 }
939 else{
940 rightDiffToP1++;
941 }
942 }
943 if(bConsensus != bP2){
944 if(i <= splitPos){
945 leftDiffToP2++;
946 }
947 else{
948 rightDiffToP2++;
949 }
950 }
951 }
952
953
954 int diffToClosestParent, diffToFurtherParent;
955 int xA, xB, yA, yB;
956 double aFraction, bFraction;
957
958 if(diffToLeftCount <= diffToRightCount){ //if parent 1 is closer
959
960 diffToClosestParent = leftDiffToP1 + rightDiffToP1;
961 xA = leftDiffToP1;
962 xB = rightDiffToP1;
963
964 diffToFurtherParent = leftDiffToP2 + rightDiffToP2;
965 yA = leftDiffToP2;
966 yB = rightDiffToP2;
967
968 aFraction = double(splitPos + 1)/(double) endPos;
969 bFraction = 1 - aFraction;
970
971 }
972 else{ //if parent 2 is closer
973
974 diffToClosestParent = leftDiffToP2 + rightDiffToP2;
975 xA = rightDiffToP2;
976 xB = leftDiffToP2;
977
978 diffToFurtherParent = leftDiffToP1 + rightDiffToP1;
979 yA = rightDiffToP1;
980 yB = leftDiffToP1;
981
982 bFraction = double(splitPos + 1)/(double) endPos;
983 aFraction = 1 - bFraction;
984
985 }
986
987 double loonIndex = 0;
988
989 int totalDifference = diffToClosestParent + diffToChimera;
990
991 if(totalDifference > 0){
992 double prob = 0;
993
994 for(int i=diffToClosestParent;i<=totalDifference;i++){
995 prob += binMatrix[totalDifference][i] * pow(0.50, i) * pow(0.50, totalDifference - i);
996 }
997 loonIndex += -log(prob);
998 }
999
1000 if(diffToFurtherParent > 0){
1001 double prob = 0;
1002
1003 for(int i=yA;i<=diffToFurtherParent;i++){
1004 prob += binMatrix[diffToFurtherParent][i] * pow(aFraction, i) * pow(1-aFraction, diffToFurtherParent - i);
1005 }
1006 loonIndex += -log(prob);
1007 }
1008
1009 if(diffToClosestParent > 0){
1010 double prob = 0;
1011
1012 for(int i=xB;i<=diffToClosestParent;i++){
1013 prob += binMatrix[diffToClosestParent][i] * pow(bFraction, i) * pow(1-bFraction, diffToClosestParent - i);
1014 }
1015 loonIndex += -log(prob);
1016 }
1017
1018 return loonIndex;
1019 }
1020 catch(exception& e) {
1021 m->errorOut(e, "Perseus", "calcLoonIndex");
1022 exit(1);
1023 }
1024 }
1025
1026 /**************************************************************************************************/
1027
calcBestDistance(string query,string reference)1028 double Perseus::calcBestDistance(string query, string reference){
1029 try {
1030 int alignLength = query.length();
1031 int mismatch = 0;
1032 int counter = 0;
1033
1034 for(int i=0;i<alignLength;i++){
1035
1036 if (m->getControl_pressed()) { return 0; }
1037
1038 if((query[i] != '.' || reference[i] != '.') && (query[i] != '-' && reference[i] != '-')){
1039 if(query[i] != reference[i]){ mismatch++; }
1040 counter++;
1041 }
1042 }
1043
1044 return (double)mismatch / (double)counter;
1045 }
1046 catch(exception& e) {
1047 m->errorOut(e, "Perseus", "calcBestDistance");
1048 exit(1);
1049 }
1050 }
1051
1052 /**************************************************************************************************/
1053
classifyChimera(double singleDist,double cIndex,double loonIndex,double alpha,double beta)1054 double Perseus::classifyChimera(double singleDist, double cIndex, double loonIndex, double alpha, double beta){
1055 try {
1056 double difference = cIndex - singleDist; //y
1057 double probability;
1058
1059 if(cIndex >= 0.15 || difference > 0.00){
1060 probability = 0.0000;
1061 }
1062 else{
1063 probability = 1.0 / (1.0 + exp(-(alpha + beta * loonIndex)));
1064 }
1065
1066 return probability;
1067 }
1068 catch(exception& e) {
1069 m->errorOut(e, "Perseus", "classifyChimera");
1070 exit(1);
1071 }
1072 }
1073 /**************************************************************************************************/
1074