1 /*
2  *  myPerseus.cpp
3  *
4  *
5  *  Created by Pat Schloss on 9/5/11.
6  *  Copyright 2011 Patrick D. Schloss. All rights reserved.
7  *
8  */
9 
10 #include "myPerseus.h"
11 
12 /**************************************************************************************************/
13 int PERSEUSMAXINT = numeric_limits<int>::max();
14 /**************************************************************************************************/
15 
binomial(int maxOrder)16 vector<vector<double> > Perseus::binomial(int maxOrder){
17 	try {
18 		vector<vector<double> > binomial(maxOrder+1);
19 
20 		for(int i=0;i<=maxOrder;i++){
21 			binomial[i].resize(maxOrder+1);
22 			binomial[i][0]=1;
23 			binomial[0][i]=0;
24 		}
25 		binomial[0][0]=1;
26 
27 		binomial[1][0]=1;
28 		binomial[1][1]=1;
29 
30 		for(int i=2;i<=maxOrder;i++){
31 			binomial[1][i]=0;
32 		}
33 
34 		for(int i=2;i<=maxOrder;i++){
35 			for(int j=1;j<=maxOrder;j++){
36 				if(i==j){	binomial[i][j]=1;									}
37 				if(j>i)	{	binomial[i][j]=0;									}
38 				else	{	binomial[i][j]=binomial[i-1][j-1]+binomial[i-1][j];	}
39 			}
40 		}
41 
42 		return binomial;
43 	}
44 	catch(exception& e) {
45 		m->errorOut(e, "Perseus", "binomial");
46 		exit(1);
47 	}
48 }
49 
50 /**************************************************************************************************/
basicPairwiseAlignSeqs(string query,string reference,string & qAlign,string & rAlign,pwModel model)51 double Perseus::basicPairwiseAlignSeqs(string query, string reference, string& qAlign, string& rAlign, pwModel model){
52 	try {
53 		double GAP = model.GAP_OPEN;
54 		double MATCH = model.MATCH;
55 		double MISMATCH = model.MISMATCH;
56 
57 		int queryLength = query.size();
58 		int refLength = reference.size();
59 
60 		vector<vector<double> > alignMatrix(queryLength + 1);
61 		vector<vector<char> > alignMoves(queryLength + 1);
62 
63 		for(int i=0;i<=queryLength;i++){
64 			if (m->getControl_pressed()) { return 0; }
65 			alignMatrix[i].resize(refLength + 1, 0);
66 			alignMoves[i].resize(refLength + 1, 'x');
67 		}
68 
69 		for(int i=0;i<=queryLength;i++){
70 			if (m->getControl_pressed()) { return 0; }
71 			alignMatrix[i][0] = GAP * i;
72 			alignMoves[i][0] = 'u';
73 		}
74 
75 		for(int i=0;i<=refLength;i++){
76 			if (m->getControl_pressed()) { return 0; }
77 			alignMatrix[0][i] = GAP * i;
78 			alignMoves[0][i] = 'l';
79 		}
80 
81 		for(int i=1;i<=queryLength;i++){
82 
83 			if (m->getControl_pressed()) { return 0; }
84 
85 			for(int j=1;j<=refLength;j++){
86 
87 				double nogapScore;
88 				if(query[i-1] == reference[j-1]){	nogapScore = alignMatrix[i-1][j-1] + MATCH;		}
89 				else							{	nogapScore = alignMatrix[i-1][j-1] + MISMATCH;	}
90 
91 				double leftScore;
92 				if(i == queryLength)			{	leftScore = alignMatrix[i][j-1];				}
93 				else							{	leftScore = alignMatrix[i][j-1] + GAP;			}
94 
95 
96 				double upScore;
97 				if(j == refLength)				{	upScore = alignMatrix[i-1][j];					}
98 				else							{	upScore = alignMatrix[i-1][j] + GAP;			}
99 
100 				if(nogapScore > leftScore){
101 					if(nogapScore > upScore){
102 						alignMoves[i][j] = 'd';
103 						alignMatrix[i][j] = nogapScore;
104 					}
105 					else{
106 						alignMoves[i][j] = 'u';
107 						alignMatrix[i][j] = upScore;
108 					}
109 				}
110 				else{
111 					if(leftScore > upScore){
112 						alignMoves[i][j] = 'l';
113 						alignMatrix[i][j] = leftScore;
114 					}
115 					else{
116 						alignMoves[i][j] = 'u';
117 						alignMatrix[i][j] = upScore;
118 					}
119 				}
120 			}
121 		}
122 
123 		int i = queryLength;
124 		int j = refLength;
125 
126 		qAlign = "";
127 		rAlign = "";
128 
129 		int diffs = 0;
130 		int length = 0;
131 
132 		while(i > 0 && j > 0){
133 
134 			if (m->getControl_pressed()) { return 0; }
135 
136 			if(alignMoves[i][j] == 'd'){
137 				qAlign = query[i-1] + qAlign;
138 				rAlign = reference[j-1] + rAlign;
139 
140 				if(query[i-1] != reference[j-1]){	diffs++;	}
141 				length++;
142 
143 				i--;
144 				j--;
145 			}
146 			else if(alignMoves[i][j] == 'u'){
147 				qAlign = query[i-1] + qAlign;
148 
149 				if(j != refLength)	{	rAlign = '-' + rAlign;	diffs++;	length++;	}
150 				else				{	rAlign = '.' + rAlign;	}
151 				i--;
152 			}
153 			else if(alignMoves[i][j] == 'l'){
154 				rAlign = reference[j-1] + rAlign;
155 
156 				if(i != queryLength){	qAlign = '-' + qAlign;	diffs++;	length++;	}
157 				else				{	qAlign = '.' + qAlign;	}
158 				j--;
159 			}
160 		}
161 
162 		while(i>0){
163 
164 			if (m->getControl_pressed()) { return 0; }
165 
166 			rAlign = '.' + rAlign;
167 			qAlign = query[i-1] + qAlign;
168 			i--;
169 		}
170 
171 		while(j>0){
172 
173 			if (m->getControl_pressed()) { return 0; }
174 
175 			rAlign = reference[j-1] + rAlign;
176 			qAlign = '.' + qAlign;
177 			j--;
178 		}
179 
180 
181 
182 		return double(diffs)/double(length);
183 	}
184 	catch(exception& e) {
185 		m->errorOut(e, "Perseus", "basicPairwiseAlignSeqs");
186 		exit(1);
187 	}
188 
189 }
190 /**************************************************************************************************/
getDiffs(string qAlign,string rAlign,vector<int> & leftDiffs,vector<int> & leftMap,vector<int> & rightDiffs,vector<int> & rightMap)191 int Perseus::getDiffs(string qAlign, string rAlign, vector<int>& leftDiffs, vector<int>& leftMap, vector<int>& rightDiffs, vector<int>& rightMap){
192 	try {
193 		int alignLength = qAlign.length();
194 
195 		int lDiffs = 0;
196 		int lCount = 0;
197 		for(int l=0;l<alignLength;l++){
198 
199 			if (m->getControl_pressed()) { return 0; }
200 
201 			if(qAlign[l] == '-'){
202 				lDiffs++;
203 			}
204 			else if(qAlign[l] != '.'){
205 
206 				if(rAlign[l] == '-'){
207 					lDiffs++;
208 				}
209 				else if(qAlign[l] != rAlign[l] && rAlign[l] != '.'){
210 					lDiffs++;
211 				}
212 				leftDiffs[lCount] = lDiffs;
213 				leftMap[lCount] = l;
214 
215 				lCount++;
216 			}
217 		}
218 
219 		int rDiffs = 0;
220 		int rCount = 0;
221 		for(int l=alignLength-1;l>=0;l--){
222 
223 			if (m->getControl_pressed()) { return 0; }
224 
225 			if(qAlign[l] == '-'){
226 				rDiffs++;
227 			}
228 			else if(qAlign[l] != '.'){
229 
230 				if(rAlign[l] == '-'){
231 					rDiffs++;
232 				}
233 				else if(qAlign[l] != rAlign[l] && rAlign[l] != '.'){
234 					rDiffs++;
235 				}
236 
237 				rightDiffs[rCount] = rDiffs;
238 				rightMap[rCount] = l;
239 				rCount++;
240 
241 			}
242 
243 		}
244 
245 		return 0;
246 	}
247 	catch(exception& e) {
248 		m->errorOut(e, "Perseus", "getDiffs");
249 		exit(1);
250 	}
251 }
252 /**************************************************************************************************/
getLastMatch(char direction,vector<vector<char>> & alignMoves,int i,int j,string & seqA,string & seqB)253 int Perseus::getLastMatch(char direction, vector<vector<char> >& alignMoves, int i, int j, string& seqA, string& seqB){
254 	try {
255 		char nullReturn = -1;
256 
257 		while(i>=1 && j>=1){
258 
259 			if (m->getControl_pressed()) { return 0; }
260 
261 			if(direction == 'd'){
262 				if(seqA[i-1] == seqB[j-1])	{	return seqA[i-1];	}
263 				else						{	return nullReturn;	}
264 			}
265 
266 			else if(direction == 'l')		{	j--;				}
267 			else							{	i--;				}
268 
269 			direction = alignMoves[i][j];
270 		}
271 
272 		return nullReturn;
273 	}
274 	catch(exception& e) {
275 		m->errorOut(e, "Perseus", "getLastMatch");
276 		exit(1);
277 	}
278 }
279 
280 /**************************************************************************************************/
281 
toInt(char b)282 int Perseus::toInt(char b){
283 	try {
284 		if(b == 'A')		{	return 0;	}
285 		else if(b == 'C')	{	return 1;	}
286 		else if(b == 'T')	{	return 2;	}
287 		else if(b == 'G')	{	return 3;	}
288 		else { m->mothurOut("[ERROR]: " + toString(b) + " is not ATGC.\n");  return -1; }
289 	}
290 	catch(exception& e) {
291 		m->errorOut(e, "Perseus", "toInt");
292 		exit(1);
293 	}
294 }
295 
296 /**************************************************************************************************/
297 
modeledPairwiseAlignSeqs(string query,string reference,string & qAlign,string & rAlign,vector<vector<double>> & correctMatrix)298 double Perseus::modeledPairwiseAlignSeqs(string query, string reference, string& qAlign, string& rAlign, vector<vector<double> >& correctMatrix){
299 	try {
300 		int queryLength = query.size();
301 		int refLength = reference.size();
302 
303 		vector<vector<double> > alignMatrix(queryLength + 1);
304 		vector<vector<char> > alignMoves(queryLength + 1);
305 
306 		for(int i=0;i<=queryLength;i++){
307 			if (m->getControl_pressed()) { return 0; }
308 			alignMatrix[i].resize(refLength + 1, 0);
309 			alignMoves[i].resize(refLength + 1, 'x');
310 		}
311 
312 		for(int i=0;i<=queryLength;i++){
313 			if (m->getControl_pressed()) { return 0; }
314 			alignMatrix[i][0] = 15.0 * i;
315 			alignMoves[i][0] = 'u';
316 		}
317 
318 		for(int i=0;i<=refLength;i++){
319 			if (m->getControl_pressed()) { return 0; }
320 			alignMatrix[0][i] = 15.0 * i;
321 			alignMoves[0][i] = 'l';
322 		}
323 
324 		for(int i=1;i<=queryLength;i++){
325 
326 			if (m->getControl_pressed()) { return 0; }
327 
328 			for(int j=1;j<=refLength;j++){
329 
330 				double nogap;
331 				nogap = alignMatrix[i-1][j-1] + correctMatrix[toInt(query[i-1])][toInt(reference[j-1])];
332 
333 				double gap;
334 
335 				double left;
336 				if(i == queryLength){ //terminal gap
337 					left = alignMatrix[i][j-1];
338 				}
339 				else{
340 					if(reference[j-1] == getLastMatch('l', alignMoves, i, j, query, reference)){
341 						gap = 4.0;
342 					}
343 					else{
344 						gap = 15.0;
345 					}
346 
347 					left = alignMatrix[i][j-1] + gap;
348 				}
349 
350 				double up;
351 				if(j == refLength){ //terminal gap
352 					up = alignMatrix[i-1][j];
353 				}
354 				else{
355 
356 					if(query[i-1] == getLastMatch('u', alignMoves, i, j, query, reference)){
357 						gap = 4.0;
358 					}
359 					else{
360 						gap = 15.0;
361 					}
362 
363 					up = alignMatrix[i-1][j] + gap;
364 				}
365 
366 
367 				if(nogap < left){
368 					if(nogap < up){
369 						alignMoves[i][j] = 'd';
370 						alignMatrix[i][j] = nogap;
371 					}
372 					else{
373 						alignMoves[i][j] = 'u';
374 						alignMatrix[i][j] = up;
375 					}
376 				}
377 				else{
378 					if(left < up){
379 						alignMoves[i][j] = 'l';
380 						alignMatrix[i][j] = left;
381 					}
382 					else{
383 						alignMoves[i][j] = 'u';
384 						alignMatrix[i][j] = up;
385 					}
386 				}
387 			}
388 		}
389 
390 		int i = queryLength;
391 		int j = refLength;
392 
393 		int alignLength = 0;
394 
395 		while(i > 0 && j > 0){
396 
397 			if (m->getControl_pressed()) { return 0; }
398 
399 			if(alignMoves[i][j] == 'd'){
400 				qAlign = query[i-1] + qAlign;
401 				rAlign = reference[j-1] + rAlign;
402 				alignLength++;
403 				i--;
404 				j--;
405 			}
406 			else if(alignMoves[i][j] == 'u'){
407 				if(j != refLength){
408 					qAlign = query[i-1] + qAlign;
409 					rAlign = '-' + rAlign;
410 					alignLength++;
411 				}
412 
413 				i--;
414 			}
415 			else if(alignMoves[i][j] == 'l'){
416 				if(i != queryLength){
417 					qAlign = '-' + qAlign;
418 					rAlign = reference[j-1] + rAlign;
419 					alignLength++;
420 				}
421 
422 				j--;
423 			}
424 		}
425 
426 		return alignMatrix[queryLength][refLength] / (double)alignLength;
427 	}
428 	catch(exception& e) {
429 		m->errorOut(e, "Perseus", "modeledPairwiseAlignSeqs");
430 		exit(1);
431 	}
432 }
433 
434 /**************************************************************************************************/
getAlignments(int curSequenceIndex,vector<seqData> sequences,vector<pwAlign> & alignments,vector<vector<int>> & leftDiffs,vector<vector<int>> & leftMaps,vector<vector<int>> & rightDiffs,vector<vector<int>> & rightMaps,int & bestRefSeq,int & bestRefDiff,vector<bool> & restricted)435 int Perseus::getAlignments(int curSequenceIndex, vector<seqData> sequences, vector<pwAlign>& alignments, vector<vector<int> >& leftDiffs, vector<vector<int> >& leftMaps, vector<vector<int> >& rightDiffs, vector<vector<int> >& rightMaps, int& bestRefSeq, int& bestRefDiff, vector<bool>& restricted){
436 	try {
437 		int numSeqs = sequences.size();
438 		//int bestSequenceMismatch = PERSEUSMAXINT;
439 
440 		string curSequence = sequences[curSequenceIndex].sequence;
441 		int curFrequency = sequences[curSequenceIndex].frequency;
442 
443 		bestRefSeq = -1;
444 
445 		int bestIndex = -1;
446 		int bestDiffs = PERSEUSMAXINT;
447 		int comparisons = 0;
448 
449 		pwModel model(0, -1, -1.5);
450 
451 		for(int i=0;i<numSeqs;i++){
452 
453 			if (m->getControl_pressed()) { return 0; }
454 
455 			if(i != curSequenceIndex && restricted[i] != 1 && sequences[i].frequency >= 2 * curFrequency){
456 				string refSequence = sequences[i].sequence;
457 
458 				leftDiffs[i].assign(curSequence.length(), 0);
459 				leftMaps[i].assign(curSequence.length(), 0);
460 				rightDiffs[i].assign(curSequence.length(), 0);
461 				rightMaps[i].assign(curSequence.length(), 0);
462 
463 				basicPairwiseAlignSeqs(curSequence, refSequence, alignments[i].query, alignments[i].reference, model);
464 
465 
466 				getDiffs(alignments[i].query, alignments[i].reference, leftDiffs[i], leftMaps[i], rightDiffs[i], rightMaps[i]);
467 
468 				int diffs = rightDiffs[i][curSequence.length()-1];
469 
470 				if(diffs < bestDiffs){
471 					bestDiffs = diffs;
472 					bestIndex = i;
473 				}
474 				comparisons++;
475 				restricted[i] = 0;
476 			}
477 			else{
478 				restricted[i] = 1;
479 			}
480 		}
481 
482 		bestRefSeq = bestIndex;
483 		bestRefDiff = bestDiffs;
484 
485 		return comparisons;
486 	}
487 	catch(exception& e) {
488 		m->errorOut(e, "Perseus", "getAlignments");
489 		exit(1);
490 	}
491 }
492 /**************************************************************************************************/
getChimera(vector<seqData> sequences,vector<vector<int>> & leftDiffs,vector<vector<int>> & rightDiffs,int & leftParent,int & rightParent,int & breakPoint,vector<int> & singleLeft,vector<int> & bestLeft,vector<int> & singleRight,vector<int> & bestRight,vector<bool> restricted)493 int Perseus::getChimera(vector<seqData> sequences,
494 			   vector<vector<int> >& leftDiffs,
495 			   vector<vector<int> >& rightDiffs,
496 			   int& leftParent,
497 			   int& rightParent,
498 			   int& breakPoint,
499 			   vector<int>& singleLeft,
500 			   vector<int>& bestLeft,
501 			   vector<int>& singleRight,
502 			   vector<int>& bestRight,
503 			   vector<bool> restricted){
504 	try {
505 		int numRefSeqs = restricted.size();
506 		int seqLength = leftDiffs[0].size();
507 
508 		singleLeft.resize(seqLength, PERSEUSMAXINT);
509 		bestLeft.resize(seqLength, -1);
510 
511 		for(int l=0;l<seqLength;l++){
512 
513 			if (m->getControl_pressed()) { return 0; }
514 
515 			for(int i=0;i<numRefSeqs;i++){
516 
517 				if(!restricted[i]){
518 					if(((leftDiffs[i][l] < singleLeft[l]) && sequences[i].frequency) || ((leftDiffs[i][l] == singleLeft[l]) && (sequences[i].frequency > sequences[bestLeft[l]].frequency))){
519 						singleLeft[l] = leftDiffs[i][l];
520 						bestLeft[l] = i;
521 					}
522 				}
523 			}
524 		}
525 
526 		singleRight.resize(seqLength, PERSEUSMAXINT);
527 		bestRight.resize(seqLength, -1);
528 
529 		for(int l=0;l<seqLength;l++){
530 
531 			if (m->getControl_pressed()) { return 0; }
532 
533 			for(int i=0;i<numRefSeqs;i++){
534 
535 				if(!restricted[i]){
536 					if((rightDiffs[i][l] < singleRight[l] && sequences[i].frequency) || ((rightDiffs[i][l] == singleRight[l] && sequences[i].frequency > sequences[bestRight[l]].frequency))){
537 						singleRight[l] = rightDiffs[i][l];
538 						bestRight[l] = i;
539 					}
540 				}
541 			}
542 		}
543 
544 
545 
546 		int bestChimeraMismatches = PERSEUSMAXINT;
547 		leftParent = -1;
548 		rightParent = -1;
549 		breakPoint = -1;
550 
551 		for(int l=0;l<seqLength-1;l++){
552 
553 			if (m->getControl_pressed()) { return 0; }
554 
555 			int chimera = singleLeft[l] + singleRight[seqLength - l - 2];
556 			if(chimera < bestChimeraMismatches){
557 				bestChimeraMismatches = chimera;
558 				breakPoint = l;
559 				leftParent = bestLeft[l];
560 				rightParent = bestRight[seqLength - l - 2];
561 			}
562 		}
563 
564 		return bestChimeraMismatches;
565 	}
566 	catch(exception& e) {
567 		m->errorOut(e, "Perseus", "getChimera");
568 		exit(1);
569 	}
570 }
571 
572 /**************************************************************************************************/
573 
stitchBimera(vector<pwAlign> & alignments,int leftParent,int rightParent,int breakPoint,vector<vector<int>> & leftMaps,vector<vector<int>> & rightMaps)574 string Perseus::stitchBimera(vector<pwAlign>& alignments, int leftParent, int rightParent, int breakPoint, vector<vector<int> >& leftMaps, vector<vector<int> >& rightMaps){
575 	try {
576 		int breakLeft = leftMaps[leftParent][breakPoint];
577 		int breakRight = rightMaps[rightParent][rightMaps[rightParent].size() - breakPoint - 2];
578 
579 		string left = alignments[leftParent].reference;
580 		string right = alignments[rightParent].reference;
581 		string chimera = "";
582 
583 		for(int i=0;i<=breakLeft;i++){
584 
585 			if (m->getControl_pressed()) { return 0; }
586 
587 			if(left[i] != '-' && left[i] != '.'){
588 				chimera += left[i];
589 			}
590 		}
591 
592 
593 		for(int i=breakRight;i<right.length();i++){
594 
595 			if (m->getControl_pressed()) { return 0; }
596 
597 			if(right[i] != '-' && right[i] != '.'){
598 				chimera += right[i];
599 			}
600 		}
601 
602 		return chimera;
603 	}
604 	catch(exception& e) {
605 		m->errorOut(e, "Perseus", "stitchBimera");
606 		exit(1);
607 	}
608 }
609 /**************************************************************************************************/
getTrimera(vector<seqData> & sequences,vector<vector<int>> & leftDiffs,int & leftParent,int & middleParent,int & rightParent,int & breakPointA,int & breakPointB,vector<int> & singleLeft,vector<int> & bestLeft,vector<int> & singleRight,vector<int> & bestRight,vector<bool> restricted)610 int Perseus::getTrimera(vector<seqData>& sequences,
611 			   vector<vector<int> >& leftDiffs,
612 			   int& leftParent,
613 			   int& middleParent,
614 			   int& rightParent,
615 			   int& breakPointA,
616 			   int& breakPointB,
617 			   vector<int>& singleLeft,
618 			   vector<int>& bestLeft,
619 			   vector<int>& singleRight,
620 			   vector<int>& bestRight,
621 			   vector<bool> restricted){
622 	try {
623 		int numRefSeqs = leftDiffs.size();
624 		int alignLength = leftDiffs[0].size();
625 		int bestTrimeraMismatches = PERSEUSMAXINT;
626 
627 		leftParent = -1;
628 		middleParent = -1;
629 		rightParent = -1;
630 
631 		breakPointA = -1;
632 		breakPointB = -1;
633 
634 		vector<vector<int> > minDelta(alignLength);
635 		vector<vector<int> > minDeltaSeq(alignLength);
636 
637 		for(int i=0;i<alignLength;i++){
638 			if (m->getControl_pressed()) { return 0; }
639 			minDelta[i].assign(alignLength, PERSEUSMAXINT);
640 			minDeltaSeq[i].assign(alignLength, -1);
641 		}
642 
643 		for(int x=0;x<alignLength;x++){
644 			for(int y=x;y<alignLength-1;y++){
645 				for(int i=0;i<numRefSeqs;i++){
646 
647 					if (m->getControl_pressed()) { return 0; }
648 
649 					if(!restricted[i]){
650 						int delta = leftDiffs[i][y] - leftDiffs[i][x];
651 
652 						if(delta < minDelta[x][y] || (delta == minDelta[x][y] && sequences[i].frequency > sequences[minDeltaSeq[x][y]].frequency)){
653 							minDelta[x][y] = delta;
654 							minDeltaSeq[x][y] = i;
655 						}
656 					}
657 				}
658 				minDelta[x][y] += singleLeft[x] + singleRight[alignLength - y - 2];
659 
660 				if(minDelta[x][y] < bestTrimeraMismatches){
661 					bestTrimeraMismatches = minDelta[x][y];
662 
663 					breakPointA = x;
664 					breakPointB = y;
665 
666 					leftParent = bestLeft[x];
667 					middleParent = minDeltaSeq[x][y];
668 					rightParent = bestRight[alignLength - y - 2];
669 				}
670 			}
671 		}
672 
673 		return bestTrimeraMismatches;
674 	}
675 	catch(exception& e) {
676 		m->errorOut(e, "Perseus", "getTrimera");
677 		exit(1);
678 	}
679 }
680 
681 /**************************************************************************************************/
682 
stitchTrimera(vector<pwAlign> alignments,int leftParent,int middleParent,int rightParent,int breakPointA,int breakPointB,vector<vector<int>> & leftMaps,vector<vector<int>> & rightMaps)683 string Perseus::stitchTrimera(vector<pwAlign> alignments, int leftParent, int middleParent, int rightParent, int breakPointA, int breakPointB, vector<vector<int> >& leftMaps, vector<vector<int> >& rightMaps){
684 	try {
685 		int p1SplitPoint = leftMaps[leftParent][breakPointA];
686 		int p2SplitPoint = leftMaps[middleParent][breakPointB];
687 		int p3SplitPoint = rightMaps[rightParent][rightMaps[rightParent].size() - breakPointB - 2];
688 
689 		string chimeraRefSeq;
690 		for(int i=0;i<=p1SplitPoint;i++){
691 			if (m->getControl_pressed()) { return chimeraRefSeq; }
692 			if(alignments[leftParent].reference[i] != '-' && alignments[leftParent].reference[i] != '.'){
693 				chimeraRefSeq += alignments[leftParent].reference[i];
694 			}
695 		}
696 
697 		for(int i=p1SplitPoint+1;i<=p2SplitPoint;i++){
698 			if (m->getControl_pressed()) { return chimeraRefSeq; }
699 			if(alignments[middleParent].reference[i] != '-' && alignments[middleParent].reference[i] != '.'){
700 				chimeraRefSeq += alignments[middleParent].reference[i];
701 			}
702 		}
703 
704 		for(int i=p3SplitPoint;i<alignments[rightParent].reference.length();i++){
705 			if (m->getControl_pressed()) { return chimeraRefSeq; }
706 			if(alignments[rightParent].reference[i] != '-' && alignments[rightParent].reference[i] != '.'){
707 				chimeraRefSeq += alignments[rightParent].reference[i];
708 			}
709 		}
710 
711 		return chimeraRefSeq;
712 	}
713 	catch(exception& e) {
714 		m->errorOut(e, "Perseus", "stitchTrimera");
715 		exit(1);
716 	}
717 }
718 
719 /**************************************************************************************************/
720 
threeWayAlign(string query,string parent1,string parent2,string & qAlign,string & aAlign,string & bAlign)721 int Perseus::threeWayAlign(string query, string parent1, string parent2, string& qAlign, string& aAlign, string& bAlign){
722 	try {
723 		pwModel model(1.0, -1.0, -5.0);
724 
725 		string qL, rL;
726 		string qR, rR;
727 
728 		basicPairwiseAlignSeqs(query, parent1, qL, rL, model);
729 		basicPairwiseAlignSeqs(query, parent2, qR, rR, model);
730 
731 		int lLength = qL.length();
732 		int rLength = qR.length();
733 
734 		string qLNew, rLNew;
735 		string qRNew, rRNew;
736 
737 		int lIndex = 0;
738 		int rIndex = 0;
739 
740 		while(lIndex<lLength || rIndex<rLength){
741 
742 			if (m->getControl_pressed()) { return 0; }
743 
744 			if(qL[lIndex] == qR[rIndex]){
745 				qLNew += qL[lIndex];
746 				rLNew += rL[lIndex];
747 				lIndex++;
748 
749 				qRNew += qR[rIndex];
750 				rRNew += rR[rIndex];
751 				rIndex++;
752 			}
753 			else if(qL[lIndex] == '-' || qL[lIndex] == '.'){
754 				//insert a gap into the right sequences
755 				qLNew += qL[lIndex];
756 				rLNew += rL[lIndex];
757 				lIndex++;
758 
759 				if(rIndex != rLength){
760 					qRNew += '-';
761 					rRNew += '-';
762 				}
763 				else{
764 					qRNew += '.';
765 					rRNew += '.';
766 				}
767 			}
768 			else if(qR[rIndex] == '-' || qR[rIndex] == '.'){
769 				//insert a gap into the left sequences
770 				qRNew += qR[rIndex];
771 				rRNew += rR[rIndex];
772 				rIndex++;
773 
774 
775 				if(lIndex != lLength){
776 					qLNew += '-';
777 					rLNew += '-';
778 				}
779 				else{
780 					qLNew += '.';
781 					rLNew += '.';
782 				}
783 
784 			}
785 		}
786 
787 		qAlign = qLNew;
788 		aAlign = rLNew;
789 		bAlign = rRNew;
790 
791 		bool qStart = 0;
792 		bool aStart = 0;
793 		bool bStart = 0;
794 
795 		for(int i=0;i<qAlign.length();i++){
796 
797 			if (m->getControl_pressed()) { return 0; }
798 
799 			if(qStart == 0){
800 				if(qAlign[i] == '-')	{	qAlign[i] = '.';	}
801 				else					{	qStart = 1;			}
802 			}
803 			if(aStart == 0){
804 				if(aAlign[i] == '-')	{	aAlign[i] = '.';	}
805 				else					{	aStart = 1;			}
806 			}
807 			if(bStart == 0){
808 				if(bAlign[i] == '-')	{	bAlign[i] = '.';	}
809 				else					{	bStart = 1;			}
810 			}
811 			if(aStart == 1 && bStart == 1 && qStart == 1){
812 				break;
813 			}
814 		}
815 		return 0;
816 	}
817 	catch(exception& e) {
818 		m->errorOut(e, "Perseus", "threeWayAlign");
819 		exit(1);
820 	}
821 }
822 
823 /**************************************************************************************************/
824 
calcLoonIndex(string query,string parent1,string parent2,int breakPoint,vector<vector<double>> & binMatrix)825 double Perseus::calcLoonIndex(string query, string parent1, string parent2, int breakPoint, vector<vector<double> >& binMatrix){
826 	try {
827 		string queryAln, leftParentAln, rightParentAln;
828 		threeWayAlign(query, parent1, parent2, queryAln, leftParentAln, rightParentAln);
829 
830 		int alignLength = queryAln.length();
831 
832 		int endPos = alignLength;
833 		for(int i=alignLength-1;i>=0; i--){
834 			if(queryAln[i] != '.' && leftParentAln[i] != '.' && rightParentAln[i] != '.'){
835 				endPos = i + 1;
836 				break;
837 			}
838 		}
839 
840 		int diffToLeftCount = 0;
841 		vector<int> diffToLeftMap(alignLength, 0);
842 
843 		int diffToRightCount = 0;
844 		vector<int> diffToRightMap(alignLength, 0);
845 
846 		for(int i=0;i<endPos;i++){
847 
848 			if (m->getControl_pressed()) { return 0; }
849 
850 			if(queryAln[i] != leftParentAln[i]){
851 				diffToLeftMap[diffToLeftCount] = i;
852 				diffToLeftCount++;
853 			}
854 
855 			if(queryAln[i] != rightParentAln[i]){
856 				diffToRightMap[diffToRightCount] = i;
857 				diffToRightCount++;
858 			}
859 		}
860 
861 
862 
863 		diffToLeftMap[diffToLeftCount] = endPos;
864 		diffToRightMap[diffToRightCount] = endPos;
865 
866 		int indexL = 0;
867 		int indexR = 0;
868 		int indexS = 0;
869 
870 		vector<int> diffs;
871 		vector<int> splits;
872 
873 		splits.push_back(-1);
874 		diffs.push_back(diffToRightCount);
875 		indexS++;
876 
877 		while(indexL < diffToLeftCount || indexR < diffToRightCount){
878 
879 			if (m->getControl_pressed()) { return 0; }
880 
881 			if(diffToLeftMap[indexL] <= diffToRightMap[indexR]){
882 				diffs.push_back(diffs[indexS - 1] + 1);
883 				splits.push_back(diffToLeftMap[indexL]);
884 
885 				indexL++;
886 				indexS++;
887 			}
888 			else if(diffToLeftMap[indexL] > diffToRightMap[indexR]) {
889 				diffs.push_back(diffs[indexS - 1] - 1);
890 				splits.push_back(diffToRightMap[indexR]);
891 
892 				indexR++;
893 				indexS++;
894 			}
895 		}
896 
897 		int minDiff = PERSEUSMAXINT;
898 		int minIndex = -1;
899 		for(int i=0;i<indexS;i++){
900 
901 			if (m->getControl_pressed()) { return 0; }
902 
903 			if(diffs[i] < minDiff){
904 				minDiff = diffs[i];
905 				minIndex = i;
906 			}
907 		}
908 
909 		int splitPos = endPos;
910 		if(minIndex < indexS - 1){
911 			splitPos = (splits[minIndex]+splits[minIndex+1]) / 2;
912 		}
913 
914 		int diffToChimera = 0;
915 		int leftDiffToP1 = 0;
916 		int rightDiffToP1 = 0;
917 		int leftDiffToP2 = 0;
918 		int rightDiffToP2 = 0;
919 
920 		for(int i=0;i<endPos;i++){
921 
922 			if (m->getControl_pressed()) { return 0; }
923 
924 			char bQuery = queryAln[i];
925 			char bP1 = leftParentAln[i];
926 			char bP2 = rightParentAln[i];
927 
928 			char bConsensus = bQuery;
929 			if(bP1 == bP2){	bConsensus = bP1;	}
930 
931 			if(bConsensus != bQuery){
932 				diffToChimera++;
933 			}
934 
935 			if(bConsensus != bP1){
936 				if(i <= splitPos){
937 					leftDiffToP1++;
938 				}
939 				else{
940 					rightDiffToP1++;
941 				}
942 			}
943 			if(bConsensus != bP2){
944 				if(i <= splitPos){
945 					leftDiffToP2++;
946 				}
947 				else{
948 					rightDiffToP2++;
949 				}
950 			}
951 		}
952 
953 
954 		int diffToClosestParent, diffToFurtherParent;
955 		int xA, xB, yA, yB;
956 		double aFraction, bFraction;
957 
958 		if(diffToLeftCount <= diffToRightCount){	//if parent 1 is closer
959 
960 			diffToClosestParent = leftDiffToP1 + rightDiffToP1;
961 			xA = leftDiffToP1;
962 			xB = rightDiffToP1;
963 
964 			diffToFurtherParent = leftDiffToP2 + rightDiffToP2;
965 			yA = leftDiffToP2;
966 			yB = rightDiffToP2;
967 
968 			aFraction = double(splitPos + 1)/(double) endPos;
969 			bFraction = 1 - aFraction;
970 
971 		}
972 		else{												//if parent 2 is closer
973 
974 			diffToClosestParent = leftDiffToP2 + rightDiffToP2;
975 			xA = rightDiffToP2;
976 			xB = leftDiffToP2;
977 
978 			diffToFurtherParent = leftDiffToP1 + rightDiffToP1;
979 			yA = rightDiffToP1;
980 			yB = leftDiffToP1;
981 
982 			bFraction = double(splitPos + 1)/(double) endPos;
983 			aFraction = 1 - bFraction;
984 
985 		}
986 
987 		double loonIndex = 0;
988 
989 		int totalDifference = diffToClosestParent + diffToChimera;
990 
991 		if(totalDifference > 0){
992 			double prob = 0;
993 
994 			for(int i=diffToClosestParent;i<=totalDifference;i++){
995 				prob += binMatrix[totalDifference][i] * pow(0.50, i) * pow(0.50, totalDifference - i);
996 			}
997 			loonIndex += -log(prob);
998 		}
999 
1000 		if(diffToFurtherParent > 0){
1001 			double prob = 0;
1002 
1003 			for(int i=yA;i<=diffToFurtherParent;i++){
1004 				prob += binMatrix[diffToFurtherParent][i] * pow(aFraction, i) * pow(1-aFraction, diffToFurtherParent - i);
1005 			}
1006 			loonIndex += -log(prob);
1007 		}
1008 
1009 		if(diffToClosestParent > 0){
1010 			double prob = 0;
1011 
1012 			for(int i=xB;i<=diffToClosestParent;i++){
1013 				prob += binMatrix[diffToClosestParent][i] * pow(bFraction, i) * pow(1-bFraction, diffToClosestParent - i);
1014 			}
1015 			loonIndex += -log(prob);
1016 		}
1017 
1018 		return loonIndex;
1019 	}
1020 	catch(exception& e) {
1021 		m->errorOut(e, "Perseus", "calcLoonIndex");
1022 		exit(1);
1023 	}
1024 }
1025 
1026 /**************************************************************************************************/
1027 
calcBestDistance(string query,string reference)1028 double Perseus::calcBestDistance(string query, string reference){
1029 	try {
1030 		int alignLength = query.length();
1031 		int mismatch = 0;
1032 		int counter = 0;
1033 
1034 		for(int i=0;i<alignLength;i++){
1035 
1036 			if (m->getControl_pressed()) { return 0; }
1037 
1038 			if((query[i] != '.' || reference[i] != '.') && (query[i] != '-' && reference[i] != '-')){
1039 				if(query[i] != reference[i]){	mismatch++;	}
1040 				counter++;
1041 			}
1042 		}
1043 
1044 		return (double)mismatch / (double)counter;
1045 	}
1046 	catch(exception& e) {
1047 		m->errorOut(e, "Perseus", "calcBestDistance");
1048 		exit(1);
1049 	}
1050 }
1051 
1052 /**************************************************************************************************/
1053 
classifyChimera(double singleDist,double cIndex,double loonIndex,double alpha,double beta)1054 double Perseus::classifyChimera(double singleDist, double cIndex, double loonIndex, double alpha, double beta){
1055 	try {
1056 		double difference = cIndex - singleDist;	//y
1057 		double probability;
1058 
1059 		if(cIndex >= 0.15 || difference > 0.00){
1060 			probability = 0.0000;
1061 		}
1062 		else{
1063 			probability = 1.0 / (1.0 + exp(-(alpha + beta * loonIndex)));
1064 		}
1065 
1066 		return probability;
1067 	}
1068 	catch(exception& e) {
1069 		m->errorOut(e, "Perseus", "classifyChimera");
1070 		exit(1);
1071 	}
1072 }
1073 /**************************************************************************************************/
1074