1 /*
2  *  chimerarealigner.cpp
3  *  Mothur
4  *
5  *  Created by westcott on 2/12/10.
6  *  Copyright 2010 Schloss Lab. All rights reserved.
7  *
8  */
9 
10 #include "chimerarealigner.h"
11 #include "needlemanoverlap.hpp"
12 #include "nast.hpp"
13 
14 //***************************************************************************************************************
ChimeraReAligner()15 ChimeraReAligner::ChimeraReAligner()  {  m = MothurOut::getInstance(); }
16 //***************************************************************************************************************
~ChimeraReAligner()17 ChimeraReAligner::~ChimeraReAligner() {}
18 //***************************************************************************************************************
reAlign(Sequence * query,vector<string> parents)19 void ChimeraReAligner::reAlign(Sequence* query, vector<string> parents) {
20 	try {
21 
22 		if(parents.size() != 0){
23 
24 			alignmentLength = query->getAlignLength();	//x
25 			int queryUnalignedLength = query->getNumBases();	//y
26 
27 			buildTemplateProfile(parents);
28 
29 			createAlignMatrix(queryUnalignedLength, alignmentLength);
30 			fillAlignMatrix(query->getUnaligned());
31 			query->setAligned(getNewAlignment(query->getUnaligned()));
32 		}
33 
34 	}
35 	catch(exception& e) {
36 		m->errorOut(e, "ChimeraReAligner", "reAlign");
37 		exit(1);
38 	}
39 }
40 /***************************************************************************************************************/
41 
buildTemplateProfile(vector<string> parents)42 void ChimeraReAligner::buildTemplateProfile(vector<string> parents) {
43 	try{
44 		int numParents = parents.size();
45 
46 		profile.resize(alignmentLength);
47 
48 		for(int i=0;i<numParents;i++){
49 			string seq = parents[i];
50 
51 			for(int j=0;j<alignmentLength;j++){
52 
53 
54 				if(seq[j] == 'A')		{	profile[j].A++;		}
55 				else if(seq[j] == 'T')	{	profile[j].T++;		}
56 				else if(seq[j] == 'G')	{	profile[j].G++;		}
57 				else if(seq[j] == 'C')	{	profile[j].C++;		}
58 				else if(seq[j] == '-')	{	profile[j].Gap++;	}
59 				else if(seq[j] == '.')	{	profile[j].Gap++;	}
60 
61 			}
62 		}
63 
64 
65 		for(int i=0;i<alignmentLength;i++){
66 			profile[i].Chars = profile[i].A + profile[i].T + profile[i].G + profile[i].C;
67 		}
68 
69 	}
70 	catch(exception& e) {
71 		m->errorOut(e, "ChimeraReAligner", "buildTemplateProfile");
72 		exit(1);
73 	}
74 }
75 
76 
77 /***************************************************************************************************************/
78 
createAlignMatrix(int queryUnalignedLength,int alignmentLength)79 void ChimeraReAligner::createAlignMatrix(int queryUnalignedLength, int alignmentLength){
80 
81 	try{
82 		alignMatrix.resize(alignmentLength+1);
83 		for(int i=0;i<=alignmentLength;i++){
84 			alignMatrix[i].resize(queryUnalignedLength+1);
85 		}
86 
87 		for(int i=1;i<=alignmentLength;i++)		{	alignMatrix[i][0].direction = 'l';	}
88 		for(int j=1;j<=queryUnalignedLength;j++){	alignMatrix[0][j].direction = 'u';	}
89 	}
90 	catch(exception& e) {
91 		m->errorOut(e, "ChimeraReAligner", "createAlignMatrix");
92 		exit(1);
93 	}
94 }
95 
96 /***************************************************************************************************************/
97 
fillAlignMatrix(string query)98 void ChimeraReAligner::fillAlignMatrix(string query){
99 	try{
100 		int GAP = -4;
101 
102 		int nrows = alignMatrix.size()-1;
103 		int ncols = alignMatrix[0].size()-1;
104 
105 		for(int i=1;i<=nrows;i++){
106 
107 			bases p = profile[i-1];
108 			int numChars = p.Chars;
109 
110 			for(int j=1;j<=ncols;j++){
111 
112 				char q = query[j-1];
113 
114 				//	score it for if there was a match
115 				int maxScore = calcMatchScore(p, q) + alignMatrix[i-1][j-1].score;
116 				int maxDirection = 'd';
117 
118 				//	score it for if there was a gap in the query
119 				int score = alignMatrix[i-1][j].score + (numChars * GAP);
120 				if (score > maxScore) {
121 					maxScore = score;
122 					maxDirection = 'l';
123 				}
124 
125 				alignMatrix[i][j].score = maxScore;
126 				alignMatrix[i][j].direction = maxDirection;
127 
128 			}
129 		}
130 	}
131 	catch(exception& e) {
132 		m->errorOut(e, "ChimeraReAligner", "fillAlignMatrix");
133 		exit(1);
134 	}
135 }
136 
137 /***************************************************************************************************************/
138 
calcMatchScore(bases p,char q)139 int ChimeraReAligner::calcMatchScore(bases p, char q){
140 	try{
141 
142 		int MATCH = 5;
143 		int MISMATCH = -4;
144 
145 		int score = 0;
146 
147 		if(q == 'G')		{	score = (MATCH * p.G + MISMATCH * (p.A + p.T + p.C + p.Gap));		}
148 		else if(q == 'A')	{	score = (MATCH * p.A + MISMATCH * (p.G + p.T + p.C + p.Gap));		}
149 		else if(q == 'T')	{	score = (MATCH * p.T + MISMATCH * (p.G + p.A + p.C + p.Gap));		}
150 		else if(q == 'C')	{	score = (MATCH * p.C + MISMATCH * (p.G + p.A + p.T + p.Gap));		}
151 		else				{	score = (MATCH * p.A + MISMATCH * (p.G + p.T + p.C + p.Gap));		}
152 
153 		return score;
154 	}
155 	catch(exception& e) {
156 		m->errorOut(e, "ChimeraReAligner", "calcMatchScore");
157 		exit(1);
158 	}
159 }
160 
161 /***************************************************************************************************************/
162 
getNewAlignment(string query)163 string ChimeraReAligner::getNewAlignment(string query){
164 	try{
165 		string queryAlignment(alignmentLength, '.');
166 		string referenceAlignment(alignmentLength, '.');
167 
168 
169 		int maxScore = -99999999;
170 
171 		int nrows = alignMatrix.size()-1;
172 		int ncols = alignMatrix[0].size()-1;
173 
174 		int bestCol = -1;
175 		int bestRow = -1;
176 
177 		for(int i=1;i<=nrows;i++){
178 			int score = alignMatrix[i][ncols].score;
179 			if (score > maxScore) {
180 				maxScore = score;
181 				bestRow = i;
182 				bestCol = ncols;
183 			}
184 		}
185 
186 		for(int j=1;j<=ncols;j++){
187 			int score = alignMatrix[nrows][j].score;
188 			if (score > maxScore) {
189 				maxScore = score;
190 				bestRow = nrows;
191 				bestCol = j;
192 			}
193 		}
194 
195 		int currentRow = bestRow;
196 		int currentCol = bestCol;
197 
198 		int alignmentPosition = 0;
199 		if(currentRow < alignmentLength){
200 			for(int i=alignmentLength;i>currentRow;i--){
201 				alignmentPosition++;
202 			}
203 		}
204 
205 		AlignCell c = alignMatrix[currentRow][currentCol];
206 		while(c.direction != 'x'){
207 
208 			char q;
209 
210 			if(c.direction == 'd'){
211 				q = query[currentCol-1];
212 				currentCol--;
213 				currentRow--;
214 			}
215 
216 
217 			else if (c.direction == 'u') {
218 				break;
219 			}
220 			else if(c.direction == 'l'){
221 				char gapChar;
222 				if(currentCol == 0)	{	gapChar = '.';	}
223 				else				{	gapChar = '-';	}
224 
225 				q = gapChar;
226 				currentRow--;
227 			}
228             else{ m->mothurOut("[ERROR]: Unexpected case in ChimeraReAligner::getNewAlignment, aborting.\n"); m->setControl_pressed(true); }
229 
230 			queryAlignment[alignmentPosition] = q;
231 			alignmentPosition++;
232 			c = alignMatrix[currentRow][currentCol];
233 		}
234 
235 //		need to reverse the string
236 		string flipSeq = "";
237 		for(int i=alignmentLength-1;i>=0;i--){
238 			flipSeq += queryAlignment[i];
239 		}
240 
241 		return flipSeq;
242 	}
243 	catch(exception& e) {
244 		m->errorOut(e, "ChimeraReAligner", "getNewAlignment");
245 		exit(1);
246 	}
247 }
248 
249 /***************************************************************************************************************/
250 
251 // Sequence* ChimeraReAligner::getSequence(string name) {
252 //	try{
253 //		Sequence* temp;
254 //
255 //		//look through templateSeqs til you find it
256 //		int spot = -1;
257 //		for (int i = 0; i < templateSeqs.size(); i++) {
258 //			if (name == templateSeqs[i]->getName()) {
259 //				spot = i;
260 //				break;
261 //			}
262 //		}
263 //
264 //		if(spot == -1) { m->mothurOut("Error: Could not find sequence.\n");  return NULL; }
265 //
266 //		temp = new Sequence(templateSeqs[spot]->getName(), templateSeqs[spot]->getAligned());
267 //
268 //		return temp;
269 //	}
270 //	catch(exception& e) {
271 //		m->errorOut(e, "ChimeraReAligner", "getSequence");
272 //		exit(1);
273 //	}
274 //}
275 
276 //***************************************************************************************************************/
277