1 /*
2 * chimerarealigner.cpp
3 * Mothur
4 *
5 * Created by westcott on 2/12/10.
6 * Copyright 2010 Schloss Lab. All rights reserved.
7 *
8 */
9
10 #include "chimerarealigner.h"
11 #include "needlemanoverlap.hpp"
12 #include "nast.hpp"
13
14 //***************************************************************************************************************
ChimeraReAligner()15 ChimeraReAligner::ChimeraReAligner() { m = MothurOut::getInstance(); }
16 //***************************************************************************************************************
~ChimeraReAligner()17 ChimeraReAligner::~ChimeraReAligner() {}
18 //***************************************************************************************************************
reAlign(Sequence * query,vector<string> parents)19 void ChimeraReAligner::reAlign(Sequence* query, vector<string> parents) {
20 try {
21
22 if(parents.size() != 0){
23
24 alignmentLength = query->getAlignLength(); //x
25 int queryUnalignedLength = query->getNumBases(); //y
26
27 buildTemplateProfile(parents);
28
29 createAlignMatrix(queryUnalignedLength, alignmentLength);
30 fillAlignMatrix(query->getUnaligned());
31 query->setAligned(getNewAlignment(query->getUnaligned()));
32 }
33
34 }
35 catch(exception& e) {
36 m->errorOut(e, "ChimeraReAligner", "reAlign");
37 exit(1);
38 }
39 }
40 /***************************************************************************************************************/
41
buildTemplateProfile(vector<string> parents)42 void ChimeraReAligner::buildTemplateProfile(vector<string> parents) {
43 try{
44 int numParents = parents.size();
45
46 profile.resize(alignmentLength);
47
48 for(int i=0;i<numParents;i++){
49 string seq = parents[i];
50
51 for(int j=0;j<alignmentLength;j++){
52
53
54 if(seq[j] == 'A') { profile[j].A++; }
55 else if(seq[j] == 'T') { profile[j].T++; }
56 else if(seq[j] == 'G') { profile[j].G++; }
57 else if(seq[j] == 'C') { profile[j].C++; }
58 else if(seq[j] == '-') { profile[j].Gap++; }
59 else if(seq[j] == '.') { profile[j].Gap++; }
60
61 }
62 }
63
64
65 for(int i=0;i<alignmentLength;i++){
66 profile[i].Chars = profile[i].A + profile[i].T + profile[i].G + profile[i].C;
67 }
68
69 }
70 catch(exception& e) {
71 m->errorOut(e, "ChimeraReAligner", "buildTemplateProfile");
72 exit(1);
73 }
74 }
75
76
77 /***************************************************************************************************************/
78
createAlignMatrix(int queryUnalignedLength,int alignmentLength)79 void ChimeraReAligner::createAlignMatrix(int queryUnalignedLength, int alignmentLength){
80
81 try{
82 alignMatrix.resize(alignmentLength+1);
83 for(int i=0;i<=alignmentLength;i++){
84 alignMatrix[i].resize(queryUnalignedLength+1);
85 }
86
87 for(int i=1;i<=alignmentLength;i++) { alignMatrix[i][0].direction = 'l'; }
88 for(int j=1;j<=queryUnalignedLength;j++){ alignMatrix[0][j].direction = 'u'; }
89 }
90 catch(exception& e) {
91 m->errorOut(e, "ChimeraReAligner", "createAlignMatrix");
92 exit(1);
93 }
94 }
95
96 /***************************************************************************************************************/
97
fillAlignMatrix(string query)98 void ChimeraReAligner::fillAlignMatrix(string query){
99 try{
100 int GAP = -4;
101
102 int nrows = alignMatrix.size()-1;
103 int ncols = alignMatrix[0].size()-1;
104
105 for(int i=1;i<=nrows;i++){
106
107 bases p = profile[i-1];
108 int numChars = p.Chars;
109
110 for(int j=1;j<=ncols;j++){
111
112 char q = query[j-1];
113
114 // score it for if there was a match
115 int maxScore = calcMatchScore(p, q) + alignMatrix[i-1][j-1].score;
116 int maxDirection = 'd';
117
118 // score it for if there was a gap in the query
119 int score = alignMatrix[i-1][j].score + (numChars * GAP);
120 if (score > maxScore) {
121 maxScore = score;
122 maxDirection = 'l';
123 }
124
125 alignMatrix[i][j].score = maxScore;
126 alignMatrix[i][j].direction = maxDirection;
127
128 }
129 }
130 }
131 catch(exception& e) {
132 m->errorOut(e, "ChimeraReAligner", "fillAlignMatrix");
133 exit(1);
134 }
135 }
136
137 /***************************************************************************************************************/
138
calcMatchScore(bases p,char q)139 int ChimeraReAligner::calcMatchScore(bases p, char q){
140 try{
141
142 int MATCH = 5;
143 int MISMATCH = -4;
144
145 int score = 0;
146
147 if(q == 'G') { score = (MATCH * p.G + MISMATCH * (p.A + p.T + p.C + p.Gap)); }
148 else if(q == 'A') { score = (MATCH * p.A + MISMATCH * (p.G + p.T + p.C + p.Gap)); }
149 else if(q == 'T') { score = (MATCH * p.T + MISMATCH * (p.G + p.A + p.C + p.Gap)); }
150 else if(q == 'C') { score = (MATCH * p.C + MISMATCH * (p.G + p.A + p.T + p.Gap)); }
151 else { score = (MATCH * p.A + MISMATCH * (p.G + p.T + p.C + p.Gap)); }
152
153 return score;
154 }
155 catch(exception& e) {
156 m->errorOut(e, "ChimeraReAligner", "calcMatchScore");
157 exit(1);
158 }
159 }
160
161 /***************************************************************************************************************/
162
getNewAlignment(string query)163 string ChimeraReAligner::getNewAlignment(string query){
164 try{
165 string queryAlignment(alignmentLength, '.');
166 string referenceAlignment(alignmentLength, '.');
167
168
169 int maxScore = -99999999;
170
171 int nrows = alignMatrix.size()-1;
172 int ncols = alignMatrix[0].size()-1;
173
174 int bestCol = -1;
175 int bestRow = -1;
176
177 for(int i=1;i<=nrows;i++){
178 int score = alignMatrix[i][ncols].score;
179 if (score > maxScore) {
180 maxScore = score;
181 bestRow = i;
182 bestCol = ncols;
183 }
184 }
185
186 for(int j=1;j<=ncols;j++){
187 int score = alignMatrix[nrows][j].score;
188 if (score > maxScore) {
189 maxScore = score;
190 bestRow = nrows;
191 bestCol = j;
192 }
193 }
194
195 int currentRow = bestRow;
196 int currentCol = bestCol;
197
198 int alignmentPosition = 0;
199 if(currentRow < alignmentLength){
200 for(int i=alignmentLength;i>currentRow;i--){
201 alignmentPosition++;
202 }
203 }
204
205 AlignCell c = alignMatrix[currentRow][currentCol];
206 while(c.direction != 'x'){
207
208 char q;
209
210 if(c.direction == 'd'){
211 q = query[currentCol-1];
212 currentCol--;
213 currentRow--;
214 }
215
216
217 else if (c.direction == 'u') {
218 break;
219 }
220 else if(c.direction == 'l'){
221 char gapChar;
222 if(currentCol == 0) { gapChar = '.'; }
223 else { gapChar = '-'; }
224
225 q = gapChar;
226 currentRow--;
227 }
228 else{ m->mothurOut("[ERROR]: Unexpected case in ChimeraReAligner::getNewAlignment, aborting.\n"); m->setControl_pressed(true); }
229
230 queryAlignment[alignmentPosition] = q;
231 alignmentPosition++;
232 c = alignMatrix[currentRow][currentCol];
233 }
234
235 // need to reverse the string
236 string flipSeq = "";
237 for(int i=alignmentLength-1;i>=0;i--){
238 flipSeq += queryAlignment[i];
239 }
240
241 return flipSeq;
242 }
243 catch(exception& e) {
244 m->errorOut(e, "ChimeraReAligner", "getNewAlignment");
245 exit(1);
246 }
247 }
248
249 /***************************************************************************************************************/
250
251 // Sequence* ChimeraReAligner::getSequence(string name) {
252 // try{
253 // Sequence* temp;
254 //
255 // //look through templateSeqs til you find it
256 // int spot = -1;
257 // for (int i = 0; i < templateSeqs.size(); i++) {
258 // if (name == templateSeqs[i]->getName()) {
259 // spot = i;
260 // break;
261 // }
262 // }
263 //
264 // if(spot == -1) { m->mothurOut("Error: Could not find sequence.\n"); return NULL; }
265 //
266 // temp = new Sequence(templateSeqs[spot]->getName(), templateSeqs[spot]->getAligned());
267 //
268 // return temp;
269 // }
270 // catch(exception& e) {
271 // m->errorOut(e, "ChimeraReAligner", "getSequence");
272 // exit(1);
273 // }
274 //}
275
276 //***************************************************************************************************************/
277