1 /*
2 * alignment.cpp
3 *
4 * Created by Pat Schloss on 12/15/08.
5 * Copyright 2008 Patrick D. Schloss. All rights reserved.
6 *
7 * This is a class for an abstract datatype for classes that implement various types of alignment algorithms.
8 * As of 12/18/08 these included alignments based on blastn, needleman-wunsch, and the Gotoh algorithms
9 *
10 */
11
12 #include "alignmentcell.hpp"
13 #include "alignment.hpp"
14
15
16 /**************************************************************************************************/
17
Alignment()18 Alignment::Alignment() { m = MothurOut::getInstance(); /* do nothing */ }
19
20 /**************************************************************************************************/
21
Alignment(int A)22 Alignment::Alignment(int A) : nCols(A), nRows(A) {
23 try {
24 m = MothurOut::getInstance();
25 alignment.resize(nRows); // For the Gotoh and Needleman-Wunsch we initialize the dynamic programming
26 for(int i=0;i<nRows;i++){ // matrix by initializing a matrix that is A x A. By default we will set A
27 alignment[i].resize(nCols); // at 2000 for 16S rRNA gene sequences
28 }
29 }
30 catch(exception& e) {
31 m->errorOut(e, "Alignment", "Alignment");
32 exit(1);
33 }
34 }
35 /**************************************************************************************************/
36
Alignment(int A,int nk)37 Alignment::Alignment(int A, int nk) : nCols(A), nRows(A) {
38 try {
39 m = MothurOut::getInstance();
40 alignment.resize(nRows); // For the Gotoh and Needleman-Wunsch we initialize the dynamic programming
41 for(int i=0;i<nRows;i++){ // matrix by initializing a matrix that is A x A. By default we will set A
42 alignment[i].resize(nCols); // at 2000 for 16S rRNA gene sequences
43 }
44 }
45 catch(exception& e) {
46 m->errorOut(e, "Alignment", "Alignment");
47 exit(1);
48 }
49 }
50 /**************************************************************************************************/
51 //only gets bigger
resize(int A)52 void Alignment::resize(int A) {
53 try {
54 nCols = A;
55 nRows = A;
56
57 alignment.resize(nRows);
58 for(int i=0;i<nRows;i++){
59 alignment[i].resize(nCols);
60 }
61 }
62 catch(exception& e) {
63 m->errorOut(e, "Alignment", "resize");
64 exit(1);
65 }
66 }
67 /**************************************************************************************************/
68
traceBack(bool createBaseMap)69 void Alignment::traceBack(bool createBaseMap){ // This traceback routine is used by the dynamic programming algorithms
70 try {
71 BBaseMap.clear();
72 ABaseMap.clear(); // to fill the values of seqAaln and seqBaln
73 seqAaln = "";
74 seqBaln = "";
75 int row = lB-1;
76 int column = lA-1;
77 // seqAstart = 1;
78 // seqAend = column;
79
80 AlignmentCell currentCell = alignment[row][column]; // Start the traceback from the bottom-right corner of the
81 // matrix
82
83 if(currentCell.prevCell == 'x'){ seqAaln = seqBaln = "NOALIGNMENT"; }//If there's an 'x' in the bottom-
84 else{ // right corner bail out because it means nothing got aligned
85 int count = 0;
86 while(currentCell.prevCell != 'x'){ // while the previous cell isn't an 'x', keep going...
87
88 if(currentCell.prevCell == 'u'){ // if the pointer to the previous cell is 'u', go up in the
89 seqAaln = '-' + seqAaln; // matrix. this indicates that we need to insert a gap in
90 seqBaln = seqB[row] + seqBaln; // seqA and a base in seqB
91 if (createBaseMap) { BBaseMap[row] = count; }
92 //currentCell = alignment[--row][column];
93 --row;
94 }
95 else if(currentCell.prevCell == 'l'){ // if the pointer to the previous cell is 'l', go to the left
96 seqBaln = '-' + seqBaln; // in the matrix. this indicates that we need to insert a gap
97 seqAaln = seqA[column] + seqAaln; // in seqB and a base in seqA
98 if (createBaseMap) { ABaseMap[column] = count; }
99 //currentCell = alignment[row][--column];
100 --column;
101 }
102 else{
103 seqAaln = seqA[column] + seqAaln; // otherwise we need to go diagonally up and to the left,
104 seqBaln = seqB[row] + seqBaln; // here we add a base to both alignments
105 if (createBaseMap) {
106 BBaseMap[row] = count;
107 ABaseMap[column] = count;
108 }
109 //currentCell = alignment[--row][--column];
110 --row; --column;
111 }
112 if ((row >= 0) && (column >= 0)) { currentCell = alignment[row][column]; }
113 else { break; }
114 count++;
115 }
116 }
117
118
119 pairwiseLength = seqAaln.length();
120 seqAstart = 1; seqAend = 0;
121 seqBstart = 1; seqBend = 0;
122
123 if (createBaseMap) {
124 //flip maps since we now know the total length
125 map<int, int> newAMap;
126 for (map<int, int>::iterator it = ABaseMap.begin(); it != ABaseMap.end(); it++) {
127 int spot = it->second;
128 newAMap[pairwiseLength-spot-1] = it->first-1;
129 }
130 ABaseMap = newAMap;
131 map<int, int> newBMap;
132 for (map<int, int>::iterator it = BBaseMap.begin(); it != BBaseMap.end(); it++) {
133 int spot = it->second;
134 newBMap[pairwiseLength-spot-1] = it->first-1;
135 }
136 BBaseMap = newBMap;
137 }
138
139 for(int i=0;i<seqAaln.length();i++){
140 if(seqAaln[i] != '-' && seqBaln[i] == '-') { seqAstart++; }
141 else if(seqAaln[i] == '-' && seqBaln[i] != '-') { seqBstart++; }
142 else { break; }
143 }
144
145 pairwiseLength -= (seqAstart + seqBstart - 2);
146
147 for(int i=seqAaln.length()-1; i>=0;i--){
148 if(seqAaln[i] != '-' && seqBaln[i] == '-') { seqAend++; }
149 else if(seqAaln[i] == '-' && seqBaln[i] != '-') { seqBend++; }
150 else { break; }
151 }
152 pairwiseLength -= (seqAend + seqBend);
153
154 seqAend = seqA.length() - seqAend - 1;
155 seqBend = seqB.length() - seqBend - 1;
156 }
157 catch(exception& e) {
158 m->errorOut(e, "Alignment", "traceBack");
159 exit(1);
160 }
161 }
162 /**************************************************************************************************/
163
~Alignment()164 Alignment::~Alignment(){
165 try {
166 for (int i = 0; i < alignment.size(); i++) {
167 for (int j = (alignment[i].size()-1); j >= 0; j--) { alignment[i].pop_back(); }
168 }
169 alignment.clear();
170 }
171 catch(exception& e) {
172 m->errorOut(e, "Alignment", "~Alignment");
173 exit(1);
174 }
175 }
176
177 /**************************************************************************************************/
178
getSeqAAln()179 string Alignment::getSeqAAln(){
180 return seqAaln; // this is called to get the alignment of seqA
181 }
182
183 /**************************************************************************************************/
184
getSeqBAln()185 string Alignment::getSeqBAln(){
186 return seqBaln; // this is called to get the alignment of seqB
187 }
188
189 /**************************************************************************************************/
190
getCandidateStartPos()191 int Alignment::getCandidateStartPos(){
192 return seqAstart; // this is called to report the quality of the alignment
193 }
194
195 /**************************************************************************************************/
196
getCandidateEndPos()197 int Alignment::getCandidateEndPos(){
198 return seqAend; // this is called to report the quality of the alignment
199 }
200
201 /**************************************************************************************************/
202
getTemplateStartPos()203 int Alignment::getTemplateStartPos(){
204 return seqBstart; // this is called to report the quality of the alignment
205 }
206 /**************************************************************************************************/
207
getSeqAAlnBaseMap()208 map<int, int> Alignment::getSeqAAlnBaseMap(){
209 return ABaseMap;
210 }
211 /**************************************************************************************************/
212
getSeqBAlnBaseMap()213 map<int, int> Alignment::getSeqBAlnBaseMap(){
214 return BBaseMap;
215 }
216 /**************************************************************************************************/
217
getTemplateEndPos()218 int Alignment::getTemplateEndPos(){
219 return seqBend; // this is called to report the quality of the alignment
220 }
221
222 /**************************************************************************************************/
223
getPairwiseLength()224 int Alignment::getPairwiseLength(){
225 return pairwiseLength; // this is the pairwise alignment length
226 }
227
228 /**************************************************************************************************/
229