1 // hhfullalignment.C
2
3 #include "hhhalfalignment.h"
4
5 /////////////////////////////////////////////////////////////////////////////////////
6 /////////////////////////////////////////////////////////////////////////////////////
7 // Methods of class HalfAlignment
8 /////////////////////////////////////////////////////////////////////////////////////
9 /////////////////////////////////////////////////////////////////////////////////////
10
11 /////////////////////////////////////////////////////////////////////////////////////
12 // Constructor
HalfAlignment(int maxseqdis)13 HalfAlignment::HalfAlignment(int maxseqdis) {
14 n = 0;
15 sname = seq = NULL;
16 nss_dssp = nss_pred = nss_conf = nsa_dssp = ncons = -1;
17 h = new int[maxseqdis]; //h[k] = next position of sequence k to be written
18 s = new char*[maxseqdis]; //s[k][h] = character in column h, sequence k of output alignment
19 l = new int*[maxseqdis]; //counts non-gap residues: l[k][i] = index of last residue AT OR BEFORE match state i in seq k
20 m = new int*[maxseqdis]; //counts positions: m[k][i] = position of match state i in string seq[k]
21 if (!h || !s || !l || !m)
22 MemoryError("space for formatting HMM-HMM alignment", __FILE__, __LINE__,
23 __func__);
24 }
25
26 /////////////////////////////////////////////////////////////////////////////////////
27 // Destructor
~HalfAlignment()28 HalfAlignment::~HalfAlignment() {
29 Unset();
30 delete[] h;
31 delete[] s;
32 delete[] l;
33 delete[] m;
34 }
35
36 /////////////////////////////////////////////////////////////////////////////////////
37 // Free memory in HalfAlignment arrays s[][], l[][], and m[][]
Unset()38 void HalfAlignment::Unset() {
39 // Free memory for alignment characters and residue counts
40 for (int k = 0; k < n; k++) {
41 delete[] s[k];
42 delete[] l[k];
43 delete[] m[k];
44 }
45 n = 0;
46 sname = seq = NULL;
47 nss_dssp = nss_pred = nss_conf = nsa_dssp = ncons = -1;
48 }
49
50 /////////////////////////////////////////////////////////////////////////////////////
51 // Prepare a2m/a3m alignment: Calculate l[k][i] (residue indices) and m[k][i] (position in seq[k])
Set(char * name,char ** seq_in,char ** sname_in,int n_in,int L_in,int n1,int n2,int n3,int n4,int nc)52 void HalfAlignment::Set(char* name, char** seq_in, char** sname_in, int n_in,
53 int L_in, int n1, int n2, int n3, int n4, int nc) {
54 int i; //counts match states in seq[k]
55 int ll; //counts residues LEFT from or at current position in seq[k]
56 int mm; //counts postions in string seq[k]
57 int k; //counts sequences
58 char c;
59 char warned = 0;
60
61 nss_dssp = n1;
62 nss_pred = n2;
63 nss_conf = n3;
64 nsa_dssp = n4;
65 ncons = nc;
66 seq = seq_in; //flat copy of sequences
67 sname = sname_in; //flat copy of sequence names
68 n = n_in;
69 L = L_in;
70 pos = 0;
71
72 // Allocate memory for alignment characters and residue counts
73 for (k = 0; k < n; k++) {
74 s[k] = new char[LINELEN];
75 l[k] = new int[L + 10];
76 m[k] = new int[L + 10];
77 if (!s[k] || !l[k] || !m[k])
78 MemoryError("space for formatting HMM-HMM alignment", __FILE__, __LINE__,
79 __func__);
80 h[k] = 0; //starting positions in alignment = 0
81 }
82
83 for (k = 0; k < n; k++) {
84 m[k][0] = 0; // 0'th match state (virtual) is begin state at 0
85 //if k is consensus sequence
86 if (k == nc) {
87 for (i = 1; i <= L; i++)
88 m[k][i] = l[k][i] = i;
89 m[k][L + 1] = l[k][L + 1] = L;
90 continue;
91 }
92 i = 1;
93 mm = 1;
94 ll = 1;
95 while ((c = seq[k][mm])) {
96 if (MatchChr(c) == c && i <= L) //count match/delete states
97 {
98 l[k][i] = ll;
99 m[k][i] = mm;
100 i++;
101 }
102 if (WordChr(c))
103 ll++; //index of next residue
104 mm++;
105 }
106 l[k][i] = ll - 1; //set l[k][L+1] eq number of residues in seq k (-1 since there is no residue at L+1st match state)
107 m[k][i] = mm; //set m[k][L+1]
108 if ((i - 1) != L && !warned) {
109 HH_LOG(WARNING) << "Sequence " << sname[k] << " in HMM " << name
110 << " has " << i << " match states but should have " << L
111 << "\n";
112 warned = 1;
113 }
114 }
115
116 HH_LOG(DEBUG2) << " i chr m l\n";
117 for (i = 0; i <= L + 1; i++)
118 HH_LOG(DEBUG2) << i << " " << seq[0][m[0][i]] << " " << m[0][i]
119 << " " << l[0][i] << std::endl;
120 HH_LOG(DEBUG2) << std::endl;
121 }
122
123 /////////////////////////////////////////////////////////////////////////////////////
124 // Fill in insert states following match state i (without inserting '.' to fill up)
AddInserts(int i)125 void HalfAlignment::AddInserts(int i) {
126 for (int k = 0; k < n; k++) // for all sequences...
127 for (int mm = m[k][i] + 1; mm < m[k][i + 1]; mm++) // for all inserts between match state i and i+1...
128 s[k][h[k]++] = seq[k][mm]; // fill inserts into output alignment s[k]
129 }
130
131 /////////////////////////////////////////////////////////////////////////////////////
132 // Fill up alignment with gaps '.' to generate flush end (all h[k] equal)
FillUpGaps()133 void HalfAlignment::FillUpGaps() {
134 int k; //counts sequences
135 pos = 0;
136
137 // Determine max position h[k]
138 for (k = 0; k < n; k++)
139 pos = imax(h[k], pos);
140
141 // Fill in gaps up to pos
142 for (k = 0; k < n; k++) {
143 for (int hh = h[k]; hh < pos; hh++)
144 s[k][hh] = '.';
145 h[k] = pos;
146 }
147 }
148
149 /////////////////////////////////////////////////////////////////////////////////////
150 // Fill in insert states following match state i and fill up gaps with '.'
AddInsertsAndFillUpGaps(int i)151 void HalfAlignment::AddInsertsAndFillUpGaps(int i) {
152 AddInserts(i);
153 FillUpGaps();
154 }
155
156 /////////////////////////////////////////////////////////////////////////////////////
157 // Add gap column '.'
AddChar(char c)158 void HalfAlignment::AddChar(char c) {
159 for (int k = 0; k < n; k++)
160 s[k][h[k]++] = c;
161 pos++;
162 }
163
164 /////////////////////////////////////////////////////////////////////////////////////
165 // Add match state column i as is
AddColumn(int i)166 void HalfAlignment::AddColumn(int i) {
167 for (int k = 0; k < n; k++)
168 s[k][h[k]++] = seq[k][m[k][i]];
169 pos++;
170 }
171
172 /////////////////////////////////////////////////////////////////////////////////////
173 // Add match state column i as insert state
AddColumnAsInsert(int i)174 void HalfAlignment::AddColumnAsInsert(int i) {
175 char c;
176 for (int k = 0; k < n; k++)
177 if ((c = seq[k][m[k][i]]) != '-' && (c < '0' || c > '9'))
178 s[k][h[k]++] = InsertChr(c);
179 pos++;
180 }
181
182 /////////////////////////////////////////////////////////////////////////////////////
183 // Remove all characters c from template sequences
RemoveChars(char c)184 void HalfAlignment::RemoveChars(char c) {
185 int k, h, hh;
186 for (k = 0; k < n; k++) {
187 for (h = hh = 0; h < pos; h++)
188 if (s[k][h] != c)
189 s[k][hh++] = s[k][h];
190 s[k][++hh] = '\0';
191 }
192 }
193
194 /////////////////////////////////////////////////////////////////////////////////////
195 // Transform alignment sequences from A3M to A2M (insert ".")
BuildFASTA()196 void HalfAlignment::BuildFASTA() {
197 AddInserts(0);
198 FillUpGaps();
199 for (int i = 1; i <= L; i++) {
200 AddColumn(i);
201 AddInserts(i);
202 FillUpGaps();
203 }
204 ToFASTA();
205 }
206
207 /////////////////////////////////////////////////////////////////////////////////////
208 // Transform alignment sequences from A3M to A2M (insert ".")
BuildA2M()209 void HalfAlignment::BuildA2M() {
210 AddInserts(0);
211 FillUpGaps();
212 for (int i = 1; i <= L; i++) {
213 AddColumn(i);
214 AddInserts(i);
215 FillUpGaps();
216 }
217 AddChar('\0');
218 }
219
220 /////////////////////////////////////////////////////////////////////////////////////
221 // Transform alignment sequences from A3M to A2M (insert ".")
BuildA3M()222 void HalfAlignment::BuildA3M() {
223 AddInserts(0);
224 for (int i = 1; i <= L; i++) {
225 AddColumn(i);
226 AddInserts(i);
227 }
228 AddChar('\0');
229 }
230
231 /////////////////////////////////////////////////////////////////////////////////////
232 // Transform alignment sequences from A2M to FASTA ( lowercase to uppercase and '.' to '-')
ToFASTA()233 void HalfAlignment::ToFASTA() {
234 for (int k = 0; k < n; k++) {
235 uprstr(s[k]);
236 strtr(s[k], ".", "-");
237 }
238 }
239
240 /////////////////////////////////////////////////////////////////////////////////////
241 // Align query (HalfAlignment) to template (i.e. hit) match state structure
242 /////////////////////////////////////////////////////////////////////////////////////
AlignToTemplate(Hit & hit,const char outformat)243 void HalfAlignment::AlignToTemplate(Hit& hit, const char outformat) {
244 int i, j;
245 int step; // column of the HMM-HMM alignment (first:nstep, last:1)
246 char state;
247
248 if (0) { //par.loc==0) { //////////////////////////////////////////// STILL NEEDED??
249 // If in global mode: Add part of alignment before first MM state
250 AddInserts(0); // Fill in insert states before first match state
251 for (i = 1; i < hit.i[hit.nsteps]; i++) {
252 AddColumnAsInsert(i);
253 AddInserts(i);
254 if (outformat <= 2)
255 FillUpGaps();
256 }
257 }
258
259 // Add endgaps (First state must be an MM state!!)
260 for (j = 1; j < hit.j[hit.nsteps]; j++) {
261 AddChar('-');
262 }
263
264 // Add alignment between first and last MM state
265 for (step = hit.nsteps; step >= 1; step--) {
266 state = hit.states[step];
267 i = hit.i[step];
268
269 switch (state) {
270 case MM: //MM pair state (both query and template in Match state)
271 AddColumn(i);
272 AddInserts(i);
273 break;
274 case DG: //D- state
275 case MI: //MI state
276 AddColumnAsInsert(i);
277 AddInserts(i);
278 break;
279 case GD: //-D state
280 case IM: //IM state
281 AddChar('-');
282 break;
283 }
284 if (outformat <= 2)
285 FillUpGaps();
286
287 }
288
289 if (0) { //par.loc==0) { //////////////////////////////////////////// STILL NEEDED??
290
291 // If in global mode: Add part of alignment after last MM state
292 for (i = hit.i[1] + 1; i <= L; i++) {
293 AddColumnAsInsert(i);
294 AddInserts(i);
295 if (outformat == 2)
296 FillUpGaps();
297 }
298 }
299
300 // Add endgaps
301 for (j = hit.j[1] + 1; j <= hit.L; j++) {
302 AddChar('-');
303 }
304
305 // Add end-of-string character
306 AddChar('\0');
307 }
308
309 /////////////////////////////////////////////////////////////////////////////////////
310 // Write the a2m/a3m alignment into alnfile
311 /////////////////////////////////////////////////////////////////////////////////////
Print(char * alnfile,const char append,char * commentname,const char format[])312 void HalfAlignment::Print(char* alnfile, const char append, char* commentname,
313 const char format[]) {
314 int k; //counts sequences
315 int omitted = 0; // counts number of sequences with no residues in match states
316 FILE *outf;
317 char* tmp_name = new char[NAMELEN];
318 if (strcmp(alnfile, "stdout")) {
319 if (append)
320 outf = fopen(alnfile, "a");
321 else
322 outf = fopen(alnfile, "w");
323 if (!outf)
324 OpenFileError(alnfile, __FILE__, __LINE__, __func__);
325 } else
326 outf = stdout;
327
328 HH_LOG(DEBUG) << "Writing alignment to " << alnfile << "\n";
329
330 if (!format || strcmp(format, "psi")) {
331 if (commentname != NULL)
332 fprintf(outf, "#%s\n", commentname);
333
334 for (k = 0; k < n; k++)
335 if (k == nss_pred || k == nss_conf || k == nss_dssp || k == nsa_dssp) {
336 fprintf(outf, ">%s\n", sname[k]);
337 fprintf(outf, "%s\n", s[k]);
338 }
339 for (k = 0; k < n; k++) {
340 if (!(k == nss_pred || k == nss_conf || k == nss_dssp || k == nsa_dssp)) {
341 // Print sequence only if it contains at least one residue in a match state
342 if (1) //strpbrk(s[k],"ABCDEFGHIKLMNPQRSTUVWXYZ1234567890"))
343 {
344 fprintf(outf, ">%s\n", sname[k]);
345 fprintf(outf, "%s\n", s[k]);
346 } else {
347 omitted++;
348 HH_LOG(DEBUG)
349 << sname[k]
350 << " contains no residue in match state. Omitting sequence\n";
351 }
352 }
353 }
354 if (omitted) {
355 HH_LOG(INFO) << "Omitted " << omitted << " sequences in "
356 << alnfile
357 << " which contained no residue in match state\n";
358 }
359 } else {
360 for (k = 0; k < n; k++) {
361 strwrd(tmp_name, sname[k], NAMELEN);
362 fprintf(outf, "%-20.20s ", tmp_name);
363 char* ptr = s[k];
364 for (; *ptr != '\0'; ptr++)
365 if (*ptr == 45 || (*ptr >= 65 && *ptr <= 90))
366 fprintf(outf, "%c", *ptr);
367 fprintf(outf, "\n");
368 }
369 }
370 fclose(outf);
371 delete[] tmp_name;
372 }
373