1 /*
2 * PostScript output for Sequence / Structure Alignments
3 *
4 * c Ivo Hofacker, Peter F Stadler, Ronny Lorenz
5 * Vienna RNA package
6 */
7
8 #ifdef HAVE_CONFIG_H
9 #include "config.h"
10 #endif
11
12 #include <stdio.h>
13 #include <stdlib.h>
14 #include <math.h>
15 #include <string.h>
16 #include <ctype.h>
17 #include "ViennaRNA/model.h"
18 #include "ViennaRNA/fold_vars.h"
19 #include "ViennaRNA/utils/basic.h"
20 #include "ViennaRNA/utils/alignments.h"
21 #include "ViennaRNA/alphabet.h"
22 #include "ViennaRNA/plotting/alignments.h"
23
24 #include "ViennaRNA/static/templates_postscript.h"
25
26 #include "ViennaRNA/plotting/ps_helpers.inc"
27
28 /*
29 #################################
30 # PRIVATE MACROS #
31 #################################
32 */
33
34 /*
35 #################################
36 # GLOBAL VARIABLES #
37 #################################
38 */
39
40 /*
41 #################################
42 # PRIVATE VARIABLES #
43 #################################
44 */
45
46 /*
47 #################################
48 # PRIVATE FUNCTION DECLARATIONS #
49 #################################
50 */
51
52 /*
53 #################################
54 # BEGIN OF FUNCTION DEFINITIONS #
55 #################################
56 */
57 PUBLIC int
vrna_file_PS_aln(const char * filename,const char ** seqs,const char ** names,const char * structure,unsigned int columns)58 vrna_file_PS_aln(const char *filename,
59 const char **seqs,
60 const char **names,
61 const char *structure,
62 unsigned int columns)
63 {
64 return vrna_file_PS_aln_slice(filename,
65 seqs,
66 names,
67 structure,
68 0,
69 0,
70 0,
71 columns);
72 }
73
74
75 PUBLIC int
vrna_file_PS_aln_slice(const char * filename,const char ** seqs,const char ** names,const char * structure,unsigned int start,unsigned int end,int offset,unsigned int columns)76 vrna_file_PS_aln_slice(const char *filename,
77 const char **seqs,
78 const char **names,
79 const char *structure,
80 unsigned int start,
81 unsigned int end,
82 int offset,
83 unsigned int columns)
84 {
85 /* produce PS sequence alignment color-annotated by consensus structure */
86
87 int N, i, j, k, x, y, tmp, columnWidth, bbox[4];
88 char *tmpBuffer, *ssEscaped, *ruler, *cons, *substructure;
89 char c;
90 float fontWidth, fontHeight, imageHeight, imageWidth, tmpColumns;
91 int length, maxName, maxNum, currPos, num;
92 float lineStep, blockStep, consStep, ssStep, rulerStep, nameStep, numberStep;
93 float maxConsBar, startY, namesX, seqsX, currY;
94 float score, barHeight, xx, yy;
95 int match, block;
96 FILE *outfile;
97 short *pair_table;
98 char *colorMatrix[6][3] = {
99 { "0.0 1", "0.0 0.6", "0.0 0.2" }, /* red */
100 { "0.16 1", "0.16 0.6", "0.16 0.2" }, /* ochre */
101 { "0.32 1", "0.32 0.6", "0.32 0.2" }, /* turquoise */
102 { "0.48 1", "0.48 0.6", "0.48 0.2" }, /* green */
103 { "0.65 1", "0.65 0.6", "0.65 0.2" }, /* blue */
104 { "0.81 1", "0.81 0.6", "0.81 0.2" } /* violet */
105 };
106
107 vrna_md_t md;
108
109 set_model_details(&md);
110
111 outfile = fopen(filename, "w");
112
113 if (outfile == NULL) {
114 vrna_message_warning("can't open file %s - not doing alignment plot\n", filename);
115 return 0;
116 }
117
118 fontWidth = 6; /* Font metrics */
119 fontHeight = 6.5;
120 lineStep = fontHeight + 2; /* distance between lines */
121 blockStep = 3.5 * fontHeight; /* distance between blocks */
122 consStep = fontHeight * 0.5; /* distance between alignment and conservation curve */
123 ssStep = 2; /* distance between secondary structure line and sequences */
124 rulerStep = 2; /* distance between sequences and ruler */
125 nameStep = 3 * fontWidth; /* distance between names and sequences */
126 numberStep = fontWidth; /* distance between sequeces and numbers */
127 maxConsBar = 2.5 * fontHeight; /* Height of conservation curve */
128 startY = 2; /* "y origin" */
129 namesX = fontWidth; /* "x origin" */
130
131 if (start == 0)
132 start = 1;
133
134 if (end == 0)
135 end = (int)strlen(seqs[0]);
136
137 /* Number of columns of the alignment */
138 length = end - start + 1;
139
140 substructure = (char *)vrna_alloc(sizeof(char) * (length + 1));
141 substructure = memcpy(substructure, structure + start - 1, sizeof(char) * length);
142 substructure[length] = '\0';
143
144 /* remove unbalanced brackets ? */
145
146 /* Display long alignments in blocks of this size */
147 columnWidth = (columns == 0) ? length : columns;
148
149 /*
150 * Allocate memory for various strings, length*2 is (more than)
151 * enough for all of them
152 */
153 tmpBuffer = (char *)vrna_alloc((unsigned)MAX2(length * 2, columnWidth) + 1);
154 ssEscaped = (char *)vrna_alloc((unsigned)length * 2);
155 ruler = (char *)vrna_alloc((unsigned)length * 2);
156
157
158 /* Get length of longest name and count sequences in alignment*/
159
160 for (i = maxName = N = 0; names[i] != NULL; i++) {
161 N++;
162 tmp = strlen(names[i]);
163 if (tmp > maxName)
164 maxName = tmp;
165 }
166
167
168 /* x-coord. where sequences start */
169 seqsX = namesX + maxName * fontWidth + nameStep;
170
171 /* calculate number of digits of the alignment length */
172 snprintf(tmpBuffer, length, "%d", start + length + offset);
173 maxNum = strlen(tmpBuffer);
174
175 /* Calculate bounding box */
176 tmpColumns = columnWidth;
177 if (length < columnWidth)
178 tmpColumns = length;
179
180 imageWidth = ceil(namesX +
181 (maxName + tmpColumns + maxNum) * fontWidth +
182 2 * nameStep +
183 fontWidth +
184 numberStep);
185
186 imageHeight = startY +
187 ceil((float)length / columnWidth) *
188 ((N + 2) * lineStep + blockStep + consStep + ssStep + rulerStep);
189
190 bbox[0] = bbox[1] = 0;
191 bbox[2] = (int)imageWidth;
192 bbox[3] = (int)imageHeight;
193
194 /* Write postscript header including correct bounding box */
195 print_PS_header(outfile,
196 "ViennaRNA Package - Alignment",
197 bbox,
198 &md,
199 NULL,
200 "ALNdict",
201 PS_MACRO_ALN_BASE);
202
203 fprintf(outfile, "0 %d translate\n"
204 "1 -1 scale\n"
205 "/Courier findfont\n"
206 "[10 0 0 -10 0 0] makefont setfont\n",
207 (int)imageHeight);
208
209 /*
210 * Create ruler and secondary structure lines
211 * Init all with dots
212 */
213 memset(ruler, '.', sizeof(char) * length);
214
215 for (i = 0; i < length; i++) {
216 /* Write number every 10th position, leave out block breaks */
217 if (((i + start + offset) % 10 == 0) && (i % columnWidth != 0)) {
218 snprintf(tmpBuffer, length, "%d", i + start + offset);
219 num = strlen(tmpBuffer);
220 if (i + num <= length)
221 memcpy(ruler + i, tmpBuffer, num);
222 }
223 }
224 ruler[length] = '\0';
225
226 pair_table = vrna_ptable_from_string(substructure,
227 VRNA_BRACKETS_RND |
228 VRNA_BRACKETS_ANG |
229 VRNA_BRACKETS_SQR);
230
231 /*
232 * note: The substructrue and the corresponding pair_table
233 * are relative to the 'start' position, while the sequence(s)
234 * are not! Therefore we need to shift structure coordinates
235 * accordingly.
236 */
237 int shift = start - 1;
238
239 pair_table -= shift;
240
241 /*
242 * Draw color annotation first
243 * Repeat for all pairs
244 */
245 for (i = start; i <= end; i++) {
246 j = pair_table[i] + shift;
247 if ((j > i) && (j <= end)) {
248 /* Repeat for open and closing position */
249 for (k = 0; k < 2; k++) {
250 unsigned int tt;
251 int pairings, nonpair, s, col;
252 int ptype[8] = {
253 0, 0, 0, 0, 0, 0, 0, 0
254 };
255 char *color;
256 col = (k == 0) ? i - shift - 1 : j - shift - 1;
257 block = ceil((float)(col + 1) / columnWidth);
258 xx = seqsX + (col - (block - 1) * columnWidth) * fontWidth;
259
260 /* Repeat for each sequence */
261 for (s = pairings = nonpair = 0; s < N; s++) {
262 tt = md.pair[vrna_nucleotide_encode(seqs[s][i - 1], &md)]
263 [vrna_nucleotide_encode(seqs[s][j - 1], &md)];
264 ptype[tt]++;
265 }
266
267 for (pairings = 0, s = 1; s <= 7; s++)
268 if (ptype[s])
269 pairings++;
270
271 nonpair = ptype[0];
272 if (nonpair <= 2) {
273 color = colorMatrix[pairings - 1][nonpair];
274 for (s = 0; s < N; s++) {
275 yy = startY + (block - 1) * (lineStep * (N + 2) + blockStep + consStep + rulerStep) +
276 ssStep * (block) + (s + 1) * lineStep;
277
278 /* Color according due color information in pi-array, only if base pair is possible */
279 tt = md.pair[vrna_nucleotide_encode(seqs[s][i - 1], &md)]
280 [vrna_nucleotide_encode(seqs[s][j - 1], &md)];
281 if (tt)
282 fprintf(outfile, "%.1f %.1f %.1f %.1f %s box\n",
283 xx, yy - 1, xx + fontWidth, yy + fontHeight + 1, color);
284 }
285 }
286 }
287 }
288 }
289
290 pair_table += shift;
291 free(pair_table);
292
293 /* Process rest of the output in blocks of columnWidth */
294
295 currY = startY;
296 currPos = 0;
297
298 cons = vrna_aln_consensus_sequence(seqs, &md);
299
300 while (currPos < length) {
301 /* Display secondary structure line */
302 fprintf(outfile, "0 setgray\n");
303 strncpy(tmpBuffer, substructure + currPos, columnWidth);
304 tmpBuffer[columnWidth] = '\0';
305
306 x = 0;
307 y = 0;
308 while ((c = tmpBuffer[x])) {
309 if (c == '.') {
310 ssEscaped[y++] = '.';
311 } else {
312 ssEscaped[y++] = '\\';
313 ssEscaped[y++] = c;
314 }
315
316 x++;
317 }
318 ssEscaped[y] = '\0';
319
320 fprintf(outfile, "(%s) %.1f %.1f string\n", ssEscaped, seqsX, currY);
321 currY += ssStep + lineStep;
322
323 /* Display names, sequences and numbers */
324
325 for (i = 0; i < N; i++) {
326 unsigned int max_len = columnWidth;
327 if (length - currPos < max_len)
328 max_len = length - currPos;
329
330 strncpy(tmpBuffer, seqs[i] + currPos + shift, max_len);
331 tmpBuffer[max_len] = '\0';
332
333 match = 0;
334 for (j = 0; j < (currPos + strlen(tmpBuffer)); j++)
335 if (seqs[i][j + shift] != '-')
336 match++;
337
338 fprintf(outfile, "(%s) %.1f %.1f string\n", names[i], namesX, currY);
339 fprintf(outfile, "(%s) %.1f %.1f string\n", tmpBuffer, seqsX, currY);
340 fprintf(outfile,
341 "(%i) %.1f %.1f string\n",
342 match,
343 seqsX + fontWidth * (strlen(tmpBuffer)) + numberStep,
344 currY);
345 currY += lineStep;
346 }
347 currY += rulerStep;
348 strncpy(tmpBuffer, ruler + currPos, columnWidth);
349 tmpBuffer[columnWidth] = '\0';
350 fprintf(outfile, "(%s) %.1f %.1f string\n", tmpBuffer, seqsX, currY);
351
352 currY += lineStep;
353 currY += consStep;
354
355 /*Display conservation bar*/
356
357 fprintf(outfile, "0.6 setgray\n");
358 for (i = currPos; (i < currPos + columnWidth && i < length); i++) {
359 match = 0;
360 for (j = 0; j < N; j++) {
361 if (cons[i + shift] == toupper(seqs[j][i + shift]))
362 match++;
363
364 if (cons[i + shift] == 'U' && toupper(seqs[j][i + shift]) == 'T')
365 match++;
366
367 if (cons[i + shift] == 'T' && toupper(seqs[j][i + shift]) == 'U')
368 match++;
369 }
370 score = (float)(match - 1) / (N - 1);
371
372 if (cons[i + shift] == '-' ||
373 cons[i + shift] == '_' ||
374 cons[i + shift] == '.')
375 score = 0;
376
377 barHeight = maxConsBar * score;
378 if (barHeight == 0)
379 barHeight = 1;
380
381 xx = seqsX + (i - (columnWidth * currPos / columnWidth)) * fontWidth;
382
383 fprintf(outfile, "%.1f %.1f %.1f %.1f box2\n",
384 xx,
385 currY + maxConsBar - barHeight,
386 xx + fontWidth,
387 currY + maxConsBar);
388 }
389
390 currY += blockStep;
391 currPos += columnWidth;
392 }
393 free(cons);
394
395 print_PS_footer(outfile);
396
397 fclose(outfile);
398
399 free(tmpBuffer);
400 free(ssEscaped);
401 free(ruler);
402 free(substructure);
403
404 return 0;
405 }
406
407
408 #ifndef VRNA_DISABLE_BACKWARD_COMPATIBILITY
409
410 /*
411 *###########################################
412 *# deprecated functions below #
413 *###########################################
414 */
415 PUBLIC int
PS_color_aln(const char * structure,const char * filename,const char * seqs[],const char * names[])416 PS_color_aln(const char *structure,
417 const char *filename,
418 const char *seqs[],
419 const char *names[])
420 {
421 return vrna_file_PS_aln(filename, seqs, names, structure, 60);
422 }
423
424
425 int
aliPS_color_aln(const char * structure,const char * filename,const char * seqs[],const char * names[])426 aliPS_color_aln(const char *structure,
427 const char *filename,
428 const char *seqs[],
429 const char *names[])
430 {
431 return vrna_file_PS_aln_slice(filename,
432 seqs,
433 names,
434 structure,
435 0,
436 0,
437 0,
438 100);
439 }
440
441
442 #endif
443