1 /*
2  *      PostScript output for Sequence / Structure Alignments
3  *
4  *               c  Ivo Hofacker, Peter F Stadler, Ronny Lorenz
5  *                        Vienna RNA package
6  */
7 
8 #ifdef HAVE_CONFIG_H
9 #include "config.h"
10 #endif
11 
12 #include <stdio.h>
13 #include <stdlib.h>
14 #include <math.h>
15 #include <string.h>
16 #include <ctype.h>
17 #include "ViennaRNA/model.h"
18 #include "ViennaRNA/fold_vars.h"
19 #include "ViennaRNA/utils/basic.h"
20 #include "ViennaRNA/utils/alignments.h"
21 #include "ViennaRNA/alphabet.h"
22 #include "ViennaRNA/plotting/alignments.h"
23 
24 #include "ViennaRNA/static/templates_postscript.h"
25 
26 #include "ViennaRNA/plotting/ps_helpers.inc"
27 
28 /*
29  #################################
30  # PRIVATE MACROS                #
31  #################################
32  */
33 
34 /*
35  #################################
36  # GLOBAL VARIABLES              #
37  #################################
38  */
39 
40 /*
41  #################################
42  # PRIVATE VARIABLES             #
43  #################################
44  */
45 
46 /*
47  #################################
48  # PRIVATE FUNCTION DECLARATIONS #
49  #################################
50  */
51 
52 /*
53  #################################
54  # BEGIN OF FUNCTION DEFINITIONS #
55  #################################
56  */
57 PUBLIC int
vrna_file_PS_aln(const char * filename,const char ** seqs,const char ** names,const char * structure,unsigned int columns)58 vrna_file_PS_aln(const char   *filename,
59                  const char   **seqs,
60                  const char   **names,
61                  const char   *structure,
62                  unsigned int columns)
63 {
64   return vrna_file_PS_aln_slice(filename,
65                                 seqs,
66                                 names,
67                                 structure,
68                                 0,
69                                 0,
70                                 0,
71                                 columns);
72 }
73 
74 
75 PUBLIC int
vrna_file_PS_aln_slice(const char * filename,const char ** seqs,const char ** names,const char * structure,unsigned int start,unsigned int end,int offset,unsigned int columns)76 vrna_file_PS_aln_slice(const char   *filename,
77                        const char   **seqs,
78                        const char   **names,
79                        const char   *structure,
80                        unsigned int start,
81                        unsigned int end,
82                        int          offset,
83                        unsigned int columns)
84 {
85   /* produce PS sequence alignment color-annotated by consensus structure */
86 
87   int       N, i, j, k, x, y, tmp, columnWidth, bbox[4];
88   char      *tmpBuffer, *ssEscaped, *ruler, *cons, *substructure;
89   char      c;
90   float     fontWidth, fontHeight, imageHeight, imageWidth, tmpColumns;
91   int       length, maxName, maxNum, currPos, num;
92   float     lineStep, blockStep, consStep, ssStep, rulerStep, nameStep, numberStep;
93   float     maxConsBar, startY, namesX, seqsX, currY;
94   float     score, barHeight, xx, yy;
95   int       match, block;
96   FILE      *outfile;
97   short     *pair_table;
98   char      *colorMatrix[6][3] = {
99     { "0.0 1",  "0.0 0.6",  "0.0 0.2"  },   /* red    */
100     { "0.16 1", "0.16 0.6", "0.16 0.2" },   /* ochre  */
101     { "0.32 1", "0.32 0.6", "0.32 0.2" },   /* turquoise */
102     { "0.48 1", "0.48 0.6", "0.48 0.2" },   /* green  */
103     { "0.65 1", "0.65 0.6", "0.65 0.2" },   /* blue   */
104     { "0.81 1", "0.81 0.6", "0.81 0.2" }    /* violet */
105   };
106 
107   vrna_md_t md;
108 
109   set_model_details(&md);
110 
111   outfile = fopen(filename, "w");
112 
113   if (outfile == NULL) {
114     vrna_message_warning("can't open file %s - not doing alignment plot\n", filename);
115     return 0;
116   }
117 
118   fontWidth   = 6;                              /* Font metrics */
119   fontHeight  = 6.5;
120   lineStep    = fontHeight + 2;                 /* distance between lines */
121   blockStep   = 3.5 * fontHeight;               /* distance between blocks */
122   consStep    = fontHeight * 0.5;               /* distance between alignment and conservation curve */
123   ssStep      = 2;                              /* distance between secondary structure line and sequences */
124   rulerStep   = 2;                              /* distance between sequences and ruler */
125   nameStep    = 3 * fontWidth;                  /* distance between names and sequences */
126   numberStep  = fontWidth;                      /* distance between sequeces and numbers */
127   maxConsBar  = 2.5 * fontHeight;               /* Height of conservation curve */
128   startY      = 2;                              /* "y origin" */
129   namesX      = fontWidth;                      /* "x origin" */
130 
131   if (start == 0)
132     start = 1;
133 
134   if (end == 0)
135     end = (int)strlen(seqs[0]);
136 
137   /* Number of columns of the alignment */
138   length = end - start + 1;
139 
140   substructure          = (char *)vrna_alloc(sizeof(char) * (length + 1));
141   substructure          = memcpy(substructure, structure + start - 1, sizeof(char) * length);
142   substructure[length]  = '\0';
143 
144   /* remove unbalanced brackets ? */
145 
146   /* Display long alignments in blocks of this size */
147   columnWidth = (columns == 0) ? length : columns;
148 
149   /*
150    *  Allocate memory for various strings, length*2 is (more than)
151    *  enough for all of them
152    */
153   tmpBuffer = (char *)vrna_alloc((unsigned)MAX2(length * 2, columnWidth) + 1);
154   ssEscaped = (char *)vrna_alloc((unsigned)length * 2);
155   ruler     = (char *)vrna_alloc((unsigned)length * 2);
156 
157 
158   /* Get length of longest name and count sequences in alignment*/
159 
160   for (i = maxName = N = 0; names[i] != NULL; i++) {
161     N++;
162     tmp = strlen(names[i]);
163     if (tmp > maxName)
164       maxName = tmp;
165   }
166 
167 
168   /* x-coord. where sequences start */
169   seqsX = namesX + maxName * fontWidth + nameStep;
170 
171   /* calculate number of digits of the alignment length */
172   snprintf(tmpBuffer, length, "%d", start + length + offset);
173   maxNum = strlen(tmpBuffer);
174 
175   /* Calculate bounding box */
176   tmpColumns = columnWidth;
177   if (length < columnWidth)
178     tmpColumns = length;
179 
180   imageWidth = ceil(namesX +
181                     (maxName + tmpColumns + maxNum) * fontWidth +
182                     2 * nameStep +
183                     fontWidth +
184                     numberStep);
185 
186   imageHeight = startY +
187                 ceil((float)length / columnWidth) *
188                 ((N + 2) * lineStep + blockStep + consStep + ssStep + rulerStep);
189 
190   bbox[0] = bbox[1] = 0;
191   bbox[2] = (int)imageWidth;
192   bbox[3] = (int)imageHeight;
193 
194   /* Write postscript header including correct bounding box */
195   print_PS_header(outfile,
196                   "ViennaRNA Package - Alignment",
197                   bbox,
198                   &md,
199                   NULL,
200                   "ALNdict",
201                   PS_MACRO_ALN_BASE);
202 
203   fprintf(outfile, "0 %d translate\n"
204           "1 -1 scale\n"
205           "/Courier findfont\n"
206           "[10 0 0 -10 0 0] makefont setfont\n",
207           (int)imageHeight);
208 
209   /*
210    * Create ruler and secondary structure lines
211    * Init all with dots
212    */
213   memset(ruler, '.', sizeof(char) * length);
214 
215   for (i = 0; i < length; i++) {
216     /* Write number every 10th position, leave out block breaks */
217     if (((i + start + offset) % 10 == 0) && (i % columnWidth != 0)) {
218       snprintf(tmpBuffer, length, "%d", i + start + offset);
219       num = strlen(tmpBuffer);
220       if (i + num <= length)
221         memcpy(ruler + i, tmpBuffer, num);
222     }
223   }
224   ruler[length] = '\0';
225 
226   pair_table = vrna_ptable_from_string(substructure,
227                                        VRNA_BRACKETS_RND |
228                                        VRNA_BRACKETS_ANG |
229                                        VRNA_BRACKETS_SQR);
230 
231   /*
232    *  note: The substructrue and the corresponding pair_table
233    *  are relative to the 'start' position, while the sequence(s)
234    *  are not! Therefore we need to shift structure coordinates
235    *  accordingly.
236    */
237   int shift = start - 1;
238 
239   pair_table -= shift;
240 
241   /*
242    * Draw color annotation first
243    * Repeat for all pairs
244    */
245   for (i = start; i <= end; i++) {
246     j = pair_table[i] + shift;
247     if ((j > i) && (j <= end)) {
248       /* Repeat for open and closing position */
249       for (k = 0; k < 2; k++) {
250         unsigned int  tt;
251         int           pairings, nonpair, s, col;
252         int           ptype[8] = {
253           0, 0, 0, 0, 0, 0, 0, 0
254         };
255         char          *color;
256         col   = (k == 0) ? i - shift - 1 : j - shift - 1;
257         block = ceil((float)(col + 1) / columnWidth);
258         xx    = seqsX + (col - (block - 1) * columnWidth) * fontWidth;
259 
260         /* Repeat for each sequence */
261         for (s = pairings = nonpair = 0; s < N; s++) {
262           tt = md.pair[vrna_nucleotide_encode(seqs[s][i - 1], &md)]
263                [vrna_nucleotide_encode(seqs[s][j - 1], &md)];
264           ptype[tt]++;
265         }
266 
267         for (pairings = 0, s = 1; s <= 7; s++)
268           if (ptype[s])
269             pairings++;
270 
271         nonpair = ptype[0];
272         if (nonpair <= 2) {
273           color = colorMatrix[pairings - 1][nonpair];
274           for (s = 0; s < N; s++) {
275             yy = startY + (block - 1) * (lineStep * (N + 2) + blockStep + consStep + rulerStep) +
276                  ssStep * (block) + (s + 1) * lineStep;
277 
278             /* Color according due color information in pi-array, only if base pair is possible */
279             tt = md.pair[vrna_nucleotide_encode(seqs[s][i - 1], &md)]
280                  [vrna_nucleotide_encode(seqs[s][j - 1], &md)];
281             if (tt)
282               fprintf(outfile, "%.1f %.1f %.1f %.1f %s box\n",
283                       xx, yy - 1, xx + fontWidth, yy + fontHeight + 1, color);
284           }
285         }
286       }
287     }
288   }
289 
290   pair_table += shift;
291   free(pair_table);
292 
293   /* Process rest of the output in blocks of columnWidth */
294 
295   currY   = startY;
296   currPos = 0;
297 
298   cons = vrna_aln_consensus_sequence(seqs, &md);
299 
300   while (currPos < length) {
301     /* Display secondary structure line */
302     fprintf(outfile, "0 setgray\n");
303     strncpy(tmpBuffer, substructure + currPos, columnWidth);
304     tmpBuffer[columnWidth] = '\0';
305 
306     x = 0;
307     y = 0;
308     while ((c = tmpBuffer[x])) {
309       if (c == '.') {
310         ssEscaped[y++] = '.';
311       } else {
312         ssEscaped[y++]  = '\\';
313         ssEscaped[y++]  = c;
314       }
315 
316       x++;
317     }
318     ssEscaped[y] = '\0';
319 
320     fprintf(outfile, "(%s) %.1f %.1f string\n", ssEscaped, seqsX, currY);
321     currY += ssStep + lineStep;
322 
323     /* Display names, sequences and numbers */
324 
325     for (i = 0; i < N; i++) {
326       unsigned int max_len = columnWidth;
327       if (length - currPos < max_len)
328         max_len = length - currPos;
329 
330       strncpy(tmpBuffer, seqs[i] + currPos + shift, max_len);
331       tmpBuffer[max_len] = '\0';
332 
333       match = 0;
334       for (j = 0; j < (currPos + strlen(tmpBuffer)); j++)
335         if (seqs[i][j + shift] != '-')
336           match++;
337 
338       fprintf(outfile, "(%s) %.1f %.1f string\n", names[i], namesX, currY);
339       fprintf(outfile, "(%s) %.1f %.1f string\n", tmpBuffer, seqsX, currY);
340       fprintf(outfile,
341               "(%i) %.1f %.1f string\n",
342               match,
343               seqsX + fontWidth * (strlen(tmpBuffer)) + numberStep,
344               currY);
345       currY += lineStep;
346     }
347     currY += rulerStep;
348     strncpy(tmpBuffer, ruler + currPos, columnWidth);
349     tmpBuffer[columnWidth] = '\0';
350     fprintf(outfile, "(%s) %.1f %.1f string\n", tmpBuffer, seqsX, currY);
351 
352     currY += lineStep;
353     currY += consStep;
354 
355     /*Display conservation bar*/
356 
357     fprintf(outfile, "0.6 setgray\n");
358     for (i = currPos; (i < currPos + columnWidth && i < length); i++) {
359       match = 0;
360       for (j = 0; j < N; j++) {
361         if (cons[i + shift] == toupper(seqs[j][i + shift]))
362           match++;
363 
364         if (cons[i + shift] == 'U' && toupper(seqs[j][i + shift]) == 'T')
365           match++;
366 
367         if (cons[i + shift] == 'T' && toupper(seqs[j][i + shift]) == 'U')
368           match++;
369       }
370       score = (float)(match - 1) / (N - 1);
371 
372       if (cons[i + shift] == '-' ||
373           cons[i + shift] == '_' ||
374           cons[i + shift] == '.')
375         score = 0;
376 
377       barHeight = maxConsBar * score;
378       if (barHeight == 0)
379         barHeight = 1;
380 
381       xx = seqsX + (i - (columnWidth * currPos / columnWidth)) * fontWidth;
382 
383       fprintf(outfile, "%.1f %.1f %.1f %.1f box2\n",
384               xx,
385               currY + maxConsBar - barHeight,
386               xx + fontWidth,
387               currY + maxConsBar);
388     }
389 
390     currY   += blockStep;
391     currPos += columnWidth;
392   }
393   free(cons);
394 
395   print_PS_footer(outfile);
396 
397   fclose(outfile);
398 
399   free(tmpBuffer);
400   free(ssEscaped);
401   free(ruler);
402   free(substructure);
403 
404   return 0;
405 }
406 
407 
408 #ifndef VRNA_DISABLE_BACKWARD_COMPATIBILITY
409 
410 /*
411  *###########################################
412  *# deprecated functions below              #
413  *###########################################
414  */
415 PUBLIC int
PS_color_aln(const char * structure,const char * filename,const char * seqs[],const char * names[])416 PS_color_aln(const char *structure,
417              const char *filename,
418              const char *seqs[],
419              const char *names[])
420 {
421   return vrna_file_PS_aln(filename, seqs, names, structure, 60);
422 }
423 
424 
425 int
aliPS_color_aln(const char * structure,const char * filename,const char * seqs[],const char * names[])426 aliPS_color_aln(const char  *structure,
427                 const char  *filename,
428                 const char  *seqs[],
429                 const char  *names[])
430 {
431   return vrna_file_PS_aln_slice(filename,
432                                 seqs,
433                                 names,
434                                 structure,
435                                 0,
436                                 0,
437                                 0,
438                                 100);
439 }
440 
441 
442 #endif
443