1 /** \file fold_compound.c **/
2 
3 /*
4  *                Fold Compound API
5  *
6  *                This file contains everything necessary to create
7  *                and destroy data structures of type vrna_fold_compound_s,
8  *                the most fundamental data structure throughout RNAlib
9  *
10  *                c Ronny Lorenz
11  *
12  *                ViennaRNA package
13  */
14 
15 #ifdef HAVE_CONFIG_H
16 #include "config.h"
17 #endif
18 
19 #include <stdio.h>
20 #include <stdlib.h>
21 #include <math.h>
22 #include <ctype.h>
23 #include <string.h>
24 #include <limits.h>
25 
26 #include "ViennaRNA/utils/basic.h"
27 #include "ViennaRNA/utils/structures.h"
28 #include "ViennaRNA/utils/strings.h"
29 #include "ViennaRNA/params/default.h"
30 #include "ViennaRNA/datastructures/basic.h"
31 #include "ViennaRNA/fold_vars.h"
32 #include "ViennaRNA/params/basic.h"
33 #include "ViennaRNA/gquad.h"
34 #include "ViennaRNA/utils/alignments.h"
35 #include "ViennaRNA/ribo.h"
36 #include "ViennaRNA/constraints/hard.h"
37 #include "ViennaRNA/constraints/soft.h"
38 #include "ViennaRNA/part_func.h"
39 #include "ViennaRNA/cofold.h"
40 #include "ViennaRNA/mm.h"
41 #include "ViennaRNA/alphabet.h"
42 #include "ViennaRNA/fold_compound.h"
43 
44 /*
45  #################################
46  # PRIVATE MACROS                #
47  #################################
48  */
49 
50 #define WITH_PTYPE          1L    /* passed to set_fold_compound() to indicate that we need to set fc->ptype */
51 #define WITH_PTYPE_COMPAT   2L    /* passed to set_fold_compound() to indicate that we need to set fc->ptype_compat */
52 
53 /*
54  #################################
55  # GLOBAL VARIABLES              #
56  #################################
57  */
58 
59 /*
60  #################################
61  # PRIVATE VARIABLES             #
62  #################################
63  */
64 
65 /*
66  #################################
67  # PRIVATE FUNCTION DECLARATIONS #
68  #################################
69  */
70 PRIVATE void
71 set_fold_compound(vrna_fold_compound_t  *fc,
72                   unsigned int          options,
73                   unsigned int          aux);
74 
75 
76 PRIVATE void
77 make_pscores(vrna_fold_compound_t *fc);
78 
79 
80 PRIVATE void
81 sanitize_bp_span(vrna_fold_compound_t *fc,
82                  unsigned int         options);
83 
84 
85 PRIVATE void
86 add_params(vrna_fold_compound_t *fc,
87            vrna_md_t            *md_p,
88            unsigned int         options);
89 
90 
91 PRIVATE vrna_fold_compound_t *
92 init_fc_single(void);
93 
94 
95 PRIVATE vrna_fold_compound_t *
96 init_fc_comparative(void);
97 
98 
99 PRIVATE INLINE void
100 nullify(vrna_fold_compound_t *fc);
101 
102 
103 /*
104  #################################
105  # BEGIN OF FUNCTION DEFINITIONS #
106  #################################
107  */
108 PUBLIC void
vrna_fold_compound_free(vrna_fold_compound_t * fc)109 vrna_fold_compound_free(vrna_fold_compound_t *fc)
110 {
111   int s;
112 
113   if (fc) {
114     /* first destroy common attributes */
115     vrna_mx_mfe_free(fc);
116     vrna_mx_pf_free(fc);
117     free(fc->iindx);
118     free(fc->jindx);
119     free(fc->params);
120     free(fc->exp_params);
121 
122     vrna_hc_free(fc->hc);
123     vrna_ud_remove(fc);
124     vrna_sequence_remove_all(fc);
125 
126     /* now distinguish the fc type */
127     switch (fc->type) {
128       case VRNA_FC_TYPE_SINGLE:
129         free(fc->sequence);
130         free(fc->sequence_encoding);
131         free(fc->sequence_encoding2);
132         free(fc->ptype);
133         free(fc->ptype_pf_compat);
134         vrna_sc_free(fc->sc);
135         break;
136       case VRNA_FC_TYPE_COMPARATIVE:
137         for (s = 0; s < fc->n_seq; s++) {
138           free(fc->sequences[s]);
139           free(fc->S[s]);
140           free(fc->S5[s]);
141           free(fc->S3[s]);
142           free(fc->Ss[s]);
143           free(fc->a2s[s]);
144         }
145         free(fc->sequences);
146         free(fc->cons_seq);
147         free(fc->S_cons);
148         free(fc->S);
149         free(fc->S5);
150         free(fc->S3);
151         free(fc->Ss);
152         free(fc->a2s);
153         free(fc->pscore);
154         free(fc->pscore_pf_compat);
155         if (fc->scs) {
156           for (s = 0; s < fc->n_seq; s++)
157             vrna_sc_free(fc->scs[s]);
158           free(fc->scs);
159         }
160 
161         break;
162       default:                      /* do nothing */
163         break;
164     }
165 
166     /* free Distance Class Partitioning stuff (should be NULL if not used) */
167     free(fc->reference_pt1);
168     free(fc->reference_pt2);
169     free(fc->referenceBPs1);
170     free(fc->referenceBPs2);
171     free(fc->bpdist);
172     free(fc->mm1);
173     free(fc->mm2);
174 
175     /* free local folding related stuff (should be NULL if not used) */
176     free(fc->ptype_local);
177     free(fc->pscore_local);
178 #ifdef VRNA_WITH_SVM
179     vrna_zsc_filter_free(fc);
180 #endif
181 
182     if (fc->free_auxdata)
183       fc->free_auxdata(fc->auxdata);
184 
185     free(fc);
186   }
187 }
188 
189 
190 PUBLIC vrna_fold_compound_t *
vrna_fold_compound(const char * sequence,const vrna_md_t * md_p,unsigned int options)191 vrna_fold_compound(const char       *sequence,
192                    const vrna_md_t  *md_p,
193                    unsigned int     options)
194 {
195   unsigned int          length, aux_options;
196   vrna_fold_compound_t  *fc;
197   vrna_md_t             md;
198 
199   if (sequence == NULL)
200     return NULL;
201 
202   /* sanity check */
203   length = strlen(sequence);
204   if (length == 0) {
205     vrna_message_warning("vrna_fold_compound@data_structures.c: "
206                          "sequence length must be greater 0");
207     return NULL;
208   }
209 
210   if (length > vrna_sequence_length_max(options)) {
211     vrna_message_warning("vrna_fold_compound@data_structures.c: "
212                          "sequence length of %d exceeds addressable range",
213                          length);
214     return NULL;
215   }
216 
217   fc = init_fc_single();
218 
219   fc->length    = length;
220   fc->sequence  = strdup(sequence);
221 
222   aux_options = 0L;
223 
224 
225   /* get a copy of the model details */
226   if (md_p)
227     md = *md_p;
228   else
229     vrna_md_set_default(&md);
230 
231   /* now for the energy parameters */
232   add_params(fc, &md, options);
233 
234   sanitize_bp_span(fc, options);
235 
236   if (options & VRNA_OPTION_WINDOW) {
237     set_fold_compound(fc, options, aux_options);
238 
239     if (!(options & VRNA_OPTION_EVAL_ONLY)) {
240       /* add minimal hard constraint data structure */
241       vrna_hc_init_window(fc);
242 
243       /* add DP matrices */
244       vrna_mx_add(fc, VRNA_MX_WINDOW, options);
245     }
246   } else {
247     /* regular global structure prediction */
248     aux_options |= WITH_PTYPE;
249 
250     if (options & VRNA_OPTION_PF)
251       aux_options |= WITH_PTYPE_COMPAT;
252 
253     set_fold_compound(fc, options, aux_options);
254 
255     if (!(options & VRNA_OPTION_EVAL_ONLY)) {
256       /* add default hard constraints */
257       vrna_hc_init(fc);
258 
259       /* add DP matrices (if required) */
260       vrna_mx_add(fc, VRNA_MX_DEFAULT, options);
261     }
262   }
263 
264   return fc;
265 }
266 
267 
268 PUBLIC vrna_fold_compound_t *
vrna_fold_compound_comparative(const char ** sequences,vrna_md_t * md_p,unsigned int options)269 vrna_fold_compound_comparative(const char   **sequences,
270                                vrna_md_t    *md_p,
271                                unsigned int options)
272 {
273   return vrna_fold_compound_comparative2(sequences,
274                                          NULL,
275                                          NULL,
276                                          NULL,
277                                          NULL,
278                                          md_p,
279                                          options);
280 }
281 
282 
283 PUBLIC vrna_fold_compound_t *
vrna_fold_compound_comparative2(const char ** sequences,const char ** names,const unsigned char * orientation,const unsigned long long * start,const unsigned long long * genome_size,vrna_md_t * md_p,unsigned int options)284 vrna_fold_compound_comparative2(const char                **sequences,
285                                 const char                **names,
286                                 const unsigned char       *orientation,
287                                 const unsigned long long  *start,
288                                 const unsigned long long  *genome_size,
289                                 vrna_md_t                 *md_p,
290                                 unsigned int              options)
291 {
292   int                   s, n_seq, length;
293   vrna_fold_compound_t  *fc;
294   vrna_md_t             md;
295   unsigned int          aux_options;
296 
297   aux_options = 0U;
298 
299   if (sequences == NULL)
300     return NULL;
301 
302   for (s = 0; sequences[s]; s++);  /* count the sequences */
303 
304   n_seq = s;
305 
306   length = strlen(sequences[0]);
307   /* sanity check */
308   if (length == 0) {
309     vrna_message_warning("vrna_fold_compound_comparative: "
310                          "sequence length must be greater 0");
311   } else if (length > vrna_sequence_length_max(options)) {
312     vrna_message_warning("vrna_fold_compound_comparative: "
313                          "sequence length of %d exceeds addressable range",
314                          length);
315   }
316 
317   for (s = 0; s < n_seq; s++)
318     if (strlen(sequences[s]) != length) {
319       vrna_message_warning("vrna_fold_compound_comparative: "
320                            "uneqal sequence lengths in alignment");
321       return NULL;
322     }
323 
324   fc = init_fc_comparative();
325 
326   if (fc) {
327     fc->n_seq     = n_seq;
328     fc->length    = length;
329 
330     /* get a copy of the model details */
331     if (md_p)
332       md = *md_p;
333     else /* this fallback relies on global parameters and thus is not threadsafe */
334       vrna_md_set_default(&md);
335 
336     /* now for the energy parameters */
337     add_params(fc, &md, options);
338 
339     sanitize_bp_span(fc, options);
340 
341     vrna_msa_add( fc,
342                   sequences,
343                   names,
344                   orientation,
345                   start,
346                   genome_size,
347                   VRNA_SEQUENCE_RNA);
348 
349     fc->sequences = vrna_alloc(sizeof(char *) * (fc->n_seq + 1));
350     for (s = 0; sequences[s]; s++)
351       fc->sequences[s] = strdup(sequences[s]);
352 
353     if (options & VRNA_OPTION_WINDOW) {
354       set_fold_compound(fc, options, aux_options);
355 
356       fc->pscore_local = vrna_alloc(sizeof(int *) * (fc->length + 1));
357 
358 #if 0
359       for (i = (int)fc->length; (i > (int)fc->length - fc->window_size - 5) && (i >= 0); i--)
360         fc->pscore_local[i] = vrna_alloc(sizeof(int) * (fc->window_size + 5));
361 #endif
362 
363       if (!(options & VRNA_OPTION_EVAL_ONLY)) {
364         /* add minimal hard constraint data structure */
365         vrna_hc_init_window(fc);
366 
367         /* add DP matrices */
368         vrna_mx_add(fc, VRNA_MX_WINDOW, options);
369       }
370     } else {
371       /* regular global structure prediction */
372 
373       aux_options |= WITH_PTYPE;
374 
375       if (options & VRNA_OPTION_PF)
376         aux_options |= WITH_PTYPE_COMPAT;
377 
378       set_fold_compound(fc, options, aux_options);
379 
380       make_pscores(fc);
381 
382       if (!(options & VRNA_OPTION_EVAL_ONLY)) {
383         /* add default hard constraints */
384         vrna_hc_init(fc);
385 
386         /* add DP matrices (if required) */
387         vrna_mx_add(fc, VRNA_MX_DEFAULT, options);
388       }
389     }
390   }
391 
392   return fc;
393 }
394 
395 
396 PUBLIC vrna_fold_compound_t *
vrna_fold_compound_TwoD(const char * sequence,const char * s1,const char * s2,vrna_md_t * md_p,unsigned int options)397 vrna_fold_compound_TwoD(const char    *sequence,
398                         const char    *s1,
399                         const char    *s2,
400                         vrna_md_t     *md_p,
401                         unsigned int  options)
402 {
403   int                   length, l, turn;
404   vrna_fold_compound_t  *fc;
405   vrna_md_t             md;
406 
407 
408   if (sequence == NULL)
409     return NULL;
410 
411   /* sanity check */
412   length = strlen(sequence);
413   if (length == 0) {
414     vrna_message_warning("vrna_fold_compound_TwoD: "
415                          "sequence length must be greater 0");
416     return NULL;
417   } else if (length > vrna_sequence_length_max(options)) {
418     vrna_message_warning("vrna_fold_compound_TwoD: "
419                          "sequence length of %d exceeds addressable range",
420                          length);
421     return NULL;
422   }
423 
424   l = strlen(s1);
425   if (l != length) {
426     vrna_message_warning("vrna_fold_compound_TwoD: "
427                          "sequence and s1 differ in length");
428     return NULL;
429   }
430 
431   l = strlen(s2);
432   if (l != length) {
433     vrna_message_warning("vrna_fold_compound_TwoD: "
434                          "sequence and s2 differ in length");
435     return NULL;
436   }
437 
438   fc            = init_fc_single();
439   if (fc) {
440     fc->length    = length;
441     fc->sequence  = strdup(sequence);
442 
443     /* get a copy of the model details */
444     if (md_p)
445       md = *md_p;
446     else /* this fallback relies on global parameters and thus is not threadsafe */
447       vrna_md_set_default(&md);
448 
449     /* always make uniq ML decomposition ! */
450     md.uniq_ML      = 1;
451     md.compute_bpp  = 0;
452 
453     /* now for the energy parameters */
454     add_params(fc, &md, options);
455 
456     set_fold_compound(fc, options, WITH_PTYPE | WITH_PTYPE_COMPAT);
457 
458     if (!(options & VRNA_OPTION_EVAL_ONLY)) {
459       vrna_hc_init(fc); /* add default hard constraints */
460 
461       /* add DP matrices */
462       vrna_mx_add(fc, VRNA_MX_2DFOLD, options);
463     }
464 
465     /* set all fields that are unique to Distance class partitioning... */
466     turn              = fc->params->model_details.min_loop_size;
467     fc->reference_pt1 = vrna_ptable(s1);
468     fc->reference_pt2 = vrna_ptable(s2);
469     fc->referenceBPs1 = vrna_refBPcnt_matrix(fc->reference_pt1, turn);
470     fc->referenceBPs2 = vrna_refBPcnt_matrix(fc->reference_pt2, turn);
471     fc->bpdist        = vrna_refBPdist_matrix(fc->reference_pt1, fc->reference_pt2, turn);
472     /* compute maximum matching with reference structure 1 disallowed */
473     fc->mm1 = maximumMatchingConstraint(fc->sequence, fc->reference_pt1);
474     /* compute maximum matching with reference structure 2 disallowed */
475     fc->mm2 = maximumMatchingConstraint(fc->sequence, fc->reference_pt2);
476 
477     fc->maxD1 = fc->mm1[fc->iindx[1] - length] + fc->referenceBPs1[fc->iindx[1] - length];
478     fc->maxD2 = fc->mm2[fc->iindx[1] - length] + fc->referenceBPs2[fc->iindx[1] - length];
479   }
480 
481   return fc;
482 }
483 
484 
485 PUBLIC void
vrna_fold_compound_add_auxdata(vrna_fold_compound_t * fc,void * data,vrna_callback_free_auxdata * f)486 vrna_fold_compound_add_auxdata(vrna_fold_compound_t       *fc,
487                                void                       *data,
488                                vrna_callback_free_auxdata *f)
489 {
490   if (fc && data) {
491     if (fc->free_auxdata) /* free pre-existing auxdata */
492       fc->free_auxdata(fc->auxdata);
493 
494     fc->auxdata       = data;
495     fc->free_auxdata  = f;
496   }
497 }
498 
499 
500 PUBLIC void
vrna_fold_compound_add_callback(vrna_fold_compound_t * fc,vrna_callback_recursion_status * f)501 vrna_fold_compound_add_callback(vrna_fold_compound_t            *fc,
502                                 vrna_callback_recursion_status  *f)
503 {
504   if (fc && f)
505     fc->stat_cb = f;
506 }
507 
508 
509 PUBLIC int
vrna_fold_compound_prepare(vrna_fold_compound_t * fc,unsigned int options)510 vrna_fold_compound_prepare(vrna_fold_compound_t *fc,
511                            unsigned int         options)
512 {
513   int ret = 1; /* success */
514 
515   /* check maximum sequence length restrictions */
516   if (fc->length > vrna_sequence_length_max(options)) {
517     vrna_message_warning(
518       "vrna_fold_compound_prepare@data_structures.c: sequence length of %d exceeds addressable range",
519       fc->length);
520     return 0;
521   }
522 
523   /* make sure to always provide sane bp-span settings */
524   sanitize_bp_span(fc, options);
525 
526   /* prepare Boltzmann factors if required */
527   vrna_params_prepare(fc, options);
528 
529   /* prepare ptype array(s) */
530   vrna_ptypes_prepare(fc, options);
531 
532   if (options & VRNA_OPTION_PF) {
533     /* prepare for partition function computation */
534 
535     switch (fc->type) {
536       case VRNA_FC_TYPE_SINGLE:     /* get pre-computed Boltzmann factors if not present*/
537         if (fc->domains_up)                            /* turn on unique ML decomposition with qm1 array */
538           fc->exp_params->model_details.uniq_ML = 1;
539 
540         break;
541 
542       default:
543         /* not doing anything here... */
544         break;
545     }
546   }
547 
548   /* prepare hard constraints */
549   vrna_hc_prepare(fc, options);
550 
551   /* prepare soft constraints data structure, if required */
552   vrna_sc_prepare(fc, options);
553 
554   /* Add DP matrices, if not they are not present or do not fit current settings */
555   vrna_mx_prepare(fc, options);
556 
557   return ret;
558 }
559 
560 
561 /*
562  #####################################
563  # BEGIN OF STATIC HELPER FUNCTIONS  #
564  #####################################
565  */
566 PRIVATE void
sanitize_bp_span(vrna_fold_compound_t * fc,unsigned int options)567 sanitize_bp_span(vrna_fold_compound_t *fc,
568                  unsigned int         options)
569 {
570   vrna_md_t *md;
571 
572   md = &(fc->params->model_details);
573 
574   /* make sure that min_loop_size, max_bp_span, and window_size are sane */
575   if (options & VRNA_OPTION_WINDOW) {
576     if (md->window_size <= 0)
577       md->window_size = (int)fc->length;
578     else if (md->window_size > (int)fc->length)
579       md->window_size = (int)fc->length;
580 
581     fc->window_size = md->window_size;
582   } else {
583     /* non-local fold mode */
584     md->window_size = (int)fc->length;
585   }
586 
587   if ((md->max_bp_span <= 0) || (md->max_bp_span > md->window_size))
588     md->max_bp_span = md->window_size;
589 }
590 
591 
592 PRIVATE void
add_params(vrna_fold_compound_t * fc,vrna_md_t * md_p,unsigned int options)593 add_params(vrna_fold_compound_t *fc,
594            vrna_md_t            *md_p,
595            unsigned int         options)
596 {
597   /*
598    * ALWAYS provide regular energy parameters
599    * remove previous parameters if present and they differ from current model
600    */
601   if (fc->params) {
602     if (memcmp(md_p, &(fc->params->model_details), sizeof(vrna_md_t)) != 0) {
603       free(fc->params);
604       fc->params = NULL;
605     }
606   }
607 
608   if (!fc->params)
609     fc->params = vrna_params(md_p);
610 
611   vrna_params_prepare(fc, options);
612 }
613 
614 
615 PRIVATE void
set_fold_compound(vrna_fold_compound_t * fc,unsigned int options,unsigned int aux)616 set_fold_compound(vrna_fold_compound_t  *fc,
617                   unsigned int          options,
618                   unsigned int          aux)
619 {
620   char          *sequence, **sequences, **ptr;
621   unsigned int  length, s;
622   vrna_md_t     *md_p;
623 
624   sequence  = NULL;
625   sequences = NULL;
626 
627   md_p = &(fc->params->model_details);
628 
629   switch (fc->type) {
630     case VRNA_FC_TYPE_SINGLE:
631       sequence = fc->sequence;
632 
633       fc->sequence  = NULL;
634       fc->length    = 0;
635 
636       /* split input sequences at default delimiter '&' */
637       sequences = vrna_strsplit(sequence, NULL);
638 
639       /* add individual sequences to fold compound */
640       for (ptr = sequences; *ptr; ptr++) {
641         vrna_sequence_add(fc, *ptr, VRNA_SEQUENCE_RNA);
642         free(*ptr);
643       }
644 
645       free(sequences);
646       free(sequence);
647 
648       if (fc->strands > 1) {
649         fc->cutpoint = fc->nucleotides[0].length + 1;
650 
651         if (md_p->min_loop_size == TURN)
652           md_p->min_loop_size = 0;                              /* is it safe to set this here? */
653       }
654 
655       if (!(options & VRNA_OPTION_EVAL_ONLY)) {
656         fc->ptype = (aux & WITH_PTYPE) ? vrna_ptypes(fc->sequence_encoding2, md_p) : NULL;
657         /* backward compatibility ptypes */
658         fc->ptype_pf_compat =
659           (aux & WITH_PTYPE_COMPAT) ? get_ptypes(fc->sequence_encoding2, md_p, 1) : NULL;
660       }
661 
662       break;
663 
664     case VRNA_FC_TYPE_COMPARATIVE:
665       sequences = fc->sequences;
666 
667       fc->length = length = fc->length;
668 
669       fc->cons_seq  = vrna_aln_consensus_sequence((const char **)sequences, md_p);
670       fc->S_cons    = vrna_seq_encode_simple(fc->cons_seq, md_p);
671 
672       fc->pscore = vrna_alloc(sizeof(int) * ((length * (length + 1)) / 2 + 2));
673       /* backward compatibility ptypes */
674       fc->pscore_pf_compat =
675         (aux & WITH_PTYPE_COMPAT) ? vrna_alloc(sizeof(int) *
676                                                ((length * (length + 1)) / 2 + 2)) : NULL;
677 
678       oldAliEn = fc->oldAliEn = md_p->oldAliEn;
679 
680       fc->S   = vrna_alloc((fc->n_seq + 1) * sizeof(short *));
681       fc->S5  = vrna_alloc((fc->n_seq + 1) * sizeof(short *));
682       fc->S3  = vrna_alloc((fc->n_seq + 1) * sizeof(short *));
683       fc->a2s = vrna_alloc((fc->n_seq + 1) * sizeof(unsigned int *));
684       fc->Ss  = vrna_alloc((fc->n_seq + 1) * sizeof(char *));
685 
686       for (s = 0; s < fc->n_seq; s++) {
687         vrna_aln_encode(fc->sequences[s],
688                         &(fc->S[s]),
689                         &(fc->S5[s]),
690                         &(fc->S3[s]),
691                         &(fc->Ss[s]),
692                         &(fc->a2s[s]),
693                         md_p);
694       }
695       fc->S5[fc->n_seq]   = NULL;
696       fc->S3[fc->n_seq]   = NULL;
697       fc->a2s[fc->n_seq]  = NULL;
698       fc->Ss[fc->n_seq]   = NULL;
699       fc->S[fc->n_seq]    = NULL;
700 
701       break;
702 
703     default:                      /* do nothing ? */
704       break;
705   }
706 
707   vrna_sequence_prepare(fc);
708 
709   if (!(options & VRNA_OPTION_WINDOW) && (fc->length <= vrna_sequence_length_max(options))) {
710     fc->iindx = vrna_idx_row_wise(fc->length);
711     fc->jindx = vrna_idx_col_wise(fc->length);
712   }
713 }
714 
715 
716 PRIVATE void
make_pscores(vrna_fold_compound_t * fc)717 make_pscores(vrna_fold_compound_t *fc)
718 {
719   /*
720    * calculate co-variance bonus for each pair depending on
721    * compensatory/consistent mutations and incompatible seqs
722    * should be 0 for conserved pairs, >0 for good pairs
723    */
724 
725 #define NONE -10000 /* score for forbidden pairs */
726 
727   int       i, j, k, l, s, max_span, turn;
728   float     **dm;
729   int       olddm[7][7] = { { 0, 0, 0, 0, 0, 0, 0 }, /* hamming distance between pairs */
730                             { 0, 0, 2, 2, 1, 2, 2 } /* CG */,
731                             { 0, 2, 0, 1, 2, 2, 2 } /* GC */,
732                             { 0, 2, 1, 0, 2, 1, 2 } /* GU */,
733                             { 0, 1, 2, 2, 0, 2, 1 } /* UG */,
734                             { 0, 2, 2, 1, 2, 0, 2 } /* AU */,
735                             { 0, 2, 2, 2, 1, 2, 0 } /* UA */ };
736 
737   short     **S   = fc->S;
738   char      **AS  = fc->sequences;
739   int       n_seq = fc->n_seq;
740   vrna_md_t *md   =
741     (fc->params) ? &(fc->params->model_details) : &(fc->exp_params->model_details);
742   int       *pscore   = fc->pscore;             /* precomputed array of pair types */
743   int       *indx     = fc->jindx;
744   int       *my_iindx = fc->iindx;
745   int       n         = fc->length;
746 
747   turn = md->min_loop_size;
748 
749   if (md->ribo) {
750     if (RibosumFile != NULL)
751       dm = readribosum(RibosumFile);
752     else
753       dm = get_ribosum((const char **)AS, n_seq, n);
754   } else {
755     /*use usual matrix*/
756     dm = vrna_alloc(7 * sizeof(float *));
757     for (i = 0; i < 7; i++) {
758       dm[i] = vrna_alloc(7 * sizeof(float));
759       for (j = 0; j < 7; j++)
760         dm[i][j] = (float)olddm[i][j];
761     }
762   }
763 
764   max_span = md->max_bp_span;
765   if ((max_span < turn + 2) || (max_span > n))
766     max_span = n;
767 
768   for (i = 1; i < n; i++) {
769     for (j = i + 1; (j < i + turn + 1) && (j <= n); j++)
770       pscore[indx[j] + i] = NONE;
771     for (j = i + turn + 1; j <= n; j++) {
772       int     pfreq[8] = {
773         0, 0, 0, 0, 0, 0, 0, 0
774       };
775       double  score;
776       for (s = 0; s < n_seq; s++) {
777         int type;
778         if (S[s][i] == 0 && S[s][j] == 0) {
779           type = 7;                             /* gap-gap  */
780         } else {
781           if ((AS[s][i] == '~') || (AS[s][j] == '~')) {
782             type = 7;
783           } else {
784             type = md->pair[S[s][i]][S[s][j]];
785             if ((md->noGU) && ((type == 3) || (type == 4)))
786               type = 0;
787           }
788         }
789 
790         pfreq[type]++;
791       }
792       if (pfreq[0] * 2 + pfreq[7] > n_seq) {
793         pscore[indx[j] + i] = NONE;
794         continue;
795       }
796 
797       for (k = 1, score = 0; k <= 6; k++) /* ignore pairtype 7 (gap-gap) */
798         for (l = k; l <= 6; l++)
799           score += pfreq[k] * pfreq[l] * dm[k][l];
800       /* counter examples score -1, gap-gap scores -0.25   */
801       pscore[indx[j] + i] = md->cv_fact *
802                             ((UNIT * score) / n_seq - md->nc_fact * UNIT *
803                              (pfreq[0] + pfreq[7] * 0.25));
804 
805       if ((j - i + 1) > max_span)
806         pscore[indx[j] + i] = NONE;
807     }
808   }
809 
810   if (md->noLP) {
811     /* remove unwanted pairs */
812     for (k = 1; k < n - turn - 1; k++)
813       for (l = 1; l <= 2; l++) {
814         int type, ntype = 0, otype = 0;
815         i     = k;
816         j     = i + turn + l;
817         type  = pscore[indx[j] + i];
818         while ((i >= 1) && (j <= n)) {
819           if ((i > 1) && (j < n))
820             ntype = pscore[indx[j + 1] + i - 1];
821 
822           if ((otype < md->cv_fact * MINPSCORE) && (ntype < md->cv_fact * MINPSCORE)) /* too many counterexamples */
823             pscore[indx[j] + i] = NONE;                                               /* i.j can only form isolated pairs */
824 
825           otype = type;
826           type  = ntype;
827           i--;
828           j++;
829         }
830       }
831   }
832 
833   /*free dm */
834   for (i = 0; i < 7; i++)
835     free(dm[i]);
836   free(dm);
837 
838   /* copy over pscores for backward compatibility */
839   if (fc->pscore_pf_compat) {
840     for (i = 1; i < n; i++)
841       for (j = i; j <= n; j++)
842         fc->pscore_pf_compat[my_iindx[i] - j] = (short)pscore[indx[j] + i];
843   }
844 }
845 
846 
847 PRIVATE vrna_fold_compound_t *
init_fc_single(void)848 init_fc_single(void)
849 {
850   vrna_fold_compound_t  init = {
851     .type = VRNA_FC_TYPE_SINGLE
852   };
853   vrna_fold_compound_t  *fc = vrna_alloc(sizeof(vrna_fold_compound_t));
854 
855   if (fc) {
856     memcpy(fc, &init, sizeof(vrna_fold_compound_t));
857     nullify(fc);
858   }
859 
860   return fc;
861 }
862 
863 
864 PRIVATE vrna_fold_compound_t *
init_fc_comparative(void)865 init_fc_comparative(void)
866 {
867   vrna_fold_compound_t  init = {
868     .type = VRNA_FC_TYPE_COMPARATIVE
869   };
870   vrna_fold_compound_t  *fc = vrna_alloc(sizeof(vrna_fold_compound_t));
871 
872   if (fc) {
873     memcpy(fc, &init, sizeof(vrna_fold_compound_t));
874     nullify(fc);
875   }
876 
877   return fc;
878 }
879 
880 
881 PRIVATE INLINE void
nullify(vrna_fold_compound_t * fc)882 nullify(vrna_fold_compound_t *fc)
883 {
884   if (fc) {
885     fc->length        = 0;
886     fc->strands       = 0;
887     fc->cutpoint      = -1;
888     fc->strand_number = NULL;
889     fc->strand_order  = NULL;
890     fc->strand_start  = NULL;
891     fc->strand_end    = NULL;
892     fc->nucleotides   = NULL;
893     fc->alignment     = NULL;
894 
895     fc->hc            = NULL;
896     fc->matrices      = NULL;
897     fc->exp_matrices  = NULL;
898     fc->params        = NULL;
899     fc->exp_params    = NULL;
900     fc->iindx         = NULL;
901     fc->jindx         = NULL;
902 
903     fc->stat_cb       = NULL;
904     fc->auxdata       = NULL;
905     fc->free_auxdata  = NULL;
906 
907     fc->domains_struc = NULL;
908     fc->domains_up    = NULL;
909     fc->aux_grammar   = NULL;
910 
911     switch (fc->type) {
912       case VRNA_FC_TYPE_SINGLE:
913         fc->sequence            = NULL;
914         fc->sequence_encoding   = NULL;
915         fc->sequence_encoding2  = NULL;
916         fc->ptype               = NULL;
917         fc->ptype_pf_compat     = NULL;
918         fc->sc                  = NULL;
919 
920         break;
921 
922       case VRNA_FC_TYPE_COMPARATIVE:
923         fc->sequences         = NULL;
924         fc->n_seq             = 0;
925         fc->cons_seq          = NULL;
926         fc->S_cons            = NULL;
927         fc->S                 = NULL;
928         fc->S5                = NULL;
929         fc->S3                = NULL;
930         fc->Ss                = NULL;
931         fc->a2s               = NULL;
932         fc->pscore            = NULL;
933         fc->pscore_local      = NULL;
934         fc->pscore_pf_compat  = NULL;
935         fc->scs               = NULL;
936         fc->oldAliEn          = 0;
937 
938         break;
939     }
940 
941     fc->maxD1         = 0;
942     fc->maxD2         = 0;
943     fc->reference_pt1 = NULL;
944     fc->reference_pt2 = NULL;
945     fc->referenceBPs1 = NULL;
946     fc->referenceBPs2 = NULL;
947     fc->bpdist        = NULL;
948     fc->mm1           = NULL;
949     fc->mm2           = NULL;
950 
951     fc->window_size = -1;
952     fc->ptype_local = NULL;
953 #ifdef VRNA_WITH_SVM
954     fc->zscore_data = NULL;
955 #endif
956   }
957 }
958