1 /** \file fold_compound.c **/
2
3 /*
4 * Fold Compound API
5 *
6 * This file contains everything necessary to create
7 * and destroy data structures of type vrna_fold_compound_s,
8 * the most fundamental data structure throughout RNAlib
9 *
10 * c Ronny Lorenz
11 *
12 * ViennaRNA package
13 */
14
15 #ifdef HAVE_CONFIG_H
16 #include "config.h"
17 #endif
18
19 #include <stdio.h>
20 #include <stdlib.h>
21 #include <math.h>
22 #include <ctype.h>
23 #include <string.h>
24 #include <limits.h>
25
26 #include "ViennaRNA/utils/basic.h"
27 #include "ViennaRNA/utils/structures.h"
28 #include "ViennaRNA/utils/strings.h"
29 #include "ViennaRNA/params/default.h"
30 #include "ViennaRNA/datastructures/basic.h"
31 #include "ViennaRNA/fold_vars.h"
32 #include "ViennaRNA/params/basic.h"
33 #include "ViennaRNA/gquad.h"
34 #include "ViennaRNA/utils/alignments.h"
35 #include "ViennaRNA/ribo.h"
36 #include "ViennaRNA/constraints/hard.h"
37 #include "ViennaRNA/constraints/soft.h"
38 #include "ViennaRNA/part_func.h"
39 #include "ViennaRNA/cofold.h"
40 #include "ViennaRNA/mm.h"
41 #include "ViennaRNA/alphabet.h"
42 #include "ViennaRNA/fold_compound.h"
43
44 /*
45 #################################
46 # PRIVATE MACROS #
47 #################################
48 */
49
50 #define WITH_PTYPE 1L /* passed to set_fold_compound() to indicate that we need to set fc->ptype */
51 #define WITH_PTYPE_COMPAT 2L /* passed to set_fold_compound() to indicate that we need to set fc->ptype_compat */
52
53 /*
54 #################################
55 # GLOBAL VARIABLES #
56 #################################
57 */
58
59 /*
60 #################################
61 # PRIVATE VARIABLES #
62 #################################
63 */
64
65 /*
66 #################################
67 # PRIVATE FUNCTION DECLARATIONS #
68 #################################
69 */
70 PRIVATE void
71 set_fold_compound(vrna_fold_compound_t *fc,
72 unsigned int options,
73 unsigned int aux);
74
75
76 PRIVATE void
77 make_pscores(vrna_fold_compound_t *fc);
78
79
80 PRIVATE void
81 sanitize_bp_span(vrna_fold_compound_t *fc,
82 unsigned int options);
83
84
85 PRIVATE void
86 add_params(vrna_fold_compound_t *fc,
87 vrna_md_t *md_p,
88 unsigned int options);
89
90
91 PRIVATE vrna_fold_compound_t *
92 init_fc_single(void);
93
94
95 PRIVATE vrna_fold_compound_t *
96 init_fc_comparative(void);
97
98
99 PRIVATE INLINE void
100 nullify(vrna_fold_compound_t *fc);
101
102
103 /*
104 #################################
105 # BEGIN OF FUNCTION DEFINITIONS #
106 #################################
107 */
108 PUBLIC void
vrna_fold_compound_free(vrna_fold_compound_t * fc)109 vrna_fold_compound_free(vrna_fold_compound_t *fc)
110 {
111 int s;
112
113 if (fc) {
114 /* first destroy common attributes */
115 vrna_mx_mfe_free(fc);
116 vrna_mx_pf_free(fc);
117 free(fc->iindx);
118 free(fc->jindx);
119 free(fc->params);
120 free(fc->exp_params);
121
122 vrna_hc_free(fc->hc);
123 vrna_ud_remove(fc);
124 vrna_sequence_remove_all(fc);
125
126 /* now distinguish the fc type */
127 switch (fc->type) {
128 case VRNA_FC_TYPE_SINGLE:
129 free(fc->sequence);
130 free(fc->sequence_encoding);
131 free(fc->sequence_encoding2);
132 free(fc->ptype);
133 free(fc->ptype_pf_compat);
134 vrna_sc_free(fc->sc);
135 break;
136 case VRNA_FC_TYPE_COMPARATIVE:
137 for (s = 0; s < fc->n_seq; s++) {
138 free(fc->sequences[s]);
139 free(fc->S[s]);
140 free(fc->S5[s]);
141 free(fc->S3[s]);
142 free(fc->Ss[s]);
143 free(fc->a2s[s]);
144 }
145 free(fc->sequences);
146 free(fc->cons_seq);
147 free(fc->S_cons);
148 free(fc->S);
149 free(fc->S5);
150 free(fc->S3);
151 free(fc->Ss);
152 free(fc->a2s);
153 free(fc->pscore);
154 free(fc->pscore_pf_compat);
155 if (fc->scs) {
156 for (s = 0; s < fc->n_seq; s++)
157 vrna_sc_free(fc->scs[s]);
158 free(fc->scs);
159 }
160
161 break;
162 default: /* do nothing */
163 break;
164 }
165
166 /* free Distance Class Partitioning stuff (should be NULL if not used) */
167 free(fc->reference_pt1);
168 free(fc->reference_pt2);
169 free(fc->referenceBPs1);
170 free(fc->referenceBPs2);
171 free(fc->bpdist);
172 free(fc->mm1);
173 free(fc->mm2);
174
175 /* free local folding related stuff (should be NULL if not used) */
176 free(fc->ptype_local);
177 free(fc->pscore_local);
178 #ifdef VRNA_WITH_SVM
179 vrna_zsc_filter_free(fc);
180 #endif
181
182 if (fc->free_auxdata)
183 fc->free_auxdata(fc->auxdata);
184
185 free(fc);
186 }
187 }
188
189
190 PUBLIC vrna_fold_compound_t *
vrna_fold_compound(const char * sequence,const vrna_md_t * md_p,unsigned int options)191 vrna_fold_compound(const char *sequence,
192 const vrna_md_t *md_p,
193 unsigned int options)
194 {
195 unsigned int length, aux_options;
196 vrna_fold_compound_t *fc;
197 vrna_md_t md;
198
199 if (sequence == NULL)
200 return NULL;
201
202 /* sanity check */
203 length = strlen(sequence);
204 if (length == 0) {
205 vrna_message_warning("vrna_fold_compound@data_structures.c: "
206 "sequence length must be greater 0");
207 return NULL;
208 }
209
210 if (length > vrna_sequence_length_max(options)) {
211 vrna_message_warning("vrna_fold_compound@data_structures.c: "
212 "sequence length of %d exceeds addressable range",
213 length);
214 return NULL;
215 }
216
217 fc = init_fc_single();
218
219 fc->length = length;
220 fc->sequence = strdup(sequence);
221
222 aux_options = 0L;
223
224
225 /* get a copy of the model details */
226 if (md_p)
227 md = *md_p;
228 else
229 vrna_md_set_default(&md);
230
231 /* now for the energy parameters */
232 add_params(fc, &md, options);
233
234 sanitize_bp_span(fc, options);
235
236 if (options & VRNA_OPTION_WINDOW) {
237 set_fold_compound(fc, options, aux_options);
238
239 if (!(options & VRNA_OPTION_EVAL_ONLY)) {
240 /* add minimal hard constraint data structure */
241 vrna_hc_init_window(fc);
242
243 /* add DP matrices */
244 vrna_mx_add(fc, VRNA_MX_WINDOW, options);
245 }
246 } else {
247 /* regular global structure prediction */
248 aux_options |= WITH_PTYPE;
249
250 if (options & VRNA_OPTION_PF)
251 aux_options |= WITH_PTYPE_COMPAT;
252
253 set_fold_compound(fc, options, aux_options);
254
255 if (!(options & VRNA_OPTION_EVAL_ONLY)) {
256 /* add default hard constraints */
257 vrna_hc_init(fc);
258
259 /* add DP matrices (if required) */
260 vrna_mx_add(fc, VRNA_MX_DEFAULT, options);
261 }
262 }
263
264 return fc;
265 }
266
267
268 PUBLIC vrna_fold_compound_t *
vrna_fold_compound_comparative(const char ** sequences,vrna_md_t * md_p,unsigned int options)269 vrna_fold_compound_comparative(const char **sequences,
270 vrna_md_t *md_p,
271 unsigned int options)
272 {
273 return vrna_fold_compound_comparative2(sequences,
274 NULL,
275 NULL,
276 NULL,
277 NULL,
278 md_p,
279 options);
280 }
281
282
283 PUBLIC vrna_fold_compound_t *
vrna_fold_compound_comparative2(const char ** sequences,const char ** names,const unsigned char * orientation,const unsigned long long * start,const unsigned long long * genome_size,vrna_md_t * md_p,unsigned int options)284 vrna_fold_compound_comparative2(const char **sequences,
285 const char **names,
286 const unsigned char *orientation,
287 const unsigned long long *start,
288 const unsigned long long *genome_size,
289 vrna_md_t *md_p,
290 unsigned int options)
291 {
292 int s, n_seq, length;
293 vrna_fold_compound_t *fc;
294 vrna_md_t md;
295 unsigned int aux_options;
296
297 aux_options = 0U;
298
299 if (sequences == NULL)
300 return NULL;
301
302 for (s = 0; sequences[s]; s++); /* count the sequences */
303
304 n_seq = s;
305
306 length = strlen(sequences[0]);
307 /* sanity check */
308 if (length == 0) {
309 vrna_message_warning("vrna_fold_compound_comparative: "
310 "sequence length must be greater 0");
311 } else if (length > vrna_sequence_length_max(options)) {
312 vrna_message_warning("vrna_fold_compound_comparative: "
313 "sequence length of %d exceeds addressable range",
314 length);
315 }
316
317 for (s = 0; s < n_seq; s++)
318 if (strlen(sequences[s]) != length) {
319 vrna_message_warning("vrna_fold_compound_comparative: "
320 "uneqal sequence lengths in alignment");
321 return NULL;
322 }
323
324 fc = init_fc_comparative();
325
326 if (fc) {
327 fc->n_seq = n_seq;
328 fc->length = length;
329
330 /* get a copy of the model details */
331 if (md_p)
332 md = *md_p;
333 else /* this fallback relies on global parameters and thus is not threadsafe */
334 vrna_md_set_default(&md);
335
336 /* now for the energy parameters */
337 add_params(fc, &md, options);
338
339 sanitize_bp_span(fc, options);
340
341 vrna_msa_add( fc,
342 sequences,
343 names,
344 orientation,
345 start,
346 genome_size,
347 VRNA_SEQUENCE_RNA);
348
349 fc->sequences = vrna_alloc(sizeof(char *) * (fc->n_seq + 1));
350 for (s = 0; sequences[s]; s++)
351 fc->sequences[s] = strdup(sequences[s]);
352
353 if (options & VRNA_OPTION_WINDOW) {
354 set_fold_compound(fc, options, aux_options);
355
356 fc->pscore_local = vrna_alloc(sizeof(int *) * (fc->length + 1));
357
358 #if 0
359 for (i = (int)fc->length; (i > (int)fc->length - fc->window_size - 5) && (i >= 0); i--)
360 fc->pscore_local[i] = vrna_alloc(sizeof(int) * (fc->window_size + 5));
361 #endif
362
363 if (!(options & VRNA_OPTION_EVAL_ONLY)) {
364 /* add minimal hard constraint data structure */
365 vrna_hc_init_window(fc);
366
367 /* add DP matrices */
368 vrna_mx_add(fc, VRNA_MX_WINDOW, options);
369 }
370 } else {
371 /* regular global structure prediction */
372
373 aux_options |= WITH_PTYPE;
374
375 if (options & VRNA_OPTION_PF)
376 aux_options |= WITH_PTYPE_COMPAT;
377
378 set_fold_compound(fc, options, aux_options);
379
380 make_pscores(fc);
381
382 if (!(options & VRNA_OPTION_EVAL_ONLY)) {
383 /* add default hard constraints */
384 vrna_hc_init(fc);
385
386 /* add DP matrices (if required) */
387 vrna_mx_add(fc, VRNA_MX_DEFAULT, options);
388 }
389 }
390 }
391
392 return fc;
393 }
394
395
396 PUBLIC vrna_fold_compound_t *
vrna_fold_compound_TwoD(const char * sequence,const char * s1,const char * s2,vrna_md_t * md_p,unsigned int options)397 vrna_fold_compound_TwoD(const char *sequence,
398 const char *s1,
399 const char *s2,
400 vrna_md_t *md_p,
401 unsigned int options)
402 {
403 int length, l, turn;
404 vrna_fold_compound_t *fc;
405 vrna_md_t md;
406
407
408 if (sequence == NULL)
409 return NULL;
410
411 /* sanity check */
412 length = strlen(sequence);
413 if (length == 0) {
414 vrna_message_warning("vrna_fold_compound_TwoD: "
415 "sequence length must be greater 0");
416 return NULL;
417 } else if (length > vrna_sequence_length_max(options)) {
418 vrna_message_warning("vrna_fold_compound_TwoD: "
419 "sequence length of %d exceeds addressable range",
420 length);
421 return NULL;
422 }
423
424 l = strlen(s1);
425 if (l != length) {
426 vrna_message_warning("vrna_fold_compound_TwoD: "
427 "sequence and s1 differ in length");
428 return NULL;
429 }
430
431 l = strlen(s2);
432 if (l != length) {
433 vrna_message_warning("vrna_fold_compound_TwoD: "
434 "sequence and s2 differ in length");
435 return NULL;
436 }
437
438 fc = init_fc_single();
439 if (fc) {
440 fc->length = length;
441 fc->sequence = strdup(sequence);
442
443 /* get a copy of the model details */
444 if (md_p)
445 md = *md_p;
446 else /* this fallback relies on global parameters and thus is not threadsafe */
447 vrna_md_set_default(&md);
448
449 /* always make uniq ML decomposition ! */
450 md.uniq_ML = 1;
451 md.compute_bpp = 0;
452
453 /* now for the energy parameters */
454 add_params(fc, &md, options);
455
456 set_fold_compound(fc, options, WITH_PTYPE | WITH_PTYPE_COMPAT);
457
458 if (!(options & VRNA_OPTION_EVAL_ONLY)) {
459 vrna_hc_init(fc); /* add default hard constraints */
460
461 /* add DP matrices */
462 vrna_mx_add(fc, VRNA_MX_2DFOLD, options);
463 }
464
465 /* set all fields that are unique to Distance class partitioning... */
466 turn = fc->params->model_details.min_loop_size;
467 fc->reference_pt1 = vrna_ptable(s1);
468 fc->reference_pt2 = vrna_ptable(s2);
469 fc->referenceBPs1 = vrna_refBPcnt_matrix(fc->reference_pt1, turn);
470 fc->referenceBPs2 = vrna_refBPcnt_matrix(fc->reference_pt2, turn);
471 fc->bpdist = vrna_refBPdist_matrix(fc->reference_pt1, fc->reference_pt2, turn);
472 /* compute maximum matching with reference structure 1 disallowed */
473 fc->mm1 = maximumMatchingConstraint(fc->sequence, fc->reference_pt1);
474 /* compute maximum matching with reference structure 2 disallowed */
475 fc->mm2 = maximumMatchingConstraint(fc->sequence, fc->reference_pt2);
476
477 fc->maxD1 = fc->mm1[fc->iindx[1] - length] + fc->referenceBPs1[fc->iindx[1] - length];
478 fc->maxD2 = fc->mm2[fc->iindx[1] - length] + fc->referenceBPs2[fc->iindx[1] - length];
479 }
480
481 return fc;
482 }
483
484
485 PUBLIC void
vrna_fold_compound_add_auxdata(vrna_fold_compound_t * fc,void * data,vrna_callback_free_auxdata * f)486 vrna_fold_compound_add_auxdata(vrna_fold_compound_t *fc,
487 void *data,
488 vrna_callback_free_auxdata *f)
489 {
490 if (fc && data) {
491 if (fc->free_auxdata) /* free pre-existing auxdata */
492 fc->free_auxdata(fc->auxdata);
493
494 fc->auxdata = data;
495 fc->free_auxdata = f;
496 }
497 }
498
499
500 PUBLIC void
vrna_fold_compound_add_callback(vrna_fold_compound_t * fc,vrna_callback_recursion_status * f)501 vrna_fold_compound_add_callback(vrna_fold_compound_t *fc,
502 vrna_callback_recursion_status *f)
503 {
504 if (fc && f)
505 fc->stat_cb = f;
506 }
507
508
509 PUBLIC int
vrna_fold_compound_prepare(vrna_fold_compound_t * fc,unsigned int options)510 vrna_fold_compound_prepare(vrna_fold_compound_t *fc,
511 unsigned int options)
512 {
513 int ret = 1; /* success */
514
515 /* check maximum sequence length restrictions */
516 if (fc->length > vrna_sequence_length_max(options)) {
517 vrna_message_warning(
518 "vrna_fold_compound_prepare@data_structures.c: sequence length of %d exceeds addressable range",
519 fc->length);
520 return 0;
521 }
522
523 /* make sure to always provide sane bp-span settings */
524 sanitize_bp_span(fc, options);
525
526 /* prepare Boltzmann factors if required */
527 vrna_params_prepare(fc, options);
528
529 /* prepare ptype array(s) */
530 vrna_ptypes_prepare(fc, options);
531
532 if (options & VRNA_OPTION_PF) {
533 /* prepare for partition function computation */
534
535 switch (fc->type) {
536 case VRNA_FC_TYPE_SINGLE: /* get pre-computed Boltzmann factors if not present*/
537 if (fc->domains_up) /* turn on unique ML decomposition with qm1 array */
538 fc->exp_params->model_details.uniq_ML = 1;
539
540 break;
541
542 default:
543 /* not doing anything here... */
544 break;
545 }
546 }
547
548 /* prepare hard constraints */
549 vrna_hc_prepare(fc, options);
550
551 /* prepare soft constraints data structure, if required */
552 vrna_sc_prepare(fc, options);
553
554 /* Add DP matrices, if not they are not present or do not fit current settings */
555 vrna_mx_prepare(fc, options);
556
557 return ret;
558 }
559
560
561 /*
562 #####################################
563 # BEGIN OF STATIC HELPER FUNCTIONS #
564 #####################################
565 */
566 PRIVATE void
sanitize_bp_span(vrna_fold_compound_t * fc,unsigned int options)567 sanitize_bp_span(vrna_fold_compound_t *fc,
568 unsigned int options)
569 {
570 vrna_md_t *md;
571
572 md = &(fc->params->model_details);
573
574 /* make sure that min_loop_size, max_bp_span, and window_size are sane */
575 if (options & VRNA_OPTION_WINDOW) {
576 if (md->window_size <= 0)
577 md->window_size = (int)fc->length;
578 else if (md->window_size > (int)fc->length)
579 md->window_size = (int)fc->length;
580
581 fc->window_size = md->window_size;
582 } else {
583 /* non-local fold mode */
584 md->window_size = (int)fc->length;
585 }
586
587 if ((md->max_bp_span <= 0) || (md->max_bp_span > md->window_size))
588 md->max_bp_span = md->window_size;
589 }
590
591
592 PRIVATE void
add_params(vrna_fold_compound_t * fc,vrna_md_t * md_p,unsigned int options)593 add_params(vrna_fold_compound_t *fc,
594 vrna_md_t *md_p,
595 unsigned int options)
596 {
597 /*
598 * ALWAYS provide regular energy parameters
599 * remove previous parameters if present and they differ from current model
600 */
601 if (fc->params) {
602 if (memcmp(md_p, &(fc->params->model_details), sizeof(vrna_md_t)) != 0) {
603 free(fc->params);
604 fc->params = NULL;
605 }
606 }
607
608 if (!fc->params)
609 fc->params = vrna_params(md_p);
610
611 vrna_params_prepare(fc, options);
612 }
613
614
615 PRIVATE void
set_fold_compound(vrna_fold_compound_t * fc,unsigned int options,unsigned int aux)616 set_fold_compound(vrna_fold_compound_t *fc,
617 unsigned int options,
618 unsigned int aux)
619 {
620 char *sequence, **sequences, **ptr;
621 unsigned int length, s;
622 vrna_md_t *md_p;
623
624 sequence = NULL;
625 sequences = NULL;
626
627 md_p = &(fc->params->model_details);
628
629 switch (fc->type) {
630 case VRNA_FC_TYPE_SINGLE:
631 sequence = fc->sequence;
632
633 fc->sequence = NULL;
634 fc->length = 0;
635
636 /* split input sequences at default delimiter '&' */
637 sequences = vrna_strsplit(sequence, NULL);
638
639 /* add individual sequences to fold compound */
640 for (ptr = sequences; *ptr; ptr++) {
641 vrna_sequence_add(fc, *ptr, VRNA_SEQUENCE_RNA);
642 free(*ptr);
643 }
644
645 free(sequences);
646 free(sequence);
647
648 if (fc->strands > 1) {
649 fc->cutpoint = fc->nucleotides[0].length + 1;
650
651 if (md_p->min_loop_size == TURN)
652 md_p->min_loop_size = 0; /* is it safe to set this here? */
653 }
654
655 if (!(options & VRNA_OPTION_EVAL_ONLY)) {
656 fc->ptype = (aux & WITH_PTYPE) ? vrna_ptypes(fc->sequence_encoding2, md_p) : NULL;
657 /* backward compatibility ptypes */
658 fc->ptype_pf_compat =
659 (aux & WITH_PTYPE_COMPAT) ? get_ptypes(fc->sequence_encoding2, md_p, 1) : NULL;
660 }
661
662 break;
663
664 case VRNA_FC_TYPE_COMPARATIVE:
665 sequences = fc->sequences;
666
667 fc->length = length = fc->length;
668
669 fc->cons_seq = vrna_aln_consensus_sequence((const char **)sequences, md_p);
670 fc->S_cons = vrna_seq_encode_simple(fc->cons_seq, md_p);
671
672 fc->pscore = vrna_alloc(sizeof(int) * ((length * (length + 1)) / 2 + 2));
673 /* backward compatibility ptypes */
674 fc->pscore_pf_compat =
675 (aux & WITH_PTYPE_COMPAT) ? vrna_alloc(sizeof(int) *
676 ((length * (length + 1)) / 2 + 2)) : NULL;
677
678 oldAliEn = fc->oldAliEn = md_p->oldAliEn;
679
680 fc->S = vrna_alloc((fc->n_seq + 1) * sizeof(short *));
681 fc->S5 = vrna_alloc((fc->n_seq + 1) * sizeof(short *));
682 fc->S3 = vrna_alloc((fc->n_seq + 1) * sizeof(short *));
683 fc->a2s = vrna_alloc((fc->n_seq + 1) * sizeof(unsigned int *));
684 fc->Ss = vrna_alloc((fc->n_seq + 1) * sizeof(char *));
685
686 for (s = 0; s < fc->n_seq; s++) {
687 vrna_aln_encode(fc->sequences[s],
688 &(fc->S[s]),
689 &(fc->S5[s]),
690 &(fc->S3[s]),
691 &(fc->Ss[s]),
692 &(fc->a2s[s]),
693 md_p);
694 }
695 fc->S5[fc->n_seq] = NULL;
696 fc->S3[fc->n_seq] = NULL;
697 fc->a2s[fc->n_seq] = NULL;
698 fc->Ss[fc->n_seq] = NULL;
699 fc->S[fc->n_seq] = NULL;
700
701 break;
702
703 default: /* do nothing ? */
704 break;
705 }
706
707 vrna_sequence_prepare(fc);
708
709 if (!(options & VRNA_OPTION_WINDOW) && (fc->length <= vrna_sequence_length_max(options))) {
710 fc->iindx = vrna_idx_row_wise(fc->length);
711 fc->jindx = vrna_idx_col_wise(fc->length);
712 }
713 }
714
715
716 PRIVATE void
make_pscores(vrna_fold_compound_t * fc)717 make_pscores(vrna_fold_compound_t *fc)
718 {
719 /*
720 * calculate co-variance bonus for each pair depending on
721 * compensatory/consistent mutations and incompatible seqs
722 * should be 0 for conserved pairs, >0 for good pairs
723 */
724
725 #define NONE -10000 /* score for forbidden pairs */
726
727 int i, j, k, l, s, max_span, turn;
728 float **dm;
729 int olddm[7][7] = { { 0, 0, 0, 0, 0, 0, 0 }, /* hamming distance between pairs */
730 { 0, 0, 2, 2, 1, 2, 2 } /* CG */,
731 { 0, 2, 0, 1, 2, 2, 2 } /* GC */,
732 { 0, 2, 1, 0, 2, 1, 2 } /* GU */,
733 { 0, 1, 2, 2, 0, 2, 1 } /* UG */,
734 { 0, 2, 2, 1, 2, 0, 2 } /* AU */,
735 { 0, 2, 2, 2, 1, 2, 0 } /* UA */ };
736
737 short **S = fc->S;
738 char **AS = fc->sequences;
739 int n_seq = fc->n_seq;
740 vrna_md_t *md =
741 (fc->params) ? &(fc->params->model_details) : &(fc->exp_params->model_details);
742 int *pscore = fc->pscore; /* precomputed array of pair types */
743 int *indx = fc->jindx;
744 int *my_iindx = fc->iindx;
745 int n = fc->length;
746
747 turn = md->min_loop_size;
748
749 if (md->ribo) {
750 if (RibosumFile != NULL)
751 dm = readribosum(RibosumFile);
752 else
753 dm = get_ribosum((const char **)AS, n_seq, n);
754 } else {
755 /*use usual matrix*/
756 dm = vrna_alloc(7 * sizeof(float *));
757 for (i = 0; i < 7; i++) {
758 dm[i] = vrna_alloc(7 * sizeof(float));
759 for (j = 0; j < 7; j++)
760 dm[i][j] = (float)olddm[i][j];
761 }
762 }
763
764 max_span = md->max_bp_span;
765 if ((max_span < turn + 2) || (max_span > n))
766 max_span = n;
767
768 for (i = 1; i < n; i++) {
769 for (j = i + 1; (j < i + turn + 1) && (j <= n); j++)
770 pscore[indx[j] + i] = NONE;
771 for (j = i + turn + 1; j <= n; j++) {
772 int pfreq[8] = {
773 0, 0, 0, 0, 0, 0, 0, 0
774 };
775 double score;
776 for (s = 0; s < n_seq; s++) {
777 int type;
778 if (S[s][i] == 0 && S[s][j] == 0) {
779 type = 7; /* gap-gap */
780 } else {
781 if ((AS[s][i] == '~') || (AS[s][j] == '~')) {
782 type = 7;
783 } else {
784 type = md->pair[S[s][i]][S[s][j]];
785 if ((md->noGU) && ((type == 3) || (type == 4)))
786 type = 0;
787 }
788 }
789
790 pfreq[type]++;
791 }
792 if (pfreq[0] * 2 + pfreq[7] > n_seq) {
793 pscore[indx[j] + i] = NONE;
794 continue;
795 }
796
797 for (k = 1, score = 0; k <= 6; k++) /* ignore pairtype 7 (gap-gap) */
798 for (l = k; l <= 6; l++)
799 score += pfreq[k] * pfreq[l] * dm[k][l];
800 /* counter examples score -1, gap-gap scores -0.25 */
801 pscore[indx[j] + i] = md->cv_fact *
802 ((UNIT * score) / n_seq - md->nc_fact * UNIT *
803 (pfreq[0] + pfreq[7] * 0.25));
804
805 if ((j - i + 1) > max_span)
806 pscore[indx[j] + i] = NONE;
807 }
808 }
809
810 if (md->noLP) {
811 /* remove unwanted pairs */
812 for (k = 1; k < n - turn - 1; k++)
813 for (l = 1; l <= 2; l++) {
814 int type, ntype = 0, otype = 0;
815 i = k;
816 j = i + turn + l;
817 type = pscore[indx[j] + i];
818 while ((i >= 1) && (j <= n)) {
819 if ((i > 1) && (j < n))
820 ntype = pscore[indx[j + 1] + i - 1];
821
822 if ((otype < md->cv_fact * MINPSCORE) && (ntype < md->cv_fact * MINPSCORE)) /* too many counterexamples */
823 pscore[indx[j] + i] = NONE; /* i.j can only form isolated pairs */
824
825 otype = type;
826 type = ntype;
827 i--;
828 j++;
829 }
830 }
831 }
832
833 /*free dm */
834 for (i = 0; i < 7; i++)
835 free(dm[i]);
836 free(dm);
837
838 /* copy over pscores for backward compatibility */
839 if (fc->pscore_pf_compat) {
840 for (i = 1; i < n; i++)
841 for (j = i; j <= n; j++)
842 fc->pscore_pf_compat[my_iindx[i] - j] = (short)pscore[indx[j] + i];
843 }
844 }
845
846
847 PRIVATE vrna_fold_compound_t *
init_fc_single(void)848 init_fc_single(void)
849 {
850 vrna_fold_compound_t init = {
851 .type = VRNA_FC_TYPE_SINGLE
852 };
853 vrna_fold_compound_t *fc = vrna_alloc(sizeof(vrna_fold_compound_t));
854
855 if (fc) {
856 memcpy(fc, &init, sizeof(vrna_fold_compound_t));
857 nullify(fc);
858 }
859
860 return fc;
861 }
862
863
864 PRIVATE vrna_fold_compound_t *
init_fc_comparative(void)865 init_fc_comparative(void)
866 {
867 vrna_fold_compound_t init = {
868 .type = VRNA_FC_TYPE_COMPARATIVE
869 };
870 vrna_fold_compound_t *fc = vrna_alloc(sizeof(vrna_fold_compound_t));
871
872 if (fc) {
873 memcpy(fc, &init, sizeof(vrna_fold_compound_t));
874 nullify(fc);
875 }
876
877 return fc;
878 }
879
880
881 PRIVATE INLINE void
nullify(vrna_fold_compound_t * fc)882 nullify(vrna_fold_compound_t *fc)
883 {
884 if (fc) {
885 fc->length = 0;
886 fc->strands = 0;
887 fc->cutpoint = -1;
888 fc->strand_number = NULL;
889 fc->strand_order = NULL;
890 fc->strand_start = NULL;
891 fc->strand_end = NULL;
892 fc->nucleotides = NULL;
893 fc->alignment = NULL;
894
895 fc->hc = NULL;
896 fc->matrices = NULL;
897 fc->exp_matrices = NULL;
898 fc->params = NULL;
899 fc->exp_params = NULL;
900 fc->iindx = NULL;
901 fc->jindx = NULL;
902
903 fc->stat_cb = NULL;
904 fc->auxdata = NULL;
905 fc->free_auxdata = NULL;
906
907 fc->domains_struc = NULL;
908 fc->domains_up = NULL;
909 fc->aux_grammar = NULL;
910
911 switch (fc->type) {
912 case VRNA_FC_TYPE_SINGLE:
913 fc->sequence = NULL;
914 fc->sequence_encoding = NULL;
915 fc->sequence_encoding2 = NULL;
916 fc->ptype = NULL;
917 fc->ptype_pf_compat = NULL;
918 fc->sc = NULL;
919
920 break;
921
922 case VRNA_FC_TYPE_COMPARATIVE:
923 fc->sequences = NULL;
924 fc->n_seq = 0;
925 fc->cons_seq = NULL;
926 fc->S_cons = NULL;
927 fc->S = NULL;
928 fc->S5 = NULL;
929 fc->S3 = NULL;
930 fc->Ss = NULL;
931 fc->a2s = NULL;
932 fc->pscore = NULL;
933 fc->pscore_local = NULL;
934 fc->pscore_pf_compat = NULL;
935 fc->scs = NULL;
936 fc->oldAliEn = 0;
937
938 break;
939 }
940
941 fc->maxD1 = 0;
942 fc->maxD2 = 0;
943 fc->reference_pt1 = NULL;
944 fc->reference_pt2 = NULL;
945 fc->referenceBPs1 = NULL;
946 fc->referenceBPs2 = NULL;
947 fc->bpdist = NULL;
948 fc->mm1 = NULL;
949 fc->mm2 = NULL;
950
951 fc->window_size = -1;
952 fc->ptype_local = NULL;
953 #ifdef VRNA_WITH_SVM
954 fc->zscore_data = NULL;
955 #endif
956 }
957 }
958