1 #ifndef VIENNA_RNA_PACKAGE_CONSTRAINTS_SHAPE_H
2 #define VIENNA_RNA_PACKAGE_CONSTRAINTS_SHAPE_H
3 
4 #include <ViennaRNA/fold_compound.h>
5 
6 /**
7  *  @file constraints/SHAPE.h
8  *  @ingroup SHAPE_reactivities
9  *  @brief This module provides functions to incorporate SHAPE reactivity data
10  *  into the folding recursions by means of soft constraints
11  */
12 
13 /**
14  *
15  *  @addtogroup SHAPE_reactivities
16  *  @brief Incorporate SHAPE reactivity structure probing data
17  *  into the folding recursions by means of soft constraints
18  *
19  *  Details for our implementation to incorporate SHAPE reactivity data to guide
20  *  secondary structure prediction can be found in @cite lorenz:2016a
21  *
22  */
23 void vrna_constraints_add_SHAPE(vrna_fold_compound_t *vc,  /* fold compound; presumably the one the SHAPE constraints get attached to -- TODO confirm */
24                                 const char *shape_file,  /* presumably the name of a file holding SHAPE reactivities -- TODO confirm */
25                                 const char *shape_method,  /* presumably an encoded method string, cf. vrna_sc_SHAPE_parse_method() -- TODO confirm */
26                                 const char *shape_conversion,  /* presumably the conversion spec also taken by vrna_sc_SHAPE_to_pr() -- TODO confirm */
27                                 int verbose,  /* presumably non-zero enables diagnostic output -- TODO confirm */
28                                 unsigned int constraint_type);  /* presumably option flags selecting how the constraints are stored -- TODO confirm */
29 
30 void vrna_constraints_add_SHAPE_ali(vrna_fold_compound_t *vc,  /* fold compound; presumably one built from an alignment -- TODO confirm */
31                                     const char *shape_method,  /* note: precedes the file list here, unlike vrna_constraints_add_SHAPE(); presumably an encoded method string -- TODO confirm */
32                                     const char **shape_files,  /* presumably file names with SHAPE reactivities; how the list is terminated is not visible here */
33                                     const int  *shape_file_association,  /* presumably maps each file to a sequence of the alignment -- TODO confirm */
34                                     int verbose,  /* presumably non-zero enables diagnostic output -- TODO confirm */
35                                     unsigned int constraint_type);  /* presumably option flags selecting how the constraints are stored -- TODO confirm */
36 /**
37  *  @brief  Add SHAPE reactivity data as soft constraints (Deigan et al. method)
38  *
39  *  This approach of SHAPE directed RNA folding uses the simple linear ansatz
40  *  @f[ \Delta G_{\text{SHAPE}}(i) = m \ln(\text{SHAPE reactivity}(i)+1)+ b @f]
41  *  to convert SHAPE reactivity values to pseudo energies whenever a
42  *  nucleotide @f$ i @f$ contributes to a stacked pair. A positive slope @f$ m @f$
43  *  penalizes high reactivities in paired regions, while a negative intercept @f$ b @f$
44  *  results in a confirmatory "bonus" free energy for correctly predicted base pairs.
45  *  Since the energy evaluation of a base pair stack involves two pairs, the pseudo
46  *  energies are added for all four contributing nucleotides. Consequently, the
47  *  energy term is applied twice for pairs inside a helix and only once for pairs
48  *  adjacent to other structures. For all other loop types the energy model remains
49  *  unchanged even when the experimental data strongly disagrees with a certain motif.
50  *
51  *  @see  For further details, we refer to @cite deigan:2009.
52  *  @see  vrna_sc_remove(), vrna_sc_add_SHAPE_zarringhalam(), vrna_sc_minimize_pertubation()
53  *  @ingroup SHAPE_reactivities
54  *  @param  vc            The #vrna_fold_compound_t the soft constraints are associated with
55  *  @param  reactivities  A vector of normalized SHAPE reactivities
56  *  @param  m             The slope of the conversion function
57  *  @param  b             The intercept of the conversion function
58  *  @param  options       The options flag indicating how/where to store the soft constraints
59  *  @return               1 on success, 0 on errors
60  */
61 int vrna_sc_add_SHAPE_deigan( vrna_fold_compound_t *vc,
62                               const double *reactivities,
63                               double m,
64                               double b,
65                               unsigned int options);
66 
67 /**
68  *  @brief  Add SHAPE reactivity data from files as soft constraints for consensus structure prediction (Deigan et al. method)
69  *
70  *  @ingroup SHAPE_reactivities
71  *  @param  vc            The #vrna_fold_compound_t the soft constraints are associated with
72  *  @param  shape_files   A set of filenames that contain normalized SHAPE reactivity data
73  *  @param  shape_file_association  An array of integers that associate the files with sequences in the alignment
74  *  @param  m             The slope of the conversion function
75  *  @param  b             The intercept of the conversion function
76  *  @param  options       The options flag indicating how/where to store the soft constraints
77  *  @return               1 on success, 0 on errors
78  */
79 int vrna_sc_add_SHAPE_deigan_ali( vrna_fold_compound_t *vc,
80                                   const char **shape_files,
81                                   const int *shape_file_association,
82                                   double m,
83                                   double b,
84                                   unsigned int options);
85 
86 /**
87  *  @brief  Add SHAPE reactivity data as soft constraints (Zarringhalam et al. method)
88  *
89  *  This method first converts the observed SHAPE reactivity of nucleotide @f$ i @f$ into a
90  *  probability @f$ q_i @f$ that position @f$ i @f$ is unpaired by means of a non-linear map.
91  *  Then pseudo-energies of the form @f[ \Delta G_{\text{SHAPE}}(x,i) = \beta\ |x_i - q_i| @f]
92  *  are computed, where @f$ x_i=0 @f$ if position @f$ i @f$ is unpaired and @f$ x_i=1 @f$
93  *  if @f$ i @f$ is paired in a given secondary structure. The parameter @f$ \beta @f$ serves as
94  *  a scaling factor. The magnitude of discrepancy between prediction and experimental observation
95  *  is represented by @f$ |x_i - q_i| @f$.
96  *
97  *  @see For further details, we refer to @cite zarringhalam:2012
98  *  @see  vrna_sc_remove(), vrna_sc_add_SHAPE_deigan(), vrna_sc_minimize_pertubation()
99  *  @ingroup SHAPE_reactivities
100  *  @param  vc                The #vrna_fold_compound_t the soft constraints are associated with
101  *  @param  reactivities      A vector of normalized SHAPE reactivities
102  *  @param  b                 The scaling factor @f$ \beta @f$ of the conversion function
103  *  @param  default_value     The default value used for nucleotides without reactivity data
104  *  @param  shape_conversion  A flag that specifies how to convert reactivities to probabilities
105  *  @param  options           The options flag indicating how/where to store the soft constraints
106  *  @return                   1 on success, 0 on errors
107  */
108 int vrna_sc_add_SHAPE_zarringhalam( vrna_fold_compound_t *vc,
109                                     const double *reactivities,
110                                     double b,
111                                     double default_value,
112                                     const char *shape_conversion,
113                                     unsigned int options);
114 
115 /**
116  *  @brief  Parse a character string and extract the encoded SHAPE reactivity conversion
117  *          method and possibly the parameters for conversion into pseudo free energies
118  *
119  *  @ingroup soft_constraints
120  *
121  *  @param  method_string   The string that contains the encoded SHAPE reactivity conversion method
122  *  @param  method          A pointer to the memory location where the method character will be stored
123  *  @param  param_1         A pointer to the memory location where the first parameter of the corresponding method will be stored
124  *  @param  param_2         A pointer to the memory location where the second parameter of the corresponding method will be stored
125  *  @return                 1 on successful extraction of the method, 0 on errors
126  */
127 int vrna_sc_SHAPE_parse_method( const char *method_string,
128                                 char *method,
129                                 float *param_1,
130                                 float *param_2);
131 
132 /**
133  *  @brief Convert SHAPE reactivity values to probabilities for being unpaired
134  *
135  *  @note NOTE(review): the text formerly here described parsing a file into a sequence string and a
136  *  #FLT_OR_DBL array, which does not match this prototype (no file or sequence argument); it looks copied
137  *  from vrna_file_SHAPE_read(). Presumably @p values is converted in place -- confirm in the implementation.
138  *  @ingroup SHAPE_reactivities
139  *  @see vrna_file_SHAPE_read()
140  *  @param shape_conversion String defining the method used for the conversion process
141  *  @param values           Pointer to an array of SHAPE reactivities
142  *  @param length           Length of the array of SHAPE reactivities
143  *  @param default_value    Result used for position with invalid/missing reactivity values
144  *  @return                 Status code; presumably 1 on success and 0 on errors, as for the other functions here -- TODO confirm
145  */
146 int vrna_sc_SHAPE_to_pr(const char *shape_conversion,
147                         double *values,
148                         int length,
149                         double default_value);
150 
151 #endif
152