1 #ifndef VIENNA_RNA_PACKAGE_MFE_H
2 #define VIENNA_RNA_PACKAGE_MFE_H
3 
4 #include <stdio.h>
5 #include <ViennaRNA/datastructures/basic.h>
6 #include <ViennaRNA/fold_compound.h>
7 
8 /**
9  *
10  *  @file mfe.h
11  *  @ingroup  mfe, mfe_global
12  *  @brief Compute Minimum Free Energy (MFE) and backtrace corresponding secondary
13  *         structures from RNA sequence data.
14  *
15  *  This file includes (almost) all function declarations within the RNAlib that are related to
16  *  MFE folding...
17  */
18 
19 /**
20  *  @addtogroup mfe
21  *  @{
22  *  @brief  Predicting the Minimum Free Energy (MFE) and a corresponding (consensus) secondary structure
23  *
24  *  In a nutshell we provide two different flavors for MFE prediction:
25  *  * @ref mfe_global - to compute the MFE for the entire sequence
26  *  * @ref mfe_window - to compute MFEs for each window using a sliding window approach
27  *
28  *  Each of these flavors, again, provides two implementations to either compute the MFE based on
29  *  *  single RNA (DNA) sequence(s), or
30  *  *  a comparative approach using multiple sequence alignments (MSA).
31  *
32  *  For the latter, a consensus secondary structure is predicted and our implementations compute
33  *  an average of free energies for each sequence in the MSA plus an additional covariance
34  *  pseudo-energy term.
35  *
36  *  The implementations for @ref mfe_backtracking are generally agnostic with respect to whether
37  *  local or global structure prediction is in place.
38  *  @}
39  */
40 
41 
42 /**
43  *  @addtogroup  mfe_global
44  *  @{
45  *  @brief  Variations of the global Minimum Free Energy (MFE) prediction algorithm
46  *
47  *  We provide implementations of the global MFE prediction algorithm for
48  *  * Single sequences,
49  *  * Multiple sequence alignments (MSA), and
50  *  * RNA-RNA hybrids
51  */
52 
53 /**
54  *  @name Basic global MFE prediction interface
55  *  @{
56  */
57 
58 /**
59  *  @brief Compute minimum free energy and an appropriate secondary
60  *  structure of an RNA sequence, or RNA sequence alignment
61  *
62  *  Depending on the type of the provided #vrna_fold_compound_t, this function
63  *  predicts the MFE for a single sequence, or a corresponding averaged MFE for
64  *  a sequence alignment. If backtracking is activated, it also constructs the
65  *  corresponding secondary structure, or consensus structure.
66  *  Therefore, the second parameter, @a structure, has to point to an allocated
67  *  block of memory with a size of at least @f$\mathrm{strlen}(\mathrm{sequence})+1@f$ to
68  *  store the backtracked MFE structure. (For consensus structures, this is the length of
69  *  the alignment + 1.) If @p NULL is passed, no backtracking will be performed.
70  *
71  *  @note This function is polymorphic. It accepts #vrna_fold_compound_t of type
72  *        #VRNA_FC_TYPE_SINGLE, and #VRNA_FC_TYPE_COMPARATIVE.
73  *
74  *  @see #vrna_fold_compound_t, vrna_fold_compound(), vrna_fold(), vrna_circfold(),
75  *        vrna_fold_compound_comparative(), vrna_alifold(), vrna_circalifold()
76  *
77  *  @param vc             fold compound
78  *  @param structure      A pointer to the character array where the
79  *                        secondary structure in dot-bracket notation will be written to (May be NULL)
80  *
81  *  @return the minimum free energy (MFE) in kcal/mol
82  */
83 float
84 vrna_mfe(vrna_fold_compound_t *vc,
85          char                 *structure);
86 
87 
88 /**
89  *  @brief Compute the minimum free energy of two interacting RNA molecules
90  *
91  *  The code is analogous to the vrna_mfe() function.
92  *
93  *  @param    vc  fold compound
94  *  @param    structure Will hold the dot-bracket structure of the dimer molecule
95  *  @return   minimum free energy of the structure
96  */
97 float
98 vrna_mfe_dimer(vrna_fold_compound_t *vc,
99                char                 *structure);
100 
101 
102 /**
103  * End basic MFE interface
104  * @}
105  */
106 
107 
108 /**
109  *  @name Simplified global MFE prediction using sequence(s) or multiple sequence alignment(s)
110  *  @{
111  */
112 
113 /**
114  *  @brief Compute Minimum Free Energy (MFE), and a corresponding secondary structure for an RNA sequence
115  *
116  *  This simplified interface to vrna_mfe() computes the MFE and, if required, a secondary structure for an
117  *  RNA sequence using default options. Memory required for dynamic programming (DP) matrices will
118  *  be allocated and freed on-the-fly. Hence, after return of this function, the recursively filled
119  *  matrices are no longer available for any post-processing, e.g. suboptimal backtracking, etc.
120  *
121  *  @note In case you want to use the filled DP matrices for any subsequent post-processing step, or
122  *  you require other conditions than specified by the default model details, use vrna_mfe(),
123  *  and the data structure #vrna_fold_compound_t instead.
124  *
125  *  @see vrna_circfold(), vrna_mfe()
126  *
127  *  @param sequence   RNA sequence
128  *  @param structure  A pointer to the character array where the secondary structure in dot-bracket
129  *         notation will be written to. NOTE(review): presumably needs room for strlen(sequence) + 1 characters, as documented for vrna_mfe() - confirm
130  *  @return the minimum free energy (MFE) in kcal/mol
131  */
132 float
133 vrna_fold(const char  *sequence,
134           char        *structure);
135 
136 
137 /**
138  *  @brief Compute Minimum Free Energy (MFE), and a corresponding secondary structure for a circular RNA sequence
139  *
140  *  This simplified interface to vrna_mfe() computes the MFE and, if required, a secondary structure for a
141  *  circular RNA sequence using default options. Memory required for dynamic programming (DP) matrices will
142  *  be allocated and freed on-the-fly. Hence, after return of this function, the recursively filled
143  *  matrices are no longer available for any post-processing, e.g. suboptimal backtracking, etc.
144  *
145  *  Folding of circular RNA sequences is handled as a post-processing step of the forward
146  *  recursions. See @cite hofacker:2006 for further details.
147  *
148  *  @note In case you want to use the filled DP matrices for any subsequent post-processing step, or
149  *  you require other conditions than specified by the default model details, use vrna_mfe(),
150  *  and the data structure #vrna_fold_compound_t instead.
151  *
152  *  @see vrna_fold(), vrna_mfe()
153  *
154  *  @param sequence   Circular RNA sequence
155  *  @param structure  A pointer to the character array where the secondary structure in dot-bracket
156  *         notation will be written to. NOTE(review): presumably needs room for strlen(sequence) + 1 characters, as documented for vrna_mfe() - confirm
157  *  @return the minimum free energy (MFE) in kcal/mol
158  */
159 float
160 vrna_circfold(const char  *sequence,
161               char        *structure);
162 
163 
164 /**
165  *  @brief  Compute Minimum Free Energy (MFE), and a corresponding consensus secondary structure
166  *          for an RNA sequence alignment using a comparative method
167  *
168  *  This simplified interface to vrna_mfe() computes the MFE and, if required, a consensus secondary
169  *  structure for an RNA sequence alignment using default options. Memory required for dynamic programming
170  *  (DP) matrices will be allocated and freed on-the-fly. Hence, after return of this function, the
171  *  recursively filled matrices are no longer available for any post-processing, e.g. suboptimal
172  *  backtracking, etc.
173  *
174  *  @note In case you want to use the filled DP matrices for any subsequent post-processing step, or
175  *  you require other conditions than specified by the default model details, use vrna_mfe(),
176  *  and the data structure #vrna_fold_compound_t instead.
177  *
178  *  @see vrna_circalifold(), vrna_mfe()
179  *
180  *  @param sequences  RNA sequence alignment
181  *  @param structure  A pointer to the character array where the consensus secondary structure in dot-bracket
182  *         notation will be written to. NOTE(review): presumably needs room for the alignment length + 1 characters, as documented for vrna_mfe() - confirm
183  *  @return the minimum free energy (MFE) in kcal/mol
184  */
185 float
186 vrna_alifold(const char **sequences,
187              char       *structure);
188 
189 
190 /**
191  *  @brief  Compute Minimum Free Energy (MFE), and a corresponding consensus secondary structure
192  *          for a sequence alignment of circular RNAs using a comparative method
193  *
194  *  This simplified interface to vrna_mfe() computes the MFE and, if required, a consensus secondary
195  *  structure for a sequence alignment of circular RNAs using default options. Memory required for dynamic programming
196  *  (DP) matrices will be allocated and freed on-the-fly. Hence, after return of this function, the
197  *  recursively filled matrices are no longer available for any post-processing, e.g. suboptimal
198  *  backtracking, etc.
199  *
200  *  Folding of circular RNA sequences is handled as a post-processing step of the forward
201  *  recursions. See @cite hofacker:2006 for further details.
202  *
203  *  @note In case you want to use the filled DP matrices for any subsequent post-processing step, or
204  *  you require other conditions than specified by the default model details, use vrna_mfe(),
205  *  and the data structure #vrna_fold_compound_t instead.
206  *
207  *  @see vrna_alifold(), vrna_mfe()
208  *
209  *  @param sequences  Sequence alignment of circular RNAs
210  *  @param structure  A pointer to the character array where the consensus secondary structure in dot-bracket
211  *         notation will be written to. NOTE(review): presumably needs room for the alignment length + 1 characters, as documented for vrna_mfe() - confirm
212  *  @return the minimum free energy (MFE) in kcal/mol
213  */
214 float
215 vrna_circalifold(const char **sequences,
216                  char       *structure);
217 
218 
219 /**
220  *  @brief Compute Minimum Free Energy (MFE), and a corresponding secondary structure for two dimerized RNA sequences
221  *
222  *  This simplified interface to vrna_mfe() computes the MFE and, if required, a secondary structure for
223  *  two RNA sequences upon dimerization using default options. Memory required for dynamic programming
224  *  (DP) matrices will be allocated and freed on-the-fly. Hence, after return of this function, the
225  *  recursively filled matrices are no longer available for any post-processing, e.g. suboptimal
226  *  backtracking, etc.
227  *
228  *  @note In case you want to use the filled DP matrices for any subsequent post-processing step, or
229  *  you require other conditions than specified by the default model details, use vrna_mfe(),
230  *  and the data structure #vrna_fold_compound_t instead.
231  *  NOTE(review): the text above names vrna_mfe(), while the dimer-specific basic interface declared in this header is vrna_mfe_dimer() - confirm which one applies.
232  *  @see vrna_mfe_dimer(), vrna_fold_compound(), #vrna_fold_compound_t, vrna_cut_point_insert()
233  *
234  *  @param sequence   two RNA sequences separated by the '&' character
235  *  @param structure  A pointer to the character array where the
236  *         secondary structure in dot-bracket notation will be written to
237  *  @return the minimum free energy (MFE) in kcal/mol
238  */
239 float
240 vrna_cofold(const char  *sequence,
241             char        *structure);
242 
243 
244 /**
245  * End simplified global MFE interface
246  * @}
247  */
248 
249 /**
250  * End group mfe_global
251  * @}
252  */
253 
254 /**
255  *  @addtogroup mfe_backtracking
256  *  @{
257  *  @brief   Backtracking related interfaces
258  */
259 
260 /**
261  *  @brief Backtracking routine that operates on a caller-supplied stack of intervals
 *
 *  NOTE(review): this declaration carries no description in the header. The
 *  parameter notes below are inferred from names and types only and must be
 *  confirmed against the implementation.
 *
 *  @param vc         fold compound
 *  @param bp_stack   base pair stack (presumably receives the backtracked base pairs - TODO confirm)
 *  @param bt_stack   array of intervals (#sect) to backtrack from (TODO confirm ownership and required capacity)
 *  @param s          presumably the number of entries already on @p bt_stack - TODO confirm
 *
 *  @return           integer result; its meaning is not documented here (TODO confirm)
262  */
263 int
264 vrna_backtrack_from_intervals(vrna_fold_compound_t  *vc,
265                               vrna_bp_stack_t       *bp_stack,
266                               sect                  bt_stack[],
267                               int                   s);
268 
269 
270 /**
271  *  @brief Backtrack an MFE (sub)structure
272  *
273  *  This function allows one to backtrack the MFE structure for a (sub)sequence
274  *
275  *  @note On error, the function returns #INF / 100. and stores the empty string
276  *        in @p structure.
277  *
278  *  @pre  Requires pre-filled MFE dynamic programming matrices, i.e. one has to call vrna_mfe()
279  *        prior to calling this function
280  *
281  *  @see vrna_mfe(), vrna_pbacktrack5()
282  *
283  *  @param fc             fold compound
284  *  @param length         The length of the subsequence, starting from the 5' end
285  *  @param structure      A pointer to the character array where the secondary structure in
286  *                        dot-bracket notation will be written to. (Must have size of at least @p length + 1)
287  *
288  *  @return               The minimum free energy (MFE) for the specified @p length in kcal/mol and
289  *                        a corresponding secondary structure in dot-bracket notation (stored in @p structure)
290  */
291 float
292 vrna_backtrack5(vrna_fold_compound_t  *fc,
293                 unsigned int          length,
294                 char                  *structure);
295 
/**
 *  @brief Backtracking interface for window-based MFE prediction
 *
 *  NOTE(review): this declaration carries no description in the header. Judging
 *  by its name and the Lfold_filename parameter it presumably belongs to the
 *  sliding-window (local) MFE flavor; all parameter notes below are inferred
 *  from names and types only and must be confirmed against the implementation.
 *
 *  @param fc               fold compound
 *  @param Lfold_filename   presumably the name of a file with sliding-window (Lfold) results - TODO confirm
 *  @param file_pos         presumably a position within that file - TODO confirm
 *  @param structure        address of a character pointer; presumably receives the backtracked
 *                          structure (allocation and ownership undocumented - TODO confirm)
 *  @param mfe              presumably the minimum free energy associated with the backtracked structure - TODO confirm
 *
 *  @return                 integer result; its meaning is not documented here (TODO confirm)
 */
296 int
297 vrna_backtrack_window(vrna_fold_compound_t  *fc,
298                       const char            *Lfold_filename,
299                       long                  file_pos,
300                       char                  **structure,
301                       double                mfe);
302 
303 /**
304  * End backtracking related interfaces
305  * @}
306  */
307 
308 
309 #endif
310