1 #ifndef VIENNA_RNA_PACKAGE_MFE_H 2 #define VIENNA_RNA_PACKAGE_MFE_H 3 4 #include <stdio.h> 5 #include <ViennaRNA/datastructures/basic.h> 6 #include <ViennaRNA/fold_compound.h> 7 8 /** 9 * 10 * @file mfe.h 11 * @ingroup mfe, mfe_global 12 * @brief Compute Minimum Free energy (MFE) and backtrace corresponding secondary 13 * structures from RNA sequence data. 14 * 15 * This file includes (almost) all function declarations within the RNAlib that are related to 16 * MFE folding... 17 */ 18 19 /** 20 * @addtogroup mfe 21 * @{ 22 * @brief Predicting the Minimum Free Energy (MFE) and a corresponding (consensus) secondary structure 23 * 24 * In a nutshell we provide two different flavors for MFE prediction: 25 * * @ref mfe_global - to compute the MFE for the entire sequence 26 * * @ref mfe_window - to compute MFEs for each window using a sliding window approach 27 * 28 * Each of these flavors, again, provides two implementations to either compute the MFE based on 29 * * single RNA (DNA) sequence(s), or 30 * * a comparative approach using multiple sequence alignments (MSA). 31 * 32 * For the latter, a consensus secondary structure is predicted and our implementations compute 33 * an average of free energies for each sequence in the MSA plus an additional covariance 34 * pseudo-energy term. 35 * 36 * The implementations for @ref mfe_backtracking are generally agnostic with respect to whether 37 * local or global structure prediction is in place. 38 * @} 39 */ 40 41 42 /** 43 * @addtogroup mfe_global 44 * @{ 45 * @brief Variations of the global Minimum Free Energy (MFE) prediction algorithm 46 * 47 * We provide implementations of the global MFE prediction algorithm for 48 * * Single sequences, 49 * * Multiple sequence alignments (MSA), and 50 * * RNA-RNA hybrids 51 */ 52 53 /** 54 * @name Basic global MFE prediction interface 55 * @{ 56 */ 57 58 /** 59 * @brief Compute minimum free energy and an appropriate secondary 60 * structure of an RNA sequence, or RNA sequence alignment 61 * 62 * Depending on the type of the provided #vrna_fold_compound_t, this function 63 * predicts the MFE for a single sequence, or a corresponding averaged MFE for 64 * a sequence alignment. If backtracking is activated, it also constructs the 65 * corresponding secondary structure, or consensus structure. 66 * Therefore, the second parameter, @a structure, has to point to an allocated 67 * block of memory with a size of at least @f$\mathrm{strlen}(\mathrm{sequence})+1@f$ to 68 * store the backtracked MFE structure. (For consensus structures, this is the length of 69 * the alignment + 1. If @p NULL is passed, no backtracking will be performed. 70 * 71 * @note This function is polymorphic. It accepts #vrna_fold_compound_t of type 72 * #VRNA_FC_TYPE_SINGLE, and #VRNA_FC_TYPE_COMPARATIVE. 73 * 74 * @see #vrna_fold_compound_t, vrna_fold_compound(), vrna_fold(), vrna_circfold(), 75 * vrna_fold_compound_comparative(), vrna_alifold(), vrna_circalifold() 76 * 77 * @param vc fold compound 78 * @param structure A pointer to the character array where the 79 * secondary structure in dot-bracket notation will be written to (Maybe NULL) 80 * 81 * @return the minimum free energy (MFE) in kcal/mol 82 */ 83 float 84 vrna_mfe(vrna_fold_compound_t *vc, 85 char *structure); 86 87 88 /** 89 * @brief Compute the minimum free energy of two interacting RNA molecules 90 * 91 * The code is analog to the vrna_mfe() function. 92 * 93 * @param vc fold compound 94 * @param structure Will hold the barcket dot structure of the dimer molecule 95 * @return minimum free energy of the structure 96 */ 97 float 98 vrna_mfe_dimer(vrna_fold_compound_t *vc, 99 char *structure); 100 101 102 /** 103 * End basic MFE interface 104 * @} 105 */ 106 107 108 /** 109 * @name Simplified global MFE prediction using sequence(s) or multiple sequence alignment(s) 110 * @{ 111 */ 112 113 /** 114 * @brief Compute Minimum Free Energy (MFE), and a corresponding secondary structure for an RNA sequence 115 * 116 * This simplified interface to vrna_mfe() computes the MFE and, if required, a secondary structure for an 117 * RNA sequence using default options. Memory required for dynamic programming (DP) matrices will 118 * be allocated and free'd on-the-fly. Hence, after return of this function, the recursively filled 119 * matrices are not available any more for any post-processing, e.g. suboptimal backtracking, etc. 120 * 121 * @note In case you want to use the filled DP matrices for any subsequent post-processing step, or 122 * you require other conditions than specified by the default model details, use vrna_mfe(), 123 * and the data structure #vrna_fold_compound_t instead. 124 * 125 * @see vrna_circfold(), vrna_mfe() 126 * 127 * @param sequence RNA sequence 128 * @param structure A pointer to the character array where the 129 * secondary structure in dot-bracket notation will be written to 130 * @return the minimum free energy (MFE) in kcal/mol 131 */ 132 float 133 vrna_fold(const char *sequence, 134 char *structure); 135 136 137 /** 138 * @brief Compute Minimum Free Energy (MFE), and a corresponding secondary structure for a circular RNA sequence 139 * 140 * This simplified interface to vrna_mfe() computes the MFE and, if required, a secondary structure for a 141 * circular RNA sequence using default options. Memory required for dynamic programming (DP) matrices will 142 * be allocated and free'd on-the-fly. Hence, after return of this function, the recursively filled 143 * matrices are not available any more for any post-processing, e.g. suboptimal backtracking, etc. 144 * 145 * Folding of circular RNA sequences is handled as a post-processing step of the forward 146 * recursions. See @cite hofacker:2006 for further details. 147 * 148 * @note In case you want to use the filled DP matrices for any subsequent post-processing step, or 149 * you require other conditions than specified by the default model details, use vrna_mfe(), 150 * and the data structure #vrna_fold_compound_t instead. 151 * 152 * @see vrna_fold(), vrna_mfe() 153 * 154 * @param sequence RNA sequence 155 * @param structure A pointer to the character array where the 156 * secondary structure in dot-bracket notation will be written to 157 * @return the minimum free energy (MFE) in kcal/mol 158 */ 159 float 160 vrna_circfold(const char *sequence, 161 char *structure); 162 163 164 /** 165 * @brief Compute Minimum Free Energy (MFE), and a corresponding consensus secondary structure 166 * for an RNA sequence alignment using a comparative method 167 * 168 * This simplified interface to vrna_mfe() computes the MFE and, if required, a consensus secondary 169 * structure for an RNA sequence alignment using default options. Memory required for dynamic programming 170 * (DP) matrices will be allocated and free'd on-the-fly. Hence, after return of this function, the 171 * recursively filled matrices are not available any more for any post-processing, e.g. suboptimal 172 * backtracking, etc. 173 * 174 * @note In case you want to use the filled DP matrices for any subsequent post-processing step, or 175 * you require other conditions than specified by the default model details, use vrna_mfe(), 176 * and the data structure #vrna_fold_compound_t instead. 177 * 178 * @see vrna_circalifold(), vrna_mfe() 179 * 180 * @param sequences RNA sequence alignment 181 * @param structure A pointer to the character array where the 182 * secondary structure in dot-bracket notation will be written to 183 * @return the minimum free energy (MFE) in kcal/mol 184 */ 185 float 186 vrna_alifold(const char **sequences, 187 char *structure); 188 189 190 /** 191 * @brief Compute Minimum Free Energy (MFE), and a corresponding consensus secondary structure 192 * for a sequence alignment of circular RNAs using a comparative method 193 * 194 * This simplified interface to vrna_mfe() computes the MFE and, if required, a consensus secondary 195 * structure for an RNA sequence alignment using default options. Memory required for dynamic programming 196 * (DP) matrices will be allocated and free'd on-the-fly. Hence, after return of this function, the 197 * recursively filled matrices are not available any more for any post-processing, e.g. suboptimal 198 * backtracking, etc. 199 * 200 * Folding of circular RNA sequences is handled as a post-processing step of the forward 201 * recursions. See @cite hofacker:2006 for further details. 202 * 203 * @note In case you want to use the filled DP matrices for any subsequent post-processing step, or 204 * you require other conditions than specified by the default model details, use vrna_mfe(), 205 * and the data structure #vrna_fold_compound_t instead. 206 * 207 * @see vrna_alifold(), vrna_mfe() 208 * 209 * @param sequences Sequence alignment of circular RNAs 210 * @param structure A pointer to the character array where the 211 * secondary structure in dot-bracket notation will be written to 212 * @return the minimum free energy (MFE) in kcal/mol 213 */ 214 float 215 vrna_circalifold(const char **sequences, 216 char *structure); 217 218 219 /** 220 * @brief Compute Minimum Free Energy (MFE), and a corresponding secondary structure for two dimerized RNA sequences 221 * 222 * This simplified interface to vrna_mfe() computes the MFE and, if required, a secondary structure for 223 * two RNA sequences upon dimerization using default options. Memory required for dynamic programming 224 * (DP) matrices will be allocated and free'd on-the-fly. Hence, after return of this function, the 225 * recursively filled matrices are not available any more for any post-processing, e.g. suboptimal 226 * backtracking, etc. 227 * 228 * @note In case you want to use the filled DP matrices for any subsequent post-processing step, or 229 * you require other conditions than specified by the default model details, use vrna_mfe(), 230 * and the data structure #vrna_fold_compound_t instead. 231 * 232 * @see vrna_mfe_dimer(), vrna_fold_compound(), #vrna_fold_compound_t, vrna_cut_point_insert() 233 * 234 * @param sequence two RNA sequences separated by the '&' character 235 * @param structure A pointer to the character array where the 236 * secondary structure in dot-bracket notation will be written to 237 * @return the minimum free energy (MFE) in kcal/mol 238 */ 239 float 240 vrna_cofold(const char *sequence, 241 char *structure); 242 243 244 /** 245 * End simplified global MFE interface 246 * @} 247 */ 248 249 /** 250 * End group mfe_global 251 * @} 252 */ 253 254 /** 255 * @addtogroup mfe_backtracking 256 * @{ 257 * @brief Backtracking related interfaces 258 */ 259 260 /** 261 * @brief 262 */ 263 int 264 vrna_backtrack_from_intervals(vrna_fold_compound_t *vc, 265 vrna_bp_stack_t *bp_stack, 266 sect bt_stack[], 267 int s); 268 269 270 /** 271 * @brief Backtrack an MFE (sub)structure 272 * 273 * This function allows one to backtrack the MFE structure for a (sub)sequence 274 * 275 * @note On error, the function returns #INF / 100. and stores the empty string 276 * in @p structure. 277 * 278 * @pre Requires pre-filled MFE dynamic programming matrices, i.e. one has to call vrna_mfe() 279 * prior to calling this function 280 * 281 * @see vrna_mfe(), vrna_pbacktrack5() 282 * 283 * @param fc fold compound 284 * @param length The length of the subsequence, starting from the 5' end 285 * @param structure A pointer to the character array where the secondary structure in 286 * dot-bracket notation will be written to. (Must have size of at least $p length + 1) 287 * 288 * @return The minimum free energy (MFE) for the specified @p length in kcal/mol and 289 * a corresponding secondary structure in dot-bracket notation (stored in @p structure) 290 */ 291 float 292 vrna_backtrack5(vrna_fold_compound_t *fc, 293 unsigned int length, 294 char *structure); 295 296 int 297 vrna_backtrack_window(vrna_fold_compound_t *fc, 298 const char *Lfold_filename, 299 long file_pos, 300 char **structure, 301 double mfe); 302 303 /** 304 * End backtracking related interfaces 305 * @} 306 */ 307 308 309 #endif 310