#ifndef VIENNA_RNA_PACKAGE_UNSTRUCTURED_DOMAIN_H
#define VIENNA_RNA_PACKAGE_UNSTRUCTURED_DOMAIN_H

/**
 *  @file     unstructured_domains.h
 *  @ingroup  domains_up
 *  @brief    Functions to modify unstructured domains, e.g. to incorporate ligands binding to unpaired stretches
 */

/**
 *  @addtogroup domains_up
 *
 *  @brief  Add and modify unstructured domains to the RNA folding grammar
 *
 *  This module provides the tools to add and modify unstructured domains to the production rules of the RNA folding grammar.
 *  Usually this functionality is utilized for incorporating ligand binding to unpaired stretches of an RNA.
 *
 *  @bug  Although the additional production rule(s) for unstructured domains as described in @ref sec_domains_up
 *        are always treated as 'segments possibly bound to one or more ligands', the current implementation requires
 *        that at least one ligand is bound. The default implementation already takes care of the required changes,
 *        however, upon using callback functions other than the default ones, one has to take care of this fact.
 *        Please also note, that this behavior might change in one of the next releases, such that the decomposition
 *        schemes as shown above comply with the actual implementation.
 *
 *  A default implementation allows one to readily use this feature by simply adding sequence motifs and corresponding
 *  binding free energies with the function vrna_ud_add_motif() (see also @ref ligands_up).
 *
 *  The grammar extension is realized using a callback function that
 *  - evaluates the binding free energy of a ligand to its target sequence segment (white boxes in the figures above), or
 *  - returns the free energy of an unpaired stretch possibly bound by a ligand, stored in the additional @em U DP matrix.
 *
 *  The callback is passed the segment positions, the loop context, and which of the two above mentioned
 *  evaluations is required. A second callback implements the pre-processing step that
 *  prepares the @em U DP matrix by evaluating all possible cases of the additional production rule.
 *  Both callbacks have a default implementation in @em RNAlib, but may be overwritten by a
 *  user-implementation, making it fully user-customizable.
 *
 *  For equilibrium probability computations, two additional callbacks exist. One to store/add and one to retrieve the
 *  probability of unstructured domains at particular positions. Our implementation already takes care of computing
 *  the probabilities, but users of the unstructured domain feature are required to provide a mechanism to efficiently
 *  store/add the corresponding values into some external data structure.
 */


/**
 *  @addtogroup ligands_up
 *
 *  @brief  Add ligand binding to loop regions using the @ref domains_up feature
 *
 *  Sometimes, certain ligands, like single strand binding (SSB) proteins, compete with intramolecular
 *  base pairing of the RNA. In situations, where the dissociation constant of the ligand is known and
 *  the ligand binds to a consecutive stretch of single-stranded nucleotides we can use the @ref domains_up
 *  functionality to extend the RNA folding grammar. This module provides a convenience default implementation
 *  that covers most of the application scenarios.
 *
 *  The function vrna_ud_add_motif() attaches a ligand's sequence motif and corresponding binding free energy
 *  to the list of known ligand motifs within a #vrna_fold_compound_t.domains_up attribute. The first call to
 *  this function initializes the @ref domains_up feature with our default implementation. Subsequent calls of
 *  secondary structure prediction algorithms with the modified #vrna_fold_compound_t then directly include
 *  the competition of the ligand with regular base pairing. Since we utilize the unstructured domain extension,
 *  the ligand binding model can be removed again using the vrna_ud_remove() function.
 *
 */


/** @brief Typename for the ligand binding extension data structure #vrna_unstructured_domain_s
 *  @ingroup domains_up
 */
typedef struct vrna_unstructured_domain_s vrna_ud_t;

/** @brief Typename for the unstructured domain motif list entry #vrna_unstructured_domain_motif_s
 *  @ingroup domains_up
 */
typedef struct vrna_unstructured_domain_motif_s vrna_ud_motif_t;

/*
 * The typedefs above precede the includes below in this header.
 * NOTE(review): presumably so that the included headers can already refer to
 * vrna_ud_t / vrna_ud_motif_t -- confirm before reordering.
 */
#include <ViennaRNA/datastructures/basic.h>
#include <ViennaRNA/fold_compound.h>
#include <ViennaRNA/utils/structures.h>

/**
 *  @brief Callback to retrieve binding free energy of a ligand bound to an unpaired sequence segment
 *
 *  @ingroup domains_up
 *
 *  @callback
 *  @parblock
 *  This function will be called to determine the additional energy contribution of a specific unstructured
 *  domain, e.g. the binding free energy of some ligand.
 *  @endparblock
 *
 *  @param  vc        The current #vrna_fold_compound_t
 *  @param  i         The start of the unstructured domain (5' end)
 *  @param  j         The end of the unstructured domain (3' end)
 *  @param  loop_type The loop context of the unstructured domain
 *  @param  data      Auxiliary data
 *  @return           The auxiliary energy contribution in deka-cal/mol
 */
typedef int (vrna_callback_ud_energy)(vrna_fold_compound_t *vc,
                                      int                  i,
                                      int                  j,
                                      unsigned int         loop_type,
                                      void                 *data);

/**
 *  @brief Callback to retrieve Boltzmann factor of the binding free energy of a ligand bound to an unpaired sequence segment
 *  @ingroup domains_up
 *
 *  @callback
 *  @parblock
 *  This function will be called to determine the additional energy contribution of a specific unstructured
 *  domain, e.g. the binding free energy of some ligand (Partition function variant, i.e. the Boltzmann factors
 *  instead of actual free energies).
 *  @endparblock
 *
 *  @param  vc        The current #vrna_fold_compound_t
 *  @param  i         The start of the unstructured domain (5' end)
 *  @param  j         The end of the unstructured domain (3' end)
 *  @param  loop_type The loop context of the unstructured domain
 *  @param  data      Auxiliary data
 *  @return           The auxiliary energy contribution as Boltzmann factor
 */
typedef FLT_OR_DBL (vrna_callback_ud_exp_energy)(vrna_fold_compound_t *vc,
                                                 int                  i,
                                                 int                  j,
                                                 unsigned int         loop_type,
                                                 void                 *data);

/**
 *  @brief Callback for pre-processing the production rule of the ligand binding to unpaired stretches feature
 *
 *  @ingroup domains_up
 *
 *  @callback
 *  @parblock
 *  The production rule for the unstructured domain grammar extension
 *  @endparblock
 *
 *  @param  vc    The current #vrna_fold_compound_t
 *  @param  data  Auxiliary data
 */
typedef void (vrna_callback_ud_production)(vrna_fold_compound_t *vc,
                                           void                 *data);

/**
 *  @brief Callback for pre-processing the production rule of the ligand binding to unpaired stretches feature (partition function variant)
 *
 *  @ingroup domains_up
 *
 *  @callback
 *  @parblock
 *  The production rule for the unstructured domain grammar extension (Partition function variant)
 *  @endparblock
 *
 *  @param  vc    The current #vrna_fold_compound_t
 *  @param  data  Auxiliary data
 */
typedef void (vrna_callback_ud_exp_production)(vrna_fold_compound_t *vc,
                                               void                 *data);


/**
 *  @brief Callback to store/add equilibrium probability for a ligand bound to an unpaired sequence segment
 *  @ingroup domains_up
 *
 *  @callback
 *  @parblock
 *  A callback function to store equilibrium probabilities for the unstructured domain feature
 *  @endparblock
 *
 *  @note NOTE(review): the parameters are not described in this header. @p vc, @p i, @p j, @p loop_type
 *        and @p data presumably carry the same meaning as in #vrna_callback_ud_energy, and @p exp_energy
 *        presumably is the value to be stored/added -- confirm against the default implementation.
 */
typedef void (vrna_callback_ud_probs_add)(vrna_fold_compound_t *vc,
                                          int                  i,
                                          int                  j,
                                          unsigned int         loop_type,
                                          FLT_OR_DBL           exp_energy,
                                          void                 *data);

/**
 *  @brief Callback to retrieve equilibrium probability for a ligand bound to an unpaired sequence segment
 *  @ingroup domains_up
 *
 *  @callback
 *  @parblock
 *  A callback function to retrieve equilibrium probabilities for the unstructured domain feature
 *  @endparblock
 *
 *  @note NOTE(review): the parameters are not described in this header. @p vc, @p i, @p j, @p loop_type
 *        and @p data presumably carry the same meaning as in #vrna_callback_ud_energy, and @p motif
 *        presumably selects a motif by its index -- confirm against the default implementation.
 */
typedef FLT_OR_DBL (vrna_callback_ud_probs_get)(vrna_fold_compound_t *vc,
                                                int                  i,
                                                int                  j,
                                                unsigned int         loop_type,
                                                int                  motif,
                                                void                 *data);


/**
 *  @brief Flag to indicate ligand bound to unpaired stretch in the exterior loop
 *  @ingroup domains_up
 */
#define VRNA_UNSTRUCTURED_DOMAIN_EXT_LOOP     1U

/**
 *  @brief Flag to indicate ligand bound to unpaired stretch in a hairpin loop
 *  @ingroup domains_up
 */
#define VRNA_UNSTRUCTURED_DOMAIN_HP_LOOP      2U

/**
 *  @brief Flag to indicate ligand bound to unpaired stretch in an interior loop
 *  @ingroup domains_up
 */
#define VRNA_UNSTRUCTURED_DOMAIN_INT_LOOP     4U

/**
 *  @brief Flag to indicate ligand bound to unpaired stretch in a multibranch loop
 *  @ingroup domains_up
 */
#define VRNA_UNSTRUCTURED_DOMAIN_MB_LOOP      8U

/**
 *  @brief Flag to indicate ligand binding without additional unbound nucleotides (motif-only)
 *  @ingroup domains_up
 */
#define VRNA_UNSTRUCTURED_DOMAIN_MOTIF        16U

/**
 *  @brief Flag to indicate ligand bound to unpaired stretch in any loop (convenience macro)
 *
 *  Bitwise OR of the four loop flags; #VRNA_UNSTRUCTURED_DOMAIN_MOTIF is not part of it.
 *
 *  @ingroup domains_up
 */
#define VRNA_UNSTRUCTURED_DOMAIN_ALL_LOOPS    (VRNA_UNSTRUCTURED_DOMAIN_EXT_LOOP | \
                                               VRNA_UNSTRUCTURED_DOMAIN_HP_LOOP | \
                                               VRNA_UNSTRUCTURED_DOMAIN_INT_LOOP | \
                                               VRNA_UNSTRUCTURED_DOMAIN_MB_LOOP)

/**
 *  @brief  Data structure to store all functionality for ligand binding
 *  @ingroup domains_up
 */
struct vrna_unstructured_domain_s {
  /*
   **********************************
   * Keep track of all motifs added
   **********************************
   */
  int           uniq_motif_count;                   /**< @brief The unique number of motifs of different lengths */
  unsigned int  *uniq_motif_size;                   /**< @brief An array storing a unique list of motif lengths */

  int           motif_count;                        /**< @brief Total number of distinguished motifs */
  char          **motif;                            /**< @brief Motif sequences */
  char          **motif_name;                       /**< @brief Motif identifier/name */
  unsigned int  *motif_size;                        /**< @brief Motif lengths */
  double        *motif_en;                          /**< @brief Ligand binding free energy contribution */
  unsigned int  *motif_type;                        /**< @brief Type of motif, i.e. loop type the ligand binds to */

  /*
   **********************************
   * Grammar extension for ligand
   * binding
   **********************************
   */
  vrna_callback_ud_production     *prod_cb;         /**< @brief Callback to ligand binding production rule, i.e. create/fill DP free energy matrices
                                                     *   @details This callback will be executed right before the actual secondary structure decompositions,
                                                     *   and, therefore, any implementation must not interleave with the regular DP matrices.
                                                     */
  vrna_callback_ud_exp_production *exp_prod_cb;     /**< @brief Callback to ligand binding production rule, i.e. create/fill DP partition function matrices */
  vrna_callback_ud_energy         *energy_cb;       /**< @brief Callback to evaluate free energy of ligand binding to a particular unpaired stretch */
  vrna_callback_ud_exp_energy     *exp_energy_cb;   /**< @brief Callback to evaluate Boltzmann factor of ligand binding to a particular unpaired stretch */
  void                            *data;            /**< @brief Auxiliary data structure passed to energy evaluation callbacks */
  vrna_callback_free_auxdata      *free_data;       /**< @brief Callback to free auxiliary data structure */
  vrna_callback_ud_probs_add      *probs_add;       /**< @brief Callback to store/add outside partition function */
  vrna_callback_ud_probs_get      *probs_get;       /**< @brief Callback to retrieve outside partition function */
};


/**
 *  @brief  Entry of an unstructured domain motif list
 *
 *  Lists of this type are returned by vrna_ud_motifs_centroid(), vrna_ud_motifs_MEA(),
 *  vrna_ud_motifs_MFE(), and vrna_ud_detect_motifs(). For the first three, the documented
 *  end-of-list marker is an element with @p start=0 and @p number=-1.
 *
 *  @note NOTE(review): @p start presumably is the sequence position where the motif begins and
 *        @p number presumably is the index of the motif within #vrna_unstructured_domain_s.motif --
 *        confirm against the implementation.
 *
 *  @ingroup domains_up
 */
struct vrna_unstructured_domain_motif_s {
  int start;
  int number;
};


/**
 *  @brief  Detect unstructured domains in centroid structure
 *
 *  Given a centroid structure and a set of unstructured domains compute
 *  the list of unstructured domain motifs present in the centroid.
 *  Since we do not explicitly annotate unstructured domain motifs in
 *  dot-bracket strings, this function can be used to check for the
 *  presence and location of unstructured domain motifs under the
 *  assumption that the dot-bracket string is the centroid structure
 *  of the equilibrium ensemble.
 *
 *  @see vrna_centroid()
 *  @ingroup domains_up
 *
 *  @param  fc          The fold_compound data structure with pre-computed equilibrium probabilities and model settings
 *  @param  structure   The centroid structure in dot-bracket notation
 *  @return             A list of unstructured domain motifs (possibly NULL). The last element terminates the list with
 *                      @p start=0, @p number=-1
 */
vrna_ud_motif_t *
vrna_ud_motifs_centroid(vrna_fold_compound_t  *fc,
                        const char            *structure);


/**
 *  @brief  Detect unstructured domains in MEA structure
 *
 *  Given an MEA structure and a set of unstructured domains compute
 *  the list of unstructured domain motifs present in the MEA structure.
 *  Since we do not explicitly annotate unstructured domain motifs in
 *  dot-bracket strings, this function can be used to check for the
 *  presence and location of unstructured domain motifs under the
 *  assumption that the dot-bracket string is the MEA structure
 *  of the equilibrium ensemble.
 *
 *  @see MEA()
 *  @ingroup domains_up
 *
 *  @param  fc                The fold_compound data structure with pre-computed equilibrium probabilities and model settings
 *  @param  structure         The MEA structure in dot-bracket notation
 *  @param  probability_list  The list of probabilities to extract the MEA structure from
 *  @return                   A list of unstructured domain motifs (possibly NULL). The last element terminates the list
 *                            with @p start=0, @p number=-1
 */
vrna_ud_motif_t *
vrna_ud_motifs_MEA(vrna_fold_compound_t *fc,
                   const char           *structure,
                   vrna_ep_t            *probability_list);


/**
 *  @brief  Detect unstructured domains in MFE structure
 *
 *  Given an MFE structure and a set of unstructured domains compute
 *  the list of unstructured domain motifs present in the MFE structure.
 *  Since we do not explicitly annotate unstructured domain motifs in
 *  dot-bracket strings, this function can be used to check for the
 *  presence and location of unstructured domain motifs under the
 *  assumption that the dot-bracket string is the MFE structure
 *  of the equilibrium ensemble.
 *
 *  @see vrna_mfe()
 *  @ingroup domains_up
 *
 *  @param  fc          The fold_compound data structure with model settings
 *  @param  structure   The MFE structure in dot-bracket notation
 *  @return             A list of unstructured domain motifs (possibly NULL). The last element terminates the list with @p start=0, @p number=-1
 */
vrna_ud_motif_t *
vrna_ud_motifs_MFE(vrna_fold_compound_t *fc,
                   const char           *structure);


/**
 *  @brief  Add an unstructured domain motif, e.g. for ligand binding
 *
 *  This function adds a ligand binding motif and the associated binding free energy
 *  to the #vrna_ud_t attribute of a #vrna_fold_compound_t. The motif data
 *  will then be used in subsequent secondary structure predictions. Multiple calls
 *  to this function with different motifs append all additional data to a list of
 *  ligands, which all will be evaluated. Ligand motif data can be removed from the
 *  #vrna_fold_compound_t again using the vrna_ud_remove() function. The loop
 *  type parameter allows one to limit the ligand binding to particular loop types,
 *  such as the exterior loop, hairpin loops, interior loops, or multibranch loops.
 *
 *  @see  #VRNA_UNSTRUCTURED_DOMAIN_EXT_LOOP, #VRNA_UNSTRUCTURED_DOMAIN_HP_LOOP,
 *        #VRNA_UNSTRUCTURED_DOMAIN_INT_LOOP, #VRNA_UNSTRUCTURED_DOMAIN_MB_LOOP, #VRNA_UNSTRUCTURED_DOMAIN_ALL_LOOPS,
 *        vrna_ud_remove()
 *
 *  @ingroup domains_up
 *
 *  @param  vc          The #vrna_fold_compound_t data structure the ligand motif should be bound to
 *  @param  motif       The sequence motif the ligand binds to
 *  @param  motif_en    The binding free energy of the ligand in kcal/mol
 *  @param  motif_name  The name/id of the motif (may be @p NULL)
 *  @param  loop_type   The loop type the ligand binds to
 *
 */
void vrna_ud_add_motif(vrna_fold_compound_t *vc,
                       const char           *motif,
                       double               motif_en,
                       const char           *motif_name,
                       unsigned int         loop_type);


/**
 *  @brief  Get a list of unique motif sizes that start at a certain position within the sequence
 *
 *  @note NOTE(review): how the returned array is terminated and who owns (frees) it is not stated
 *        in this header -- confirm against the implementation before relying on either.
 *
 *  @param  vc          The #vrna_fold_compound_t data structure to query
 *  @param  i           The sequence position the motifs are required to start at
 *  @param  loop_type   The loop context; presumably a combination of the VRNA_UNSTRUCTURED_DOMAIN_*_LOOP flags (TODO confirm)
 *  @return             A list of motif sizes
 */
int *vrna_ud_get_motif_size_at(vrna_fold_compound_t *vc,
                               int                  i,
                               unsigned int         loop_type);


/*
 * NOTE(review): undocumented in this header. Judging from its name and its
 * signature, which mirrors vrna_ud_get_motif_size_at(), this presumably returns
 * the motifs (rather than their sizes) that start at position i in the given
 * loop context. Element meaning, list termination, and ownership of the
 * returned array need to be confirmed against the implementation.
 */
int *
vrna_ud_get_motifs_at(vrna_fold_compound_t  *vc,
                      int                   i,
                      unsigned int          loop_type);


/*
 * NOTE(review): undocumented in this header. Takes a fold compound and a
 * structure string and returns a vrna_ud_motif_t list, like the
 * vrna_ud_motifs_*() functions above. Presumably "structure" is in dot-bracket
 * notation and the list is terminated the same way (start=0, number=-1) --
 * confirm against the implementation.
 */
vrna_ud_motif_t *
vrna_ud_detect_motifs(vrna_fold_compound_t  *vc,
                      const char            *structure);


/**
 *  @brief Remove ligand binding to unpaired stretches
 *
 *  This function removes all ligand motifs that were bound to a #vrna_fold_compound_t using
 *  the vrna_ud_add_motif() function.
 *
 *  @ingroup domains_up
 *
 *  @param vc The #vrna_fold_compound_t data structure the ligand motif data should be removed from
 */
void vrna_ud_remove(vrna_fold_compound_t *vc);


/**
 *  @brief  Attach an auxiliary data structure
 *
 *  This function binds an arbitrary, auxiliary data structure for user-implemented ligand binding.
 *  The optional callback @p free_cb will be passed the bound data structure whenever the #vrna_fold_compound_t
 *  is removed from memory to avoid memory leaks.
 *
 *  @see  vrna_ud_set_prod_rule_cb(), vrna_ud_set_exp_prod_rule_cb(),
 *        vrna_ud_remove()
 *
 *  @ingroup domains_up
 *
 *  @param  vc      The #vrna_fold_compound_t data structure the auxiliary data structure should be bound to
 *  @param  data    A pointer to the auxiliary data structure
 *  @param  free_cb A pointer to a callback function that frees memory occupied by @p data
 */
void vrna_ud_set_data(vrna_fold_compound_t        *vc,
                      void                        *data,
                      vrna_callback_free_auxdata  *free_cb);


/**
 *  @brief Attach production rule callbacks for free energy computations
 *
 *  Use this function to bind a user-implemented grammar extension for unstructured
 *  domains.
 *
 *  The callback @p e_cb needs to evaluate the free energy contribution @f$f(i,j)@f$ of
 *  the unpaired segment @f$[i,j]@f$. It will be executed in each of the regular secondary
 *  structure production rules. Whenever the callback is passed the #VRNA_UNSTRUCTURED_DOMAIN_MOTIF
 *  flag via its @p loop_type parameter the contribution of any ligand that consecutively
 *  binds from position @f$i@f$ to @f$j@f$ (the white box) is requested. Otherwise, the callback
 *  usually performs a lookup in the precomputed @p B matrices. Which @p B matrix is
 *  addressed will be indicated by the flags #VRNA_UNSTRUCTURED_DOMAIN_EXT_LOOP, #VRNA_UNSTRUCTURED_DOMAIN_HP_LOOP,
 *  #VRNA_UNSTRUCTURED_DOMAIN_INT_LOOP, and #VRNA_UNSTRUCTURED_DOMAIN_MB_LOOP. As their names already imply,
 *  they specify exterior loops (@p F production rule), hairpin loops and interior loops
 *  (@p C production rule), and multibranch loops (@p M and @p M1 production rule).
 *
 *  @image html   ligands_up_callback.svg
 *  @image latex  ligands_up_callback.eps
 *
 *  The @p pre_cb callback will be executed as a pre-processing step right before the
 *  regular secondary structure rules. Usually one would use this callback to fill the
 *  dynamic programming matrices @p U and to prepare the auxiliary data structure
 *  #vrna_unstructured_domain_s.data
 *
 *  @image html   B_prod_rule.svg
 *  @image latex  B_prod_rule.eps
 *
 *  @note NOTE(review): this header calls the additional DP matrices both @p U and @p B;
 *        presumably both names denote the same matrices -- confirm and unify the wording.
 *
 *  @ingroup domains_up
 *
 *  @param  vc      The #vrna_fold_compound_t data structure the callback will be bound to
 *  @param  pre_cb  A pointer to a callback function for the @p B production rule
 *  @param  e_cb    A pointer to a callback function for free energy evaluation
 */
void vrna_ud_set_prod_rule_cb(vrna_fold_compound_t        *vc,
                              vrna_callback_ud_production *pre_cb,
                              vrna_callback_ud_energy     *e_cb);


/**
 *  @brief Attach production rule for partition function
 *
 *  This function is the partition function companion of vrna_ud_set_prod_rule_cb().
 *
 *  Use it to bind callbacks to (i) fill the @p U production rule dynamic programming
 *  matrices and/or prepare the #vrna_unstructured_domain_s.data, and (ii) provide a callback
 *  to retrieve partition functions for subsegments @f$ [i,j] @f$.
 *
 *  @image html   B_prod_rule.svg
 *  @image latex  B_prod_rule.eps
 *
 *  @image html   ligands_up_callback.svg
 *  @image latex  ligands_up_callback.eps
 *
 *  @ingroup domains_up
 *
 *  @see vrna_ud_set_prod_rule_cb()
 *
 *  @param  vc        The #vrna_fold_compound_t data structure the callback will be bound to
 *  @param  pre_cb    A pointer to a callback function for the @p B production rule
 *  @param  exp_e_cb  A pointer to a callback function that retrieves the partition function
 *                    for a segment @f$[i,j]@f$ that may be bound by one or more ligands.
 */
void vrna_ud_set_exp_prod_rule_cb(vrna_fold_compound_t            *vc,
                                  vrna_callback_ud_exp_production *pre_cb,
                                  vrna_callback_ud_exp_energy     *exp_e_cb);


/*
 * NOTE(review): undocumented in this header. Takes one callback of each
 * probability callback type: "setter" is a vrna_callback_ud_probs_add
 * (store/add equilibrium probability) and "getter" is a
 * vrna_callback_ud_probs_get (retrieve equilibrium probability). Presumably
 * both are bound to the unstructured domain data of vc, i.e. to the probs_add
 * and probs_get members of vrna_unstructured_domain_s -- confirm against the
 * implementation.
 */
void vrna_ud_set_prob_cb(vrna_fold_compound_t       *vc,
                         vrna_callback_ud_probs_add *setter,
                         vrna_callback_ud_probs_get *getter);


#endif