1 #ifndef VIENNA_RNA_PACKAGE_NEIGHBOR_H
2 #define VIENNA_RNA_PACKAGE_NEIGHBOR_H
3 
4 /**
5  *  @file     ViennaRNA/landscape/neighbor.h
6  *  @ingroup  neighbors
7  *  @brief    Methods to compute the neighbors of an RNA secondary structure.
8  */
9 
10 /**
11  *  @addtogroup neighbors
12  *  @{
13  *
14  *  @brief Different functions to generate structural neighbors of a secondary structure according to a particular Move Set.
15  *
16  *  This module contains methods to compute the neighbors of an RNA secondary structure. Neighbors of a given structure
17  *  are all structures that differ in exactly one base pair. That means one can insert an delete base pairs in the
18  *  given structure. These insertions and deletions of base pairs are usually called moves. A third move which is
19  *  considered in these methods is a shift move. A shifted base pair has one stable position and one position that
20  *  changes. These moves are encoded as follows: \n
21  *  - insertion: (i, j) where i,j > 0 \n
22  *  - deletion: (i, j) where i,j < 0 \n
23  *  shift: (i, j) where either i > 0, j < 0 or i < 0, j > 0 \n
24  *
25  *  The negative position of a shift indicates the position that has changed.
26  *
27  *  \code
28  *  Example:
29  *           We have given a sequence and a structure.
30  *           Sequence  AAGGAAACC
31  *           Structure ..(.....)
32  *           Indices   123456789
33  *
34  *           The given base pair is (3,9) and the neighbors are the insertion (4, 8), the deletion (-3,-9), the shift (3,-8)
35  *           and the shift (-4, 9).
36  *           This leads to the neighbored structures:
37  *           ...(....)
38  *           .........
39  *           ...(...).
40  *           ....(...)
41  *  \endcode
42  *
43  *  A simple method to construct all insertions is to iterate over the positions of a sequence twice. The first
44  *  iteration has the index i in [1, sequence length], the second iteration has the index j in [i+1, sequence length].
45  *  All pairs (i,j) with compatible letters and which are non-crossing with present base pairs are valid neighbored
46  *  insertion moves.
47  *  Valid deletion moves are all present base pairs with negative sign.
48  *  Valid shift moves are constructed by taking all paired positions as fix position of a shift move and iterating over
49  *  all positions of the sequence. If the letters of a position are compatible and if it the move is non-crossing with
50  *  existing base pairs, we have a valid shift move.
51  *  The method of generating shift moves can be accelerated by skipping neighbored base pairs.
52  *
53  *  If we need to construct all neighbors several times for subsequent moves, we can speed up the task by using
54  *  the move set of the previous structure. The previous move set has to be filtered, such that all moves that would
55  *  cross the next selected move are non-crossing. Next, the selected move has to be removed. Then one has to only
56  *  to generate all moves that were not possible before.
57  *  One move is the inverted selected move (if it was an insertion, simply make the indices negative).
58  *  The generation of all other new moves is different and depends on the selected move. It is easy for an insertion move,
59  *  because we have only to include all non-crossing shift moves, that are possible with the new base pair. For that we can
60  *  either iterate over the sequence or we can select all crossing shift moves in the filter procedure and convert them into
61  *  shifts.
62  *
63  *  The generation of new moves given a deletion is a little bit more complex, because we can create more moves. At first
64  *  we can insert the deleted pair as insertion move. Then we generate all insertions that would have crossed the deleted
65  *  base pair. Finally we construct all crossing shift moves.
66  *
67  *  If the given move is a shift, we can save much time by specifying the intervals for the generation of new moves.
68  *  The interval which was enclosed by the positive position of the shift move and the previous paired position is the
69  *  freed interval after applying the move. This freed interval includes all positions and base pairs that we need to
70  *  construct new insertions and shifts. All these new moves have one position in the freed interval and the other position
71  *  in the environment of the freed interval. The environment are all position which are outside the freed interval, but within
72  *  the same enclosing loop of the shift move. The environment for valid base pairs can be divided into one or more intervals,
73  *  depending on the shift move. The following examples describe a few scenarios to specify the intervals of the environment.
74  *
75  * \internal
76  * Example 1:
77  * increase the freed interval with a shift move
78  * AAAAGACAAGAAACAAAAGAGAAACAACAAACAAGAAACAAACAAAA
79  * ....(....(...)....(.(...)..)......(...)...).... // structure before the shift
80  * ....(....(...)....(.(...)......)..(...)...).... // structure after the shift
81  * ............................[__]............... // freed interval
82  * ..................[________]................... // interval that can pair with the freed interval
83  *
84  * Example 2:
85  * switch the freed interval with a shift move
86  * AAAAGACAAGAAACAAAAGAGAAACAACAAACAAGAAACAAACAAAA
87  * ....(....(...)....(.(...)..)......(...)...).... // structure before the shift
88  * ....(.(..(...)....).(...).........(...)...).... // structure after the shift
89  * ...................[_______]................... // freed interval
90  * ....[_].....................[_____________].... // intervals that can pair with the freed interval
91  *
92  * Example 3:
93  * decrease the freed interval with a shift move
94  * AAAAGACAAGAAACAAAAGAGAAACAACAAACAAGAAACAAACAAAA
95  * ....(....(...)....(.(...)......)..(...)...).... // structure before the shift
96  * ....(....(...)....(.(...)..)......(...)...).... // structure after the shift
97  * ............................[__]............... // freed interval
98  * ....[____________].............[__________].... // intervals that can pair with the freed interval
99  * \endinternal
100  *
101  *  @image html shift_move_intervals.svg
102  *  @image latex shift_move_intervals.eps
103  *
104  *  Given the intervals of the environment and the freed interval, the new shift moves can be constructed quickly.
105  *  One has to take all positions of pairs from the environment in order to create valid pairs with positions in the freed interval.
106  *  The same procedure can be applied for the other direction. This is taking all paired positions within the freed interval
107  *  in order to look for pairs with valid positions in the intervals of the environment.
108  *
109  */
110 
111 #include <ViennaRNA/fold_compound.h>
112 #include <ViennaRNA/landscape/move.h>
113 
114 /**
115  *  @brief  Prototype of the neighborhood update callback
116  *
117  *  @see  vrna_move_neighbor_diff_cb(), #VRNA_NEIGHBOR_CHANGE, #VRNA_NEIGHBOR_INVALID, #VRNA_NEIGHBOR_NEW
118  *
119  *  @param  fc        The fold compound the calling function is working on
120  *  @param  neighbor  The move that generates the (changed or new) neighbor
121  *  @param  state     The state of the neighbor (move) as supplied by argument @p neighbor
122  *  @param  data      Some arbitrary data pointer as passed to vrna_move_neighbor_diff_cb()
123  */
124 typedef void (vrna_callback_move_update)(vrna_fold_compound_t *fc,
125                                          vrna_move_t          neighbor,
126                                          unsigned int         state,
127                                          void                 *data);
128 
129 
130 /**
131  *  @brief  State indicator for a neighbor that has been changed
132  *
133  *  @see vrna_move_neighbor_diff_cb()
134  */
135 #define VRNA_NEIGHBOR_CHANGE   1
136 
137 
138 /**
139  *  @brief  State indicator for a neighbor that has been invalidated
140  *
141  *  @see vrna_move_neighbor_diff_cb()
142  */
143 #define VRNA_NEIGHBOR_INVALID   2
144 
145 
146 /**
147  *  @brief  State indicator for a neighbor that has become newly available
148  *
149  *  @see vrna_move_neighbor_diff_cb()
150  */
151 #define VRNA_NEIGHBOR_NEW       3
152 
153 
154 /**
155  * @brief Alters the loopIndices array that was constructed with vrna_loopidx_from_ptable().
156  *
157  * The loopIndex of the current move will be inserted.
158  * The correctness of the input will not be checked because the speed should be optimized.
159  *
160  * @param[in,out] loopidx   The loop index data structure that needs an update
161  * @param[in]     pt        A pair table on which the move will be executed
162  * @param         length    The length of the structure
163  * @param[in]     m         The move that is applied to the current structure
164  */
165 void
166 vrna_loopidx_update(int               *loopidx,
167                     const short       *pt,
168                     int               length,
169                     const vrna_move_t *m);
170 
171 
172 /**
173  * @brief Generate neighbors of a secondary structure
174  *
175  * This function allows one to generate all structural neighbors (according to a particular move set)
176  * of an RNA secondary structure. The neighborhood is then returned as a list of transitions / moves
177  * required to transform the current structure into the actual neighbor.
178  *
179  * @see vrna_neighbors_successive(), vrna_move_apply(),
180  * #VRNA_MOVESET_INSERTION, #VRNA_MOVESET_DELETION, #VRNA_MOVESET_SHIFT, #VRNA_MOVESET_DEFAULT
181  *
182  * @param[in] vc        A vrna_fold_compound_t containing the energy parameters and model details
183  * @param[in] pt        The pair table representation of the structure
184  * @param     options   Options to modify the behavior of this function, e.g. available move set
185  * @return              Neighbors as a list of moves / transitions (the last element in the list has both of its fields set to 0)
186  */
187 vrna_move_t *
188 vrna_neighbors(vrna_fold_compound_t *vc,
189                const short          *pt,
190                unsigned int         options);
191 
192 
193 /**
194  * @brief Generate neighbors of a secondary structure (the fast way)
195  *
196  * This function implements a fast way to generate all neighbors of a secondary structure
197  * that results from successive applications of individual moves. The speed-up results from
198  * updating an already known list of valid neighbors before the individual move towards the
199  * current structure took place. In essence, this function removes neighbors that are not
200  * accessible anymore and inserts neighbors emerging after a move took place.
201  *
202  * @see vrna_neighbors(), vrna_move_apply(),
203  * #VRNA_MOVESET_INSERTION, #VRNA_MOVESET_DELETION, #VRNA_MOVESET_SHIFT, #VRNA_MOVESET_DEFAULT
204  *
205  * @param[in]   vc                  A vrna_fold_compound_t containing the energy parameters and model details
206  * @param[in]   curr_move           The move that was/will be applied to @p prev_pt
207  * @param[in]   prev_pt             A pair table representation of the structure before @p curr_move is/was applied
208  * @param[in]   prev_neighbors      The list of neighbors of @p prev_pt
209  * @param       size_prev_neighbors The size of @p prev_neighbors, i.e. the lists length
210  * @param[out]  size_neighbors      A pointer to store the size / length of the new neighbor list
211  * @param       options             Options to modify the behavior of this function, e.g. available move set
212  * @return                          Neighbors as a list of moves / transitions (the last element in the list has both of its fields set to 0)
213  */
214 vrna_move_t *
215 vrna_neighbors_successive(const vrna_fold_compound_t  *vc,
216                           const vrna_move_t           *curr_move,
217                           const short                 *prev_pt,
218                           const vrna_move_t           *prev_neighbors,
219                           int                         size_prev_neighbors,
220                           int                         *size_neighbors,
221                           unsigned int                options);
222 
223 
224 /**
225  *  @brief  Apply a move to a secondary structure and indicate which neighbors have changed consequentially
226  *
227  *  This function applies a move to a secondary structure and explores the local neighborhood of the
228  *  affected loop. Any changes to previously compatible neighbors that have been affected by this loop
229  *  will be reported through a callback function. In particular, any of the three cases might appear:
230  *  - A previously available neighbor move has changed, usually the free energy change of the move (#VRNA_NEIGHBOR_CHANGE)
231  *  - A previously available neighbor move became invalid (#VRNA_NEIGHBOR_INVALID)
232  *  - A new neighbor move becomes available (#VRNA_NEIGHBOR_NEW)
233  *
234  *  @see  vrna_move_neighbor_diff(), #VRNA_NEIGHBOR_CHANGE, #VRNA_NEIGHBOR_INVALID, #VRNA_NEIGHBOR_NEW,
235  *        #vrna_callback_move_update
236  *
237  *  @param  fc        A fold compound for the RNA sequence(s) that this function operates on
238  *  @param  ptable    The current structure as pair table
239  *  @param  move      The move to apply
240  *  @param  cb        The address of the callback function that is passed the neighborhood changes
241  *  @param  data      An arbitrary data pointer that will be passed through to the callback function @p cb
242  *  @param  options   Options to modify the behavior of this function, .e.g available move set
243  *  @return           Non-zero on success, 0 otherwise
244  */
245 int
246 vrna_move_neighbor_diff_cb(vrna_fold_compound_t       *fc,
247                            short                      *ptable,
248                            vrna_move_t                move,
249                            vrna_callback_move_update  *cb,
250                            void                       *data,
251                            unsigned int               options);
252 
253 
254 /**
255  *  @brief  Apply a move to a secondary structure and indicate which neighbors have changed consequentially
256  *
257  *  Similar to vrna_move_neighbor_diff_cb(), this function applies a move to a secondary structure and
258  *  reports back the neighbors of the current structure become affected by this move. Instead of executing
259  *  a callback for each of the affected neighbors, this function compiles two lists of neighbor moves, one
260  *  that is returned and consists of all moves that are novel or may have changed in energy, and a second,
261  *  @p invalid_moves, that consists of all the neighbor moves that become invalid, respectively.
262  *
263  *  @param  fc        A fold compound for the RNA sequence(s) that this function operates on
264  *  @param  ptable    The current structure as pair table
265  *  @param  move      The move to apply
266  *  @param  invalid_moves The address of a move list where the function stores those moves that become invalid
267  *  @param  options   Options to modify the behavior of this function, .e.g available move set
268  *  @return           A list of moves that might have changed in energy or are novel compared to the structure before application of the move
269  */
270 vrna_move_t *
271 vrna_move_neighbor_diff(vrna_fold_compound_t  *fc,
272                         short                 *ptable,
273                         vrna_move_t           move,
274                         vrna_move_t           **invalid_moves,
275                         unsigned int          options);
276 
277 
278 /**
279  *  @}
280  */
281 #endif /* VIENNA_RNA_PACKAGE_NEIGHBOR_H */
282