1 /*
2   Last changed Time-stamp: <2006-02-14 01:01:41 xtof>
3   $Id: MorganHiggs.h,v 1.7 2007/10/01 08:29:47 Kinwalker Exp $
4 */
5 #ifndef _MORGANHIGGS_H_
6 #define _MORGANHIGGS_H_
7 #include <deque>
8 #include <vector>
9 #include <string>
10 #include <utility>
11 #include <map>
12 #include <algorithm>
13 #include <iterator>
14 
15 #include <cmath>
16 #include <limits>
17 #include <iostream>
18 #include <fstream>
19 
20 #include "Energy.h"
21 #include "Util.h"
22 #include "options.h"
23 
24 //std::pair<double,std::string>
25 std::vector<std::pair<double,std::string> >
26 MorganHiggsStudlaEnergy(std::string sequence,
27 		  std::string src,
28 		  std::string tgt,
29 		  double saddlE,int look_ahead,std::string grouping);
30 
31 std::vector<std::pair<double,std::string> >
32 MorganHiggsEnergy(std::string sequence,
33 		  std::string src,
34 		  std::string tgt,
35 		  double saddlE,int look_ahead,std::string grouping);
36 
37 //std::map<int,std::vector<int> > GetConflictGroup(std::vector<std::pair<int,int> > only_in_base_pairs,std::vector<std::pair<int,int> > only_in_base_pairs2);
38 void GetConflictGroup(std::map<int,std::vector<int> > & conflict_group, const std::vector<std::pair<int,int> > & only_in_base_pairs,const std::vector<std::pair<int,int> > & only_in_base_pairs2);
39 
40 //std::vector<std::pair<double,std::string> > PartialPath(std::vector<int> combination, std::string sequence,std::vector<std::pair<int,int> > backtrack_base,
41 //						std::map<int,std::vector<int> > conflict_group,std::vector<std::pair<int,int> > only_in_base_pairs,std::vector<std::pair<int,int> > only_in_base_pairs2);
42 
43 void DoPartialPath(std::vector<std::pair<double,std::string> > & path, const std::vector<int> & combination, std::string sequence, const std::vector<std::pair<int,int> > &
44 backtrack_base,  const std::map<int,std::vector<int> > & conflict_group, const std::vector<std::pair<int,int> > & only_in_base_pairs,
45 		  const std::vector<std::pair<int,int> > & only_in_base_pairs2);
46 
47 //void PartialPath(std::vector<std::pair<double,std::string> > & path,const std::vector<int> & combination, std::string sequence,const std::vector<std::pair<int,int> > &
48 //backtrack_base, const std::map<int,std::vector<int> > & conflict_group,const std::vector<std::pair<int,int> > & only_in_base_pairs,
49 //		 const std::vector<std::pair<int,int> > & only_in_base_pairs2);
50 
51 
52 //std::string PrintConflictGroup(std::map<int,std::vector<int> > conflict_group);
53 std::string PrintConflictGroup(std::map<int,std::vector<int> > conflict_group,std::vector<std::pair<int,int> > only_in_base_pairs,
54 			       std::vector<std::pair<int,int> > only_in_base_pairs2);
55 
56 std::string PrintConflictGroup(std::vector<std::vector<int> > conflict_group,std::vector<std::pair<int,int> > only_in_base_pairs,
57 			       std::vector<std::pair<int,int> > only_in_base_pairs2);
58 
59 std::string PrintCombination(std::vector<int> combination);
60 void MakePairTableFromBasePairs(const std::vector<std::pair<int,int> >& base_pairs,int length);
61 
62  void UpdateBacktrackBase(std::vector<std::pair<int,int> >& base_pairs,const std::map<int,std::vector<int> > & conflict_group,const std::vector<int> & remove,
63 			  const std::vector<std::pair<int,int> > & only_in_base_pairs,const std::vector<std::pair<int,int> > & only_in_base_pairs2);
64 
65 void FindBestPartialPathCombination(std::vector<int> & best_combination,int lookahead,std::string sequence,const std::vector<std::pair<int,int> > & backtrack_base,
66 const std::map<int,std::vector<int> > & conflict_group,const std::vector<std::pair<int,int> > & only_in_base_pairs,
67 				    const std::vector<std::pair<int,int> > & only_in_base_pairs2);
68 
69 bool IsGCPair(std::pair<int,int> bp,const std::string & sequence);
70 
71 std::vector<std::pair<int,int> > GetLonelyBasePairs(const std::vector<std::pair<int,int> > & current,const std::vector<std::pair<int,int> > & target,const std::string & sequence );
72 
73 
74 
75 
76 
77 
78 
79 
80 
81 /**
82 class for a base pair stack, i.e. sets of consecutive basepairs. Bulges do not
83 count.
84 */
85 class Stack{
86 public:
87 
88 
89   //are all bases in the stack GC
90   bool GC;//=true;
91 
92   //maximal size for which a GC stack is minuscule
93   const static int GCMaxMinusculeStackSize=1;
94 
95 
96   //maximal size for which a non-GC stack is minuscule
97   const static int NonGCMaxMinusculeStackSize=2;
98 
99 
100   //yhr base pairs that make up the stack. the innermost bp is at the front, the outermost at the back.
101   std::deque<std::pair<int,int> > bp;
102 
103 
104   bool CanBeAddedInside(std::pair<int,int> bp );
105 
106   bool CanBeAddedOutside(std::pair<int,int> bp );
107 
108   bool CanBeAddedInside(int first,int second);
109 
110   bool CanBeAddedOutside(int first,int second);
111 
112   bool IsMinuscule();
113 
114   static bool IsMinuscule(int size,bool GC);
115 
116   Stack();
117 
118 
119   int GetSize();
120 
121 
122 
123   bool Contains(std::pair<int,int> bp );
124 
125   bool Contains(int first,int second);
126 
127   void Remove(std::pair<int,int> bp );
128 
129   void Remove(int bp_idx);
130 
131 
132   void AddOutside(std::pair<int,int> bp,char first,char second);
133 
134   void AddInside(std::pair<int,int> bp,char first,char second);
135 
136   void AddOutside(int first,int second,char cfirst,char csecond);
137 
138   void AddInside(int first,int second,char cfirst,char csecond);
139 
140 
141  static  bool IsGCPair(char first,char second);
142 
143   void Clear();
144 
145 
146 
147   static inline bool IsGC(char base);
148 
149   std::string  Print();
150 };
151 
152 
153 
154 
155 
156 
157 
158 
159 /**
160 PartialPath tries all combination in a conflict group and appends the best path to the path traversed so far
161 NOTE:Indices of elements that are added are augmented by BP_ADD_CONST, so when undoing a trail, we now in which
162 data structure the index was and whether it is to be added or removed.
163 */
164 
165 class PartialPath{
166 public:
167 
168  PartialPath(std::vector<std::pair<int,int> > src,std::vector<std::pair<int,int> > target,std::string sequence,int lookahead,const std::vector<std::pair<int,int> > & only_in_base_pairs,const std::vector<std::pair<int,int> > & only_in_base_pairs2);
169 
170 
171 
172   /**
173     Function invoked to find partial paths and append them to sequence of structures. Creates a conflict group, then tries all combinations and selects a
174      path covering lookahead members of the conflict group until it is empty.
175   */
176 
177   void FindOptimalPath(std::vector<std::pair<double,std::string> >& structures);
178 
179   /**
180       Add/removes all elements in add/remove_catalog given in combination.
181   */
182 
183   double WalkPartialPath( std::vector<int> combination );
184 
185   /**
186      adds/removes all elements in the conflit group at the given index thus constructing
187 a partial path
188   */
189   void TreatConflictGroupIndex(int idx);
190 
191 
192   /*
193    returns true if the pair (first,second) is in pair_table, false otherwise.
194   */
195 
196   bool PairIsInPairtable(int first,int second);
197 
198 
199   /**
200      Get the first element of the base pair at index idx in the add_catalog,
201  i.e. those elts in the conflict group that need ot be added to the src structure
202   */
203   // int GetFirstFromAddCatalog(int idx);
204   /**
205      Get the second element of the base pair at index idx in the add_catalog,
206   */
207   //  int GetSecondFromAddCatalog(int idx);
208 
209   /*
210      Retrieves index of the first element in the idx-th component of conflict_group,
211      which is an index in the add_catalog
212   */
213  int GetAddCatalogIndex(int idx);
214 
215 
216  /*
217      Retrieves index of the remove_idx element in the group_idx-th component of conflict_group,
218      which is an index in the remove_catalog
219   */
220   int GetRemoveCatalogIndex(int group_idx,int remove_idx);
221 
222 
223  /**
224      Get the first element at index of the remove_idx element in the group_idx-th component of conflict_group
225      i.e. remove_catalog[conflict_group[group_idx][remove_idx]].first
226   */
227 
228   int GetFirstFromConflictGroupRemove(int group_idx,int remove_idx);
229 
230 
231  /**
232      Get the second element at index of the remove_idx element in the group_idx-th component of conflict_group
233      i.e. remove_catalog[conflict_group[group_idx][remove_idx]].second
234   */
235 
236   int GetSecondFromConflictGroupRemove(int group_idx,int remove_idx);
237 
238   /*
239      Retrieves first element of base pair in add_catalog at index of the first element in the idx-th component of conflict_group,
240      i.e. add_catalog[conflict_group[group_idx][0]].first
241   */
242 
243  int GetFirstFromConflictGroupAdd(int group_idx);
244 
245 
246   /*
247      Retrieves second element of base pair in add_catalog at index of the first element in the idx-th component of conflict_group,
248      i.e. add_catalog[conflict_group[group_idx][0]].first
249   */
250 
251   int GetSecondFromConflictGroupAdd(int group_idx);
252 
253 
254  /**
255      add the base pair bp to pair_table
256   */
257   //  void AddBasePair(const std::pair<int,int> bp);
258 
259   /**
260     Add a pair to pair_table
261   */
262   void AddToPairTable(std::pair<int,int> bp);
263 
264   /**
265     Add the pair (first,second) to pair_table.
266   */
267   void AddToPairTable(int first,int second);
268 
269  /**
270     Add the pair (first,second) to pair_table and record to this->currentPartialPathTrail so it can be undone by UndoPartialPathTrail().
271   */
272   void AddToPairTableWithLog(int first,int second,int addCatalogIdx);
273 
274 
275   /**
276     Add pair from pair_table
277   */
278 
279   void RemoveFromPairTable(std::pair<int,int> bp);
280   /**
281     Remove the pair (first,second) from pair_table.
282   */
283   void RemoveFromPairTable(int first,int second);
284 
285 /**
286     Remove the pair (first,second) to pair_table and record to this->currentPartialPathTrail so it can be undone by UndoPartialPathTrail().
287   */
288   void RemoveFromPairTableWithLog(int first,int second,int removeCatalogIdx);
289 
290   /**
291     Lists all minuscule Stacks at the beginning of treating a conflict group. This ensures that after the removal
292     of initial minuscule stacks there is at most 1 minuscule stack at a time.
293   */
294   std::vector<Stack> GetMinusculeStacks();
295 
296   //===========================
297   // list of base_pairs that need to be added to move from src to target. They make up the first index in each component of a conflict_group
298   std::vector<std::pair<int,int> > add_catalog;
299  // list of base_pairs that need to be removed to move from src to target. They make up all indices after the first in each component of a conflict_group
300   std::vector<std::pair<int,int> > remove_catalog;
301 
302 
303   /*
304    List of vectors. The first element in each vector gives index of element in add_catalog, all other indices point to elts in remove_catalog that are
305    in conflict with it.
306   */
307   std::vector<std::vector<int> > conflict_group;
308 
309 
310   /**
311     A sequence of this->lookahead indices to elements in conflict_group that denote a partial path.
312   */
313   std::vector<int> combination;
314 
315 
316   /**
317     The initial structure for the barrier heuristic
318   */
319   std::vector<std::pair<int,int> > src;
320 
321   /*
322   final structure of barrier heuristic
323   */
324  std::vector<std::pair<int,int> > target;
325 
326   /*
327      The RNA that is being folded
328   */
329 
330      std::string sequence;
331 
332   /**
333     Number of elements treated at once in a combination.
334   */
335   int lookahead;
336 
337 
338   /**
339     the current minuscule stack (at most one is allowed). If there
340 is none, it is empty, i.e. of size 0.
341   */
342 
343   Stack minusculeStack;
344 
345 
346   /**
347      list of indices add/removed during latest call to TreatConflictGroupIndex()
348   */
349   std::vector<int> currentPartialPathTrail;
350 
351   /**
352      list of indices add/removed during the call to TreatConflictGroupIndex() that yielded the
353      lowest saddle of all combinations tried so far.
354   */
355 
356   std::vector<int> bestPartialPathTrail;
357 
358   /**
359      list of concatenated partial paths since the conflict_group has been built. (i.e. FindOptimalPath was called the last time).
360   */
361   std::vector<int> pathTrail;
362 
363 
364 
365 
366   /**
367      Add the structures traversed by integrating the elements from conflict_group
368      to the list of such structures. called at the end of FindOptimalPath.
369   */
370 
371   void AddPathTrailToStructureList(std::vector<std::pair<double,std::string> >& structures);
372 
373   /**
374      Add the indices of the elts in add/remove_catalog that were added to the path when dealing
375      with the best combination of lookahead elements in conflict_group to the path trail.
376      Updates pair_table to the structure obtained when best partial path is taken.
377   */
378   void  AddBestPartialPathTrailToPathTrail();
379 
380   /**
381      Empty the vector that holds the last combination of treated indices for a combination
382      of lookahead elelemtns from the conflict_group.
383   */
384   void UndoPartialPathTrail();
385 
386 
387   /*
388      Find the elements in add_catalog that have the lowest number of conflicts with the current
389      structure and build the conflict_group form those and their conflicts.
390 
391   */
392   void GetConflictGroup();
393 
394   /*
395     Remove the base pairs in the minuscule Stack mStack from pair_table unless it is part of the target conformation.
396     In that case base_pairs are added (inside first) until it is not minuscule any more.
397 
398   */
399 
400   void RemoveMinusculeStack(Stack mStack);
401   /*
402     Remove the base pairs in the instnace variable mStack from pair_table unless it is part of the target conformation.
403     In that case base_pairs are added (inside first) until it is not minuscule any more.
404 
405   */
406 
407   void RemoveMinusculeStack();
408 
409 
410   /**
411      Returns true if a adding the base pair (first,second) creates a second minuscule stack, false otherwise.
412      Note that adding a base pair can never create two minuscle stacks.
413   */
414   bool BPAdditionCreatesSecondMinusculeStack(int first,int second);
415 
416   /**
417     Update add_catalog, remove_catalog and src after the best path for the elements in conflict_group has been
418     found and accepted as a path.
419    */
420   void UpdateBacktrackBase();
421 
422   /**
423     Counts the number of stacks that are created by removing the base pair (first,second) from pair_table.
424     If a new minuscule stakc is created it is placed in createdMinusculeStack1, a second one in createdMinusculeStack2
425     (both global variables). If mStack disappears by removing (first,second) it is cleared.
426 
427   */
428 
429   int StacksCreatedByBPRemoval(int first,int second);
430 
431   /**
432   Returns true if the pair (first,second) is lonely in pair_table, false otherwise.
433   */
434   bool IsLonelyPair(int first,int second);
435 
436   /**
437    Returns true if the minuscul stack is contained in target. Actually, if both the
438   first and last bp are in target, the function returns true as currently no stacks
439   greater than size 2 are minuscule. Returns false if the stack is not contained in the
440   target structure.
441   */
442   bool MinusculeStackIsInTarget(Stack mStack);
443 };
444 
445 
446 
447 
448 
449 
450 
451 #endif
452 
453 /* End of file */
454