1 /* 2 Last changed Time-stamp: <2006-02-14 01:01:41 xtof> 3 $Id: MorganHiggs.h,v 1.7 2007/10/01 08:29:47 Kinwalker Exp $ 4 */ 5 #ifndef _MORGANHIGGS_H_ 6 #define _MORGANHIGGS_H_ 7 #include <deque> 8 #include <vector> 9 #include <string> 10 #include <utility> 11 #include <map> 12 #include <algorithm> 13 #include <iterator> 14 15 #include <cmath> 16 #include <limits> 17 #include <iostream> 18 #include <fstream> 19 20 #include "Energy.h" 21 #include "Util.h" 22 #include "options.h" 23 24 //std::pair<double,std::string> 25 std::vector<std::pair<double,std::string> > 26 MorganHiggsStudlaEnergy(std::string sequence, 27 std::string src, 28 std::string tgt, 29 double saddlE,int look_ahead,std::string grouping); 30 31 std::vector<std::pair<double,std::string> > 32 MorganHiggsEnergy(std::string sequence, 33 std::string src, 34 std::string tgt, 35 double saddlE,int look_ahead,std::string grouping); 36 37 //std::map<int,std::vector<int> > GetConflictGroup(std::vector<std::pair<int,int> > only_in_base_pairs,std::vector<std::pair<int,int> > only_in_base_pairs2); 38 void GetConflictGroup(std::map<int,std::vector<int> > & conflict_group, const std::vector<std::pair<int,int> > & only_in_base_pairs,const std::vector<std::pair<int,int> > & only_in_base_pairs2); 39 40 //std::vector<std::pair<double,std::string> > PartialPath(std::vector<int> combination, std::string sequence,std::vector<std::pair<int,int> > backtrack_base, 41 // std::map<int,std::vector<int> > conflict_group,std::vector<std::pair<int,int> > only_in_base_pairs,std::vector<std::pair<int,int> > only_in_base_pairs2); 42 43 void DoPartialPath(std::vector<std::pair<double,std::string> > & path, const std::vector<int> & combination, std::string sequence, const std::vector<std::pair<int,int> > & 44 backtrack_base, const std::map<int,std::vector<int> > & conflict_group, const std::vector<std::pair<int,int> > & only_in_base_pairs, 45 const std::vector<std::pair<int,int> > & only_in_base_pairs2); 46 47 //void PartialPath(std::vector<std::pair<double,std::string> > & path,const std::vector<int> & combination, std::string sequence,const std::vector<std::pair<int,int> > & 48 //backtrack_base, const std::map<int,std::vector<int> > & conflict_group,const std::vector<std::pair<int,int> > & only_in_base_pairs, 49 // const std::vector<std::pair<int,int> > & only_in_base_pairs2); 50 51 52 //std::string PrintConflictGroup(std::map<int,std::vector<int> > conflict_group); 53 std::string PrintConflictGroup(std::map<int,std::vector<int> > conflict_group,std::vector<std::pair<int,int> > only_in_base_pairs, 54 std::vector<std::pair<int,int> > only_in_base_pairs2); 55 56 std::string PrintConflictGroup(std::vector<std::vector<int> > conflict_group,std::vector<std::pair<int,int> > only_in_base_pairs, 57 std::vector<std::pair<int,int> > only_in_base_pairs2); 58 59 std::string PrintCombination(std::vector<int> combination); 60 void MakePairTableFromBasePairs(const std::vector<std::pair<int,int> >& base_pairs,int length); 61 62 void UpdateBacktrackBase(std::vector<std::pair<int,int> >& base_pairs,const std::map<int,std::vector<int> > & conflict_group,const std::vector<int> & remove, 63 const std::vector<std::pair<int,int> > & only_in_base_pairs,const std::vector<std::pair<int,int> > & only_in_base_pairs2); 64 65 void FindBestPartialPathCombination(std::vector<int> & best_combination,int lookahead,std::string sequence,const std::vector<std::pair<int,int> > & backtrack_base, 66 const std::map<int,std::vector<int> > & conflict_group,const std::vector<std::pair<int,int> > & only_in_base_pairs, 67 const std::vector<std::pair<int,int> > & only_in_base_pairs2); 68 69 bool IsGCPair(std::pair<int,int> bp,const std::string & sequence); 70 71 std::vector<std::pair<int,int> > GetLonelyBasePairs(const std::vector<std::pair<int,int> > & current,const std::vector<std::pair<int,int> > & target,const std::string & sequence ); 72 73 74 75 76 77 78 79 80 81 /** 82 class for a base pair stack, i.e. sets of consecutive basepairs. Bulges do not 83 count. 84 */ 85 class Stack{ 86 public: 87 88 89 //are all bases in the stack GC 90 bool GC;//=true; 91 92 //maximal size for which a GC stack is minuscule 93 const static int GCMaxMinusculeStackSize=1; 94 95 96 //maximal size for which a non-GC stack is minuscule 97 const static int NonGCMaxMinusculeStackSize=2; 98 99 100 //yhr base pairs that make up the stack. the innermost bp is at the front, the outermost at the back. 101 std::deque<std::pair<int,int> > bp; 102 103 104 bool CanBeAddedInside(std::pair<int,int> bp ); 105 106 bool CanBeAddedOutside(std::pair<int,int> bp ); 107 108 bool CanBeAddedInside(int first,int second); 109 110 bool CanBeAddedOutside(int first,int second); 111 112 bool IsMinuscule(); 113 114 static bool IsMinuscule(int size,bool GC); 115 116 Stack(); 117 118 119 int GetSize(); 120 121 122 123 bool Contains(std::pair<int,int> bp ); 124 125 bool Contains(int first,int second); 126 127 void Remove(std::pair<int,int> bp ); 128 129 void Remove(int bp_idx); 130 131 132 void AddOutside(std::pair<int,int> bp,char first,char second); 133 134 void AddInside(std::pair<int,int> bp,char first,char second); 135 136 void AddOutside(int first,int second,char cfirst,char csecond); 137 138 void AddInside(int first,int second,char cfirst,char csecond); 139 140 141 static bool IsGCPair(char first,char second); 142 143 void Clear(); 144 145 146 147 static inline bool IsGC(char base); 148 149 std::string Print(); 150 }; 151 152 153 154 155 156 157 158 159 /** 160 PartialPath tries all combination in a conflict group and appends the best path to the path traversed so far 161 NOTE:Indices of elements that are added are augmented by BP_ADD_CONST, so when undoing a trail, we now in which 162 data structure the index was and whether it is to be added or removed. 163 */ 164 165 class PartialPath{ 166 public: 167 168 PartialPath(std::vector<std::pair<int,int> > src,std::vector<std::pair<int,int> > target,std::string sequence,int lookahead,const std::vector<std::pair<int,int> > & only_in_base_pairs,const std::vector<std::pair<int,int> > & only_in_base_pairs2); 169 170 171 172 /** 173 Function invoked to find partial paths and append them to sequence of structures. Creates a conflict group, then tries all combinations and selects a 174 path covering lookahead members of the conflict group until it is empty. 175 */ 176 177 void FindOptimalPath(std::vector<std::pair<double,std::string> >& structures); 178 179 /** 180 Add/removes all elements in add/remove_catalog given in combination. 181 */ 182 183 double WalkPartialPath( std::vector<int> combination ); 184 185 /** 186 adds/removes all elements in the conflit group at the given index thus constructing 187 a partial path 188 */ 189 void TreatConflictGroupIndex(int idx); 190 191 192 /* 193 returns true if the pair (first,second) is in pair_table, false otherwise. 194 */ 195 196 bool PairIsInPairtable(int first,int second); 197 198 199 /** 200 Get the first element of the base pair at index idx in the add_catalog, 201 i.e. those elts in the conflict group that need ot be added to the src structure 202 */ 203 // int GetFirstFromAddCatalog(int idx); 204 /** 205 Get the second element of the base pair at index idx in the add_catalog, 206 */ 207 // int GetSecondFromAddCatalog(int idx); 208 209 /* 210 Retrieves index of the first element in the idx-th component of conflict_group, 211 which is an index in the add_catalog 212 */ 213 int GetAddCatalogIndex(int idx); 214 215 216 /* 217 Retrieves index of the remove_idx element in the group_idx-th component of conflict_group, 218 which is an index in the remove_catalog 219 */ 220 int GetRemoveCatalogIndex(int group_idx,int remove_idx); 221 222 223 /** 224 Get the first element at index of the remove_idx element in the group_idx-th component of conflict_group 225 i.e. remove_catalog[conflict_group[group_idx][remove_idx]].first 226 */ 227 228 int GetFirstFromConflictGroupRemove(int group_idx,int remove_idx); 229 230 231 /** 232 Get the second element at index of the remove_idx element in the group_idx-th component of conflict_group 233 i.e. remove_catalog[conflict_group[group_idx][remove_idx]].second 234 */ 235 236 int GetSecondFromConflictGroupRemove(int group_idx,int remove_idx); 237 238 /* 239 Retrieves first element of base pair in add_catalog at index of the first element in the idx-th component of conflict_group, 240 i.e. add_catalog[conflict_group[group_idx][0]].first 241 */ 242 243 int GetFirstFromConflictGroupAdd(int group_idx); 244 245 246 /* 247 Retrieves second element of base pair in add_catalog at index of the first element in the idx-th component of conflict_group, 248 i.e. add_catalog[conflict_group[group_idx][0]].first 249 */ 250 251 int GetSecondFromConflictGroupAdd(int group_idx); 252 253 254 /** 255 add the base pair bp to pair_table 256 */ 257 // void AddBasePair(const std::pair<int,int> bp); 258 259 /** 260 Add a pair to pair_table 261 */ 262 void AddToPairTable(std::pair<int,int> bp); 263 264 /** 265 Add the pair (first,second) to pair_table. 266 */ 267 void AddToPairTable(int first,int second); 268 269 /** 270 Add the pair (first,second) to pair_table and record to this->currentPartialPathTrail so it can be undone by UndoPartialPathTrail(). 271 */ 272 void AddToPairTableWithLog(int first,int second,int addCatalogIdx); 273 274 275 /** 276 Add pair from pair_table 277 */ 278 279 void RemoveFromPairTable(std::pair<int,int> bp); 280 /** 281 Remove the pair (first,second) from pair_table. 282 */ 283 void RemoveFromPairTable(int first,int second); 284 285 /** 286 Remove the pair (first,second) to pair_table and record to this->currentPartialPathTrail so it can be undone by UndoPartialPathTrail(). 287 */ 288 void RemoveFromPairTableWithLog(int first,int second,int removeCatalogIdx); 289 290 /** 291 Lists all minuscule Stacks at the beginning of treating a conflict group. This ensures that after the removal 292 of initial minuscule stacks there is at most 1 minuscule stack at a time. 293 */ 294 std::vector<Stack> GetMinusculeStacks(); 295 296 //=========================== 297 // list of base_pairs that need to be added to move from src to target. They make up the first index in each component of a conflict_group 298 std::vector<std::pair<int,int> > add_catalog; 299 // list of base_pairs that need to be removed to move from src to target. They make up all indices after the first in each component of a conflict_group 300 std::vector<std::pair<int,int> > remove_catalog; 301 302 303 /* 304 List of vectors. The first element in each vector gives index of element in add_catalog, all other indices point to elts in remove_catalog that are 305 in conflict with it. 306 */ 307 std::vector<std::vector<int> > conflict_group; 308 309 310 /** 311 A sequence of this->lookahead indices to elements in conflict_group that denote a partial path. 312 */ 313 std::vector<int> combination; 314 315 316 /** 317 The initial structure for the barrier heuristic 318 */ 319 std::vector<std::pair<int,int> > src; 320 321 /* 322 final structure of barrier heuristic 323 */ 324 std::vector<std::pair<int,int> > target; 325 326 /* 327 The RNA that is being folded 328 */ 329 330 std::string sequence; 331 332 /** 333 Number of elements treated at once in a combination. 334 */ 335 int lookahead; 336 337 338 /** 339 the current minuscule stack (at most one is allowed). If there 340 is none, it is empty, i.e. of size 0. 341 */ 342 343 Stack minusculeStack; 344 345 346 /** 347 list of indices add/removed during latest call to TreatConflictGroupIndex() 348 */ 349 std::vector<int> currentPartialPathTrail; 350 351 /** 352 list of indices add/removed during the call to TreatConflictGroupIndex() that yielded the 353 lowest saddle of all combinations tried so far. 354 */ 355 356 std::vector<int> bestPartialPathTrail; 357 358 /** 359 list of concatenated partial paths since the conflict_group has been built. (i.e. FindOptimalPath was called the last time). 360 */ 361 std::vector<int> pathTrail; 362 363 364 365 366 /** 367 Add the structures traversed by integrating the elements from conflict_group 368 to the list of such structures. called at the end of FindOptimalPath. 369 */ 370 371 void AddPathTrailToStructureList(std::vector<std::pair<double,std::string> >& structures); 372 373 /** 374 Add the indices of the elts in add/remove_catalog that were added to the path when dealing 375 with the best combination of lookahead elements in conflict_group to the path trail. 376 Updates pair_table to the structure obtained when best partial path is taken. 377 */ 378 void AddBestPartialPathTrailToPathTrail(); 379 380 /** 381 Empty the vector that holds the last combination of treated indices for a combination 382 of lookahead elelemtns from the conflict_group. 383 */ 384 void UndoPartialPathTrail(); 385 386 387 /* 388 Find the elements in add_catalog that have the lowest number of conflicts with the current 389 structure and build the conflict_group form those and their conflicts. 390 391 */ 392 void GetConflictGroup(); 393 394 /* 395 Remove the base pairs in the minuscule Stack mStack from pair_table unless it is part of the target conformation. 396 In that case base_pairs are added (inside first) until it is not minuscule any more. 397 398 */ 399 400 void RemoveMinusculeStack(Stack mStack); 401 /* 402 Remove the base pairs in the instnace variable mStack from pair_table unless it is part of the target conformation. 403 In that case base_pairs are added (inside first) until it is not minuscule any more. 404 405 */ 406 407 void RemoveMinusculeStack(); 408 409 410 /** 411 Returns true if a adding the base pair (first,second) creates a second minuscule stack, false otherwise. 412 Note that adding a base pair can never create two minuscle stacks. 413 */ 414 bool BPAdditionCreatesSecondMinusculeStack(int first,int second); 415 416 /** 417 Update add_catalog, remove_catalog and src after the best path for the elements in conflict_group has been 418 found and accepted as a path. 419 */ 420 void UpdateBacktrackBase(); 421 422 /** 423 Counts the number of stacks that are created by removing the base pair (first,second) from pair_table. 424 If a new minuscule stakc is created it is placed in createdMinusculeStack1, a second one in createdMinusculeStack2 425 (both global variables). If mStack disappears by removing (first,second) it is cleared. 426 427 */ 428 429 int StacksCreatedByBPRemoval(int first,int second); 430 431 /** 432 Returns true if the pair (first,second) is lonely in pair_table, false otherwise. 433 */ 434 bool IsLonelyPair(int first,int second); 435 436 /** 437 Returns true if the minuscul stack is contained in target. Actually, if both the 438 first and last bp are in target, the function returns true as currently no stacks 439 greater than size 2 are minuscule. Returns false if the stack is not contained in the 440 target structure. 441 */ 442 bool MinusculeStackIsInTarget(Stack mStack); 443 }; 444 445 446 447 448 449 450 451 #endif 452 453 /* End of file */ 454