1 /*
2 Last changed Time-stamp: <2007-04-02 14:38:35 caro>
3 $Id: MorganHiggs.cpp,v 1.13 2007/10/21 21:01:35 Kinwalker Exp $
4 */
5
6 #include <cstring>
7 #include "MorganHiggs.h"
8 #define BP_ADD_CONST 10000
9 using std::cout;
10 using std::endl;
11
12 extern short * pair_table;
13
14 extern "C" {
15 #include "energy_const.h" /* defines INF */
16 }
17
18 bool MHS_debug=false;
19 int min_stack_size=1;
20
21 /**
22 rekursive Fakultaetsformel fuer n>=0.
23 */
24 inline int
Factorial(int n)25 Factorial(int n)
26 {
27 if(n==1 || n==0)
28 return (1);
29
30 return (n*Factorial(n-1));
31 }
32
N_take_k(int n,int k)33 int N_take_k(int n,int k){
34
35 if(k>=n) return Factorial(n);
36 int ret=1;
37 for(int i=0;i<k;i++) ret*=n-i;
38 return ret;
39
40 }
41
42
43 /**
44 * Indices in the permutation group start at 0
45 */
46 std::vector<int>
GetPermutation(int elts,int idx)47 GetPermutation(int elts,int idx)
48 {
49 std::vector<int> permutation = std::vector<int>();
50
51 for (int i=0; i<elts-1; i++) {
52
53 // position i is occupied by element rank, i.e. which takes the
54 // slice of element [(elts-i)*rank,(elts-i)*(rank+1)] in the
55 // lexicographical order of the elts in the permutation group
56 int basket_size = Factorial(elts-i-1);
57 int rank = (int)std::floor(idx/basket_size);
58 permutation.push_back(rank);
59 idx = idx % basket_size;
60 }
61
62 std::vector<int> ret = std::vector<int>();
63 std::vector<int> available = std::vector<int>();
64
65 for (int i=1; i<=elts; i++) available.push_back(i);
66
67 for(int i=0;i<elts-1;i++) {
68 // have to choose the permutation[i]'s elements from those that
69 // are still available.
70 ret.push_back(available[permutation[i]]);
71 available.erase(available.begin()+permutation[i]);
72 }
73
74 // with the last element there is no choice.
75 ret.push_back(available.front());
76 return ret;
77 }
78
79
80 /**
81 Get a combination of k elements from a set of n without replacement.
82 Idx denotes which of the n choose k (=binomial coefficient) combinations is
83 returned. Indices range from 1 to n.
84 */
85
GetCombination(int n,int k,long int idx)86 std::vector<int> GetCombination(int n,int k,long int idx)
87 {
88
89 if(n<=k) return GetPermutation(n,idx);
90 #ifdef _DEBUG_MH_GETCOMBINATION_
91 std::cout<<"n "<<n<<"\n";
92 std::cout<<"k "<<k<<"\n";
93 std::cout<<"idx "<<idx<<"\n";
94 #endif
95 //Example 10 choose 4
96 //idx=x_0*7*8*9+x_1*7*8+x_2*7+x_3
97 //9*8*7
98 long int modulus=1;
99 for(int i=n-1;i>n-k;i--) modulus*=i;
100
101 #ifdef _DEBUG_MH_GETCOMBINATION_
102 std::cout<<"final modulus "<<modulus<<"\n";
103 #endif
104 // vector<int> ret(n);
105
106 //std::vector<int> elts=std::vector<int>();
107 //workaround as the the previous line did not compile
108 //std::vector<int> elts = *(new std::vector<int>());
109 std::vector<int> elts = std::vector<int>();
110 for (int i=1; i<=n; i++) elts.push_back(i);
111 //i is idx of a number in the combination, i=0 stands for the right side.
112 for(int i=0;i<k;i++){
113 #ifdef _DEBUG_MH_GETCOMBINATION_
114 std::cout<<"i "<<i<<"\n";
115 #endif
116 //ranges from 0 to n-i-1
117 #ifdef _DEBUG_MH_GETCOMBINATION_
118 std::cout<<"idx "<<idx<<"\n";
119 #endif
120 int chosen_idx=(int)floor(idx/modulus);
121 #ifdef _DEBUG_MH_GETCOMBINATION_
122 std::cout<<"chosen_idx "<<chosen_idx<<"\n";
123 #endif
124 idx-=chosen_idx*modulus;
125 #ifdef _DEBUG_MH_GETCOMBINATION_
126 std::cout<<"idx "<<idx<<"\n";
127 #endif
128 //if condition to avoid division by 0
129 if(modulus!=1) modulus=(int)(modulus/(n-i-1));
130 #ifdef _DEBUG_MH_GETCOMBINATION_
131 std::cout<<"modulus "<<modulus<<"\n";
132 #endif
133 int i1=elts[i];
134 int i2=elts[i+chosen_idx];
135 #ifdef _DEBUG_MH_GETCOMBINATION_
136 std::cout<<"i1 "<<i1<<"\n";
137 #endif
138 #ifdef _DEBUG_MH_GETCOMBINATION_
139 std::cout<<"i2 "<<i2<<"\n";
140 #endif
141 elts[i]=i2;
142 elts[i+chosen_idx]=i1;
143 }
144 elts.erase(elts.begin()+k,elts.end());
145 #ifdef _DEBUG_MH_GETCOMBINATION_
146 std::cout<<"GetCombination returns"<<std::endl;
147 #endif
148 return elts;
149 }
150 /**
151 Returns true, if p1 and p2 are incompatible, false otherwise.
152 Returns false if p1==p2.
153 */
154
BasePairConflict(std::pair<int,int> p1,std::pair<int,int> p2)155 bool BasePairConflict(std::pair<int,int> p1, std::pair<int,int> p2){
156 // same base pair in both structures
157 // (-----)
158 // (-----)
159 if (p1.first == p2.first && p1.second == p2.second)
160 return (false);
161
162 // base pairs have on position in common
163 // (-----) (-----)
164 // (-----) or (-----)
165 if (p1.first == p2.first || p1.first == p2.second
166 || p1.second == p2.first || p1.second == p2.second)
167 return (true);
168
169 // "crossing" base pairs
170 // (-------)
171 // (-----)
172 if (p1.first <= p2.first && p1.second >= p2.first
173 && p1.second <= p2.second)
174 return (true);
175
176 // "crossing" base pairs
177 // (-----) (---------)
178 // (-------) or (-------)
179 if (p1.first >= p2.first && p1.first <= p2.second
180 && p1.second >= p2.second)
181 return (true);
182 else
183 return (false);
184 }
185
186
187 /**
188 Lists the indices in a vector pairs that are in conflict with a base pair p.
189 */
190
191 std::vector<int>
FindConflicts(std::pair<int,int> p,std::vector<std::pair<int,int>> pairs)192 FindConflicts(std::pair<int,int> p, std::vector<std::pair<int,int> > pairs)
193 {
194 std::vector<int> conflicts= std::vector<int>();
195
196 for (size_t i=0; i<pairs.size(); i++) {
197
198 if ( BasePairConflict(p,pairs[i]))
199 conflicts.push_back(i);
200 }
201
202 return (conflicts);
203 }
204
205
206
207 /*
208 Given a base pair and a sequence, returns if the pair is GC or CG,
209 false otherwise. Used by the stack class to determine whether a stack
210 i minuscule, as the max size for minuscule GC stacks is Stack::GCMaxMinusculeStackSize
211 while it is Stack::NonGCMaxMinusculeStackSize for non-GC stacks.
212 */
213
IsGCPair(std::pair<int,int> bp,const std::string & sequence)214 bool IsGCPair(std::pair<int,int> bp,const std::string & sequence){
215
216 bool first=(sequence[bp.first-1]=='G' || sequence[bp.first-1]=='C');
217 bool second=(sequence[bp.second-1]=='G' || sequence[bp.second-1]=='C');
218
219 if(first && second) return true;
220 else return false;
221 }
222
223
224
225 //default constructor for a "stack" without base pairs
Stack()226 Stack::Stack(){
227 GC=true;
228 }
229
230 //removes all basepairs from the stack, so its size becomes 0. GC therefore true.
Clear()231 void Stack::Clear(){
232 bp.clear();
233 GC=true;
234 }
235
236 //returns number of base pairs in the stack
GetSize()237 int Stack::GetSize(){
238 return bp.size();
239 }
240
241
242 /*
243 returns true if the base pair (first,second) is inside of and adjacent to the current innermost pair,
244 false otherwise.
245 */
CanBeAddedInside(int first,int second)246 bool Stack::CanBeAddedInside(int first,int second){
247 if(GetSize()==0) return true;
248 //have to go back from the 5' end if bp is inside innerMost
249 if(first-1==bp[0].first && second+1==bp[0].second) return true;
250 else return false;
251 }
252
253 /*
254 returns true if the base pair (first,second) is outside of and adjacent to the current outermost pair,
255 false otherwise.
256 */
CanBeAddedOutside(int first,int second)257 bool Stack::CanBeAddedOutside(int first,int second){
258 if(GetSize()==0) return true;
259 if(first+1==bp[0].first && second-1==bp[0].second) return true;
260 else return false;
261 }
262
263 /*
264 returns true if base_pair is inside of and adjacent to the current innermost pair,
265 false otherwise.
266 */
CanBeAddedInside(std::pair<int,int> base_pair)267 bool Stack::CanBeAddedInside(std::pair<int,int> base_pair ){
268 //have to go back from the 5' end if bp is inside innerMost
269 if(std::make_pair(base_pair.first-1,base_pair.second+1)==bp[0]) return true;
270 else return false;
271 }
272
273 /*
274 returns true if base_pair is outside of and adjacent to the current outermost pair,
275 false otherwise.
276 */
CanBeAddedOutside(std::pair<int,int> base_pair)277 bool Stack::CanBeAddedOutside(std::pair<int,int> base_pair ){
278 //have to go forward from the 5' end if bp is outside outerMost
279 if(std::make_pair(base_pair.first+1,base_pair.second-1)==bp.back()) return true;
280 else return false;
281 }
282
283
284 /**
285 ToString() function that lists whether the stack is GC and its basepairs.
286 */
Print()287 std::string Stack::Print(){
288 std::string s;
289 s+="GC "+Str(GC);
290 for(size_t i=0;i<bp.size();i++) s+=" "+PrintBasePair(bp[i]);
291 return s+"\n";
292 }
293
294 /*
295 returns true, if all pairs are GC pairs, or there are no pairs.
296 False otherwise.
297 */
IsGC(char base)298 bool Stack::IsGC(char base){
299 if(base=='G' || base=='C') return true;
300 else return false;
301 }
302 /*
303 Returns true, if first and second are both either 'G' or 'C',false otherwise.
304 */
305
IsGCPair(char first,char second)306 bool Stack::IsGCPair(char first,char second){
307 if(IsGC(first) && IsGC(second)) return true;
308 else return false;
309
310 }
311
312
313 /*
314 Returns true, if the stack is minuscule, i.e. of size at most
315 GCMaxMinusculeStackSize for GC stacks and NonGCMaxMinusculeStackSize
316 for non-GC stacks.Static version.
317 */
318
IsMinuscule(int size,bool GC)319 bool Stack::IsMinuscule(int size,bool GC){
320 if(GC){
321 if(size<=GCMaxMinusculeStackSize) return true;
322 }
323 else if(size<=NonGCMaxMinusculeStackSize) return true;
324 return false;
325 }
326
327 /*
328 Returns true, if the stack is minuscule, i.e. of size at most
329 GCMaxMinusculeStackSize for GC stacks and NonGCMaxMinusculeStackSize
330 for non-GC stacks.
331 */
IsMinuscule()332 bool Stack::IsMinuscule(){
333 int size=GetSize();
334 if(GC){
335 if(size<=GCMaxMinusculeStackSize) return true;
336 }
337 else if(size<=NonGCMaxMinusculeStackSize) return true;
338 return false;
339
340 }
341
342
343 /*
344 Returns true, if the base pair (first,second) is part of the stack, false otherwise.
345 */
Contains(int first,int second)346 bool Stack::Contains(int first,int second){
347 if(find(bp.begin(),bp.end(),std::make_pair(first,second))!=bp.end()) return true;
348 else return false;
349 }
350
351 /*
352 Returns true, if base_pair is part of the stack, false otherwise.
353 */
Contains(std::pair<int,int> base_pair)354 bool Stack::Contains(std::pair<int,int> base_pair ){
355 if(find(bp.begin(),bp.end(),base_pair)!=bp.end()) return true;
356 else return false;
357 }
358
359 /**
360 removes base_pair from the stack (provided it was part of it).
361 */
Remove(std::pair<int,int> base_pair)362 void Stack::Remove(std::pair<int,int> base_pair ){
363 std::deque<std::pair<int,int> >::iterator it=find(bp.begin(),bp.end(),base_pair);
364 bp.erase(it);
365 }
366
367 /**
368 removes the base pair at index bp_index in bp from the stack. No protection for invalid
369 indices.
370 */
Remove(int bp_index)371 void Stack::Remove(int bp_index){
372 bp.erase(bp.begin()+bp_index);
373 }
374
375
376
377 /**
378 adds the pair base_pair with characters first and second to the outside of the
379 stack. You need to be sure that the pair actually can be added before calling this
380 function.
381 */
382
AddOutside(std::pair<int,int> base_pair,char first,char second)383 void Stack::AddOutside(std::pair<int,int> base_pair,char first,char second ){
384 bp.push_back(base_pair);
385 if(!IsGCPair(first,second)) GC=false;
386 }
387
388 /**
389 adds the pair (first,second) with characters cfirst and csecond to the outside of the
390 stack. You need to be sure that the pair actually can be added before calling this
391 function.
392 */
AddOutside(int first,int second,char cfirst,char csecond)393 void Stack::AddOutside(int first,int second,char cfirst,char csecond ){
394 bp.push_back(std::make_pair(first,second));
395 if(!IsGCPair(cfirst,csecond)) GC=false;
396 }
397
398 /**
399 adds the pair base_pair with characters first and second to the inside of the
400 stack. You need to be sure that the pair actually can be added before calling this
401 function.
402 */
AddInside(std::pair<int,int> base_pair,char first,char second)403 void Stack::AddInside(std::pair<int,int> base_pair,char first,char second ){
404 bp.push_front(base_pair);
405 if(!IsGCPair(first,second)) GC=false;
406 }
407
408 /**
409 adds the pair (first,second) with characters cfirst and csecond to the inside of the
410 stack. You need to be sure that the pair actually can be added before calling this
411 function.
412 */
AddInside(int first,int second,char cfirst,char csecond)413 void Stack::AddInside(int first,int second,char cfirst,char csecond ){
414 bp.push_front(std::make_pair(first,second));
415 if(!IsGCPair(cfirst,csecond)) GC=false;
416 }
417
418
419
420
421
422 /**
423 larger one of the potentially two creatd minuscule stacks form base pair removal
424 */
425
426 Stack createdMinusculeStack1;
427
428 Stack createdMinusculeStack2;
429
430
431
GetConflictGroup()432 void PartialPath::GetConflictGroup() {
433 conflict_group.clear();
434 //bool debug=false;//true;
435 std::vector<int> conflicts=std::vector<int>(add_catalog.size());
436 // std::map<int,std::vector<int> > conflict_group=std::map<int,std::vector<int> >();
437 if(add_catalog.size()>0){
438 size_t minsize=remove_catalog.size();
439 for (size_t i=0; i<add_catalog.size();i++) {
440 conflicts[i]=0;
441 for (size_t j=0; j<remove_catalog.size();j++) {
442 if(BasePairConflict(add_catalog[i],remove_catalog[j])) conflicts[i]++;
443 }
444 #ifdef _DEBUG_MH_
445 std::cout<<"add_catalog["+Str((int)i)+"]: ("+Str(add_catalog[i].first)+","+Str(add_catalog[i].second)+")\n";
446 std::cout<<"conflicts[i]:"<<conflicts[i]<<"\n";
447 #endif
448 if(conflicts[i]<(int)minsize) minsize=conflicts[i];
449 }
450 #ifdef _DEBUG_MH_
451 std::cout<<"minsize:"<<minsize<<"\n";
452 #endif
453 for (size_t i=0; i<add_catalog.size();i++) {
454 if(conflicts[i]==(int)minsize){
455 std::vector<int> new_conflicts=std::vector<int>();
456 //idx in oibp2 (add_catalog) of the base pair to be added
457 new_conflicts.push_back(i);
458 //conflict_group[i]=std::vector<int>();
459
460
461 for (size_t j=0; j<remove_catalog.size();j++) {
462 if(BasePairConflict(add_catalog[i],remove_catalog[j]))
463 //idx in oibp (remove_catalog) of the base pair to be removed
464 new_conflicts.push_back(j);
465 //conflict_group[i].push_back(j);
466 }
467 conflict_group.push_back(new_conflicts);
468 }
469 }
470 }
471 //all has been added, so only bp to be removed are left
472 else{
473 //conflict_group[-1]=std::vector<int>();
474 std::vector<int> new_conflicts=std::vector<int>();
475 //idx in oibp2 (add_catalog) of the base pair to be added
476 new_conflicts.push_back(-1);
477 for (size_t i=0; i<remove_catalog.size(); i++) new_conflicts.push_back(i);
478 //conflict_group[-1].push_back(i);
479 conflict_group.push_back(new_conflicts);
480 }
481 // return conflict_group;
482 // std::cout<<"got conflict_group"<<"\n";
483 if(MHS_debug) std::cout<<"created ConflictGroup of size "+Str((int)conflict_group.size())<<endl;;
484 if(MHS_debug) std::cout<<PrintConflictGroup(conflict_group,remove_catalog,add_catalog)+"\n";
485 }
486 /**
487
488 TODO: energy evaluation
489 reverse addition deletion of pairs in pair table (only those, which were done (i.e. don't reverse removed pairs
490 that were not treated because they already have been dealt with before)
491
492 */
493
494
495
496 /**
497 idx - idx of the conflict group that generates the trail
498 */
499
500
501
UndoPartialPathTrail()502 void PartialPath::UndoPartialPathTrail(){
503 if(MHS_debug) cout<<"UndoPartialPathTrail of size "+Str((int)currentPartialPathTrail.size())<<endl;
504 // for(size_t i=currentPartialPathTrail.size()-1;i>=0;i--){
505 for(std::vector<int>::reverse_iterator it=currentPartialPathTrail.rbegin();it!=currentPartialPathTrail.rend();it++){
506 // int idx=*(it-1);//currentPartialPathTrail[i];
507 int idx=*it;
508 if(MHS_debug) cout<<"idx "+Str(idx)<<std::endl;
509 //base pair was added
510 if(idx>=BP_ADD_CONST-1) {
511 idx-=BP_ADD_CONST;
512 //ignore -1, as it means nothing got added and the index is only for the emptying of the conflict_group
513 if(idx==-1) continue;
514 RemoveFromPairTable(add_catalog[idx]);
515 }
516 //base pair was removed
517 else{
518 AddToPairTable(remove_catalog[idx]);
519 }
520 }
521 currentPartialPathTrail.clear();
522 }
523
524
MinusculeStackIsInTarget(Stack mStack)525 bool PartialPath::MinusculeStackIsInTarget(Stack mStack){
526 if(MHS_debug) cout<<"MinusculeStackIsInTarget with stack of size "+Str((int)mStack.bp.size())<<std::endl;
527 for(size_t i=0;i<mStack.bp.size();i++){
528 if(MHS_debug) cout<<i+" "+PrintBasePair(mStack.bp[i])<<endl;
529 if(find(target.begin(),target.end(),mStack.bp[i])==target.end()) return false;
530 }
531 return true;
532
533 }
534
StacksCreatedByBPRemoval(int first,int second)535 int PartialPath::StacksCreatedByBPRemoval(int first,int second){
536
537 int count=0;
538 if(IsLonelyPair(first,second)) {
539 minusculeStack.Clear();
540 return 0;
541 }
542 //the bp is part of the minuscule Stack of size 0. Its removal decreases the size
543 //of the minuscule Stack, but doesnt create a new one.
544 else if(minusculeStack.GetSize()>0 && minusculeStack.Contains(first,second)){
545 return 0;
546 }
547 //now it can only have been part of a stack that was not minuscule but might become minuscule due
548 //to the removal
549 //check elts in pairtable around bp to obtain whether the remaining stack is minuscule
550 int inside_count=0;
551 int inside_first=first+1;
552 int inside_second=second-1;
553 bool inside_GC=true;
554 while(PairIsInPairtable(inside_first,inside_second)){
555 if(!Stack::IsGCPair(sequence[inside_first-1],sequence[inside_second-1])) inside_GC=false;
556 inside_first++;
557 inside_second--;
558 inside_count++;
559 }
560 bool inside_minuscule=Stack::IsMinuscule(inside_count,inside_GC);
561 if(inside_count>0 && inside_minuscule) count++;
562
563
564 //=================================================
565 int outside_count=0;
566 int outside_first=first-1;
567 int outside_second=second+1;
568 bool outside_GC=true;
569 while(PairIsInPairtable(outside_first,outside_second)){
570 if(!Stack::IsGCPair(sequence[outside_first-1],sequence[outside_second-1])) outside_GC=false;
571 outside_first--;
572 outside_second++;
573 outside_count++;
574 }
575 bool outside_minuscule=Stack::IsMinuscule(outside_count,outside_GC);
576 if(outside_count>0 && outside_minuscule) count++;
577 //only one minuscule stack
578 createdMinusculeStack1.Clear();
579 createdMinusculeStack2.Clear();
580 if(count==1){
581 if(inside_minuscule) {
582 inside_first--;
583 inside_second++;
584 if(MHS_debug) std::cout<<"create inside_minuscule Stack with first "+Str(inside_first)<<endl;
585 while(inside_first!=first){
586 createdMinusculeStack1.AddOutside(inside_first,inside_second,sequence[inside_first-1],sequence[inside_second-1]);
587 inside_first--;
588 inside_second++;
589 }
590 }
591 else if(outside_minuscule){
592 outside_first++;
593 outside_second--;
594 if(MHS_debug) std::cout<<"create outside_minuscule Stack with first "+Str(outside_first)<<endl;
595 while(outside_first!=first){
596 createdMinusculeStack1.AddInside(outside_first,outside_second,sequence[outside_first-1],sequence[outside_second-1]);
597 outside_first++;
598 outside_second--;
599 }
600 }
601 if(MHS_debug) std::cout<<"createdMinusculeStack1 "+createdMinusculeStack1.Print()<<endl;
602 if(MinusculeStackIsInTarget(createdMinusculeStack1)){
603 if(MHS_debug) std::cout<<"ignore bc in target "<<endl;
604 createdMinusculeStack1.Clear();
605 count--;
606 }
607 }
608 if(count==2){
609 //add inside to stack1, outside to stack2
610
611
612 inside_first--;
613 inside_second++;
614 while(inside_first!=first){
615 createdMinusculeStack1.AddOutside(inside_first,inside_second,sequence[inside_first-1],sequence[inside_second-1]);
616 inside_first--;
617 inside_second++;
618 }
619
620
621 outside_first++;
622 outside_second--;
623 while(outside_first!=first){
624 createdMinusculeStack2.AddInside(outside_first,outside_second,sequence[outside_first-1],sequence[outside_second-1]);
625 outside_first++;
626 outside_second--;
627 }
628
629 if(MHS_debug) std::cout<<"createdMinusculeStack1 "+createdMinusculeStack1.Print()<<endl;
630 if(MinusculeStackIsInTarget(createdMinusculeStack1)){
631 if(MHS_debug) std::cout<<"ignore bc in target "<<endl;
632 createdMinusculeStack1.Clear();
633 count--;
634 }
635
636 if(MHS_debug) std::cout<<"createdMinusculeStack2 "+createdMinusculeStack2.Print()<<endl;
637 if(MinusculeStackIsInTarget(createdMinusculeStack2)){
638 if(MHS_debug) std::cout<<"ignore bc in target "<<endl;
639 createdMinusculeStack2.Clear();
640 count--;
641 }
642
643 }
644
645
646 return count;
647
648 }
649
650
651
652 /*
653 bool PartialPath::BPRemovalCreatesSecondMinusculeStack(int first,int second){
654 //do not allow creating 2 minuscule stacks at once.
655 // if(minusculeStack.GetSize()==0) return false;
656 //if the pair was single, it constituted the minusculeStack, which therefore disappears
657 if(IsLonelyPair(first,second)) {
658 minusculeStack.Clear();
659 return false;
660 }
661 //now it can only have been part of a stack that was not minuscule but might become minuscule due
662 //to the removal
663 //check elts in pairtable around bp to obtain whether the remaining stack is minuscule
664 int inside_count=0;
665 int inside_first=first+1;
666 int inside_second=second-1;
667 bool inside_GC=true;
668 while(PairIsInPairtable(inside_first,inside_second)){
669 if(!Stack.IsGCPair(sequence[inside_first],sequence[inside_second])) inside_GC=false;
670 inside_first++;
671 inside_second--;
672 inside_count++;
673 }
674 if(inside_count>0 && Stack.IsMinuscule(inside_count,inside_GC))
675 int outside_count=0;
676 return true;
677 }
678 */
RemoveMinusculeStack()679 void PartialPath::RemoveMinusculeStack(){
680 RemoveMinusculeStack(minusculeStack);
681 minusculeStack.Clear();
682 }
683
RemoveMinusculeStack(Stack mStack)684 void PartialPath::RemoveMinusculeStack(Stack mStack){
685 if(MHS_debug) cout<<"RemoveMinusculeStack "+mStack.Print()<<endl;
686
687 //record movements in a trail, so they can be undone
688
689
690 //if the Stack is increasing, add further base pairs to it
691 //if tgt contains stack
692 //the stack is here to stay if both its inner and outermost bp are in the tgt, otherwise it is not!!!
693 if(find(target.begin(),target.end(),mStack.bp.back())!=target.end() &&find(target.begin(),target.end(),mStack.bp.front())!=target.end() ){
694
695 //do we add base pairs inwards or outwards?
696 //inwards
697 int inwards_first=mStack.bp[0].first+1;
698 int inwards_second=mStack.bp[0].second-1;
699 // while(mStack.CanBeAddedInside(inwards_first,inwards_second)) {
700 int addCatalogIdx;
701 while((addCatalogIdx=find(add_catalog.begin(),add_catalog.end(),std::make_pair(inwards_first,inwards_second))-add_catalog.begin())<(int)add_catalog.size()){
702 mStack.AddInside(inwards_first,inwards_second,sequence[inwards_first-1],sequence[inwards_second-1]);
703 AddToPairTableWithLog(inwards_first,inwards_second,addCatalogIdx);
704 inwards_first++;
705 inwards_second--;
706 if(!mStack.IsMinuscule()) break;
707 }
708 //outwards
709 int outwards_first=mStack.bp.back().first-1;
710 int outwards_second=mStack.bp.back().second+1;
711 //while(mStack.CanBeAddedOutside(outwards_first,outwards_second)) {
712 while((addCatalogIdx=find(add_catalog.begin(),add_catalog.end(),std::make_pair(outwards_first,outwards_second))-add_catalog.begin())<(int)add_catalog.size()){
713 mStack.AddOutside(outwards_first,outwards_second,sequence[outwards_first-1],sequence[outwards_second-1]);
714 AddToPairTableWithLog(outwards_first,outwards_second,addCatalogIdx);
715 outwards_first--;
716 outwards_second++;
717 if(!mStack.IsMinuscule()) break;
718 }
719
720 }
721
722 //if it is decreasing, remove all base pairs from it
723 //if src contains stack
724 // while(mStack.GetSize()>0){
725 else{
726 for(int i=0;i<mStack.GetSize();i++){
727 //do not remove basepairs that are in the target
728 if(find(target.begin(),target.end(),mStack.bp[i])!=target.end()) continue;
729
730 int first=mStack.bp[i].first;
731 int second=mStack.bp[i].second;
732 if(!PairIsInPairtable(first,second)) continue;
733
734 int removeCatalogIdx=find(remove_catalog.begin(),remove_catalog.end(),mStack.bp[i])-remove_catalog.begin();
735 RemoveFromPairTableWithLog(first,second,removeCatalogIdx);
736 }
737 }
738
739 }
740
741
BPAdditionCreatesSecondMinusculeStack(int first,int second)742 bool PartialPath::BPAdditionCreatesSecondMinusculeStack(int first,int second){
743 if(MHS_debug) std::cout<<"BPAdditionCreatesSecondMinusculeStack "+Str(first)+" "+Str(second)<<endl;
744
745
746 if(minusculeStack.GetSize()==0) return false;
747 //if we are here, there already is a minuscule Stack
748 //the only way to create a new minuscule Stack is if the bp is lonely
749 //and if is still part of mStack in the target structure. As only base pairs in the
750 //target structure ever get added, this should never occur.
751
752 //determines whether the stack around a pair (first,second) is minuscule in target.
753 if(IsLonelyPair(first,second)) {
754 if(MHS_debug) std::cout<<"added pair is lonely"<<endl;
755 bool GC=Stack::IsGCPair(sequence[first-1],sequence[second-1]);
756 //is in target automatically
757 std::pair<int,int> bp=std::make_pair(first,second);
758 int stack_size=1;
759 //go inside
760 std::pair<int,int> bp_inner= std::pair<int,int>(bp.first+1,bp.second-1);
761 while(true){
762 //finds base pair outside of bp
763 if(find(target.begin(),target.end(),bp_inner)!=target.end()){
764 stack_size++;
765 if(!IsGCPair(bp_inner,sequence)) GC=false;
766 bp_inner= std::pair<int,int>(bp_inner.first+1,bp_inner.second-1);
767 }
768 else break;
769 }
770
771 std::pair<int,int> bp_outer= std::pair<int,int>(bp.first-1,bp.second+1);
772 while(true){
773 //finds base pair outside of bp
774 if(find(target.begin(),target.end(),bp_outer)!=target.end()){
775 stack_size++;
776 if(!IsGCPair(bp_outer,sequence)) GC=false;
777 bp_outer= std::pair<int,int>(bp_outer.first-1,bp_outer.second+1);
778 }
779 else break;
780 }
781 //if the stack is minuscule in the target structure, it is not counted as minuscule
782 //contribution to the current structure, so we return false
783 if(Stack::IsMinuscule(stack_size,GC)) return false;
784 else{
785 if(MHS_debug) std::cout<<"and creates new mStack"<<endl;
786 return true;
787 }
788 }
789 //the minusculeStack disappears if the current bp increases its size so it is not minuscule any more
790
791 if(minusculeStack.CanBeAddedOutside(first,second))
792 minusculeStack.AddOutside(first,second,sequence[first-1],sequence[second-1]);
793 else if(minusculeStack.CanBeAddedInside(first,second))
794 minusculeStack.AddInside(first,second,sequence[first-1],sequence[second-1]);
795 if(!minusculeStack.IsMinuscule()) minusculeStack.Clear();
796
797 //the count of minuscule Stacks is now either 0 or 1, so we didnt create a second one.
798 return false;
799 }
800
801
802
IsLonelyPair(int first,int second)803 bool PartialPath::IsLonelyPair(int first,int second){
804 bool no_bp_outside= (pair_table[first-1]!=second+1);
805 bool no_bp_inside= (pair_table[first+1]!=second-1);
806 if(no_bp_outside && no_bp_inside) return true;
807 else return false;
808 }
809
810
TreatConflictGroupIndex(int idx)811 void PartialPath::TreatConflictGroupIndex(int idx){
812 bool debug=false;
813 if(MHS_debug) cout<<"TreatConflictGroupIndex"<<endl;
814 if(debug) cout<<PrintPairTable()<<endl;
815 //the items to remove are stored from the second element onwards.
816 if(MHS_debug) cout<<"print conflict_group["+Str(idx)+"]"<<endl;
817 //for(size_t i=1;i<conflict_group[idx].size();i++){
818
819 // }
820 for(size_t i=1;i<conflict_group[idx].size();i++){
821 // int remove_idx=conflict_group[idx][i];
822
823 //if(!RemovePairIsPresent(idx,remove_idx)) continue;
824 int first=GetFirstFromConflictGroupRemove(idx,i);
825 int second=GetSecondFromConflictGroupRemove(idx,i);
826 // int first=remove_catalog[remove_idx].first;//GetFirstFromConflictGroupRemove(idx,remove_idx);
827 // int second=remove_catalog[remove_idx].second;//GetSecondFromConflictGroupRemove(idx,remove_idx);
828 if(MHS_debug) cout<<"remove idx "+Str(conflict_group[idx][i])+" bp "+PrintBasePair(std::make_pair(first,second))<<endl;
829 //do nothing if the pair has already been removed from pair_table
830 if(!PairIsInPairtable(first,second)) {
831 if(MHS_debug)cout<<"Is not in pair_table"<<endl;
832 continue;
833 }
834 if(MHS_debug) {
835 if(minusculeStack.GetSize()>0) cout<<"Already have a minuscule stack"<<endl;
836 else cout<<"Dont have a minuscule stack"<<endl;
837 }
838 int createdmStackCount=StacksCreatedByBPRemoval(first,second);
839 int totalmStackCount=createdmStackCount;
840
841 if(minusculeStack.GetSize()>0) totalmStackCount++;
842 if(MHS_debug) cout<<"createdmStackCount "+Str(createdmStackCount)+" totalmStackCount "+Str(totalmStackCount)<<endl;
843 if(totalmStackCount==1 && createdmStackCount==1){
844 if(createdMinusculeStack1.GetSize()>0) minusculeStack=createdMinusculeStack1;
845 else if(createdMinusculeStack2.GetSize()>0) minusculeStack=createdMinusculeStack2;
846 }
847
848
849 if(totalmStackCount>=2){
850 //if there already was a mStack, remove it
851 if(totalmStackCount>createdmStackCount){
852 if(MHS_debug) cout<<"just created a mStack, so remove the previous mStack"<<endl;
853 RemoveMinusculeStack();
854 //determine the new minuscule stack
855 }
856 //if two minusculeStacks just got created, remove the smaller of the two and keep the larger of the two as minusculeStack
857 //remove the smaller of the two created mStacks, and make the bigger the new mStack
858 if(createdmStackCount>=1) {
859 if(MHS_debug) cout<<" createdMinusculeStack1.GetSize() "+Str(createdMinusculeStack1.GetSize())+" createdMinusculeStack2.GetSize() "+Str(createdMinusculeStack2.GetSize())<<endl;
860 if(createdMinusculeStack1.GetSize()>createdMinusculeStack2.GetSize()){
861 if(createdMinusculeStack2.GetSize()>0) RemoveMinusculeStack(createdMinusculeStack2);
862 minusculeStack=createdMinusculeStack1;
863 }
864 else if(createdMinusculeStack2.GetSize()>createdMinusculeStack1.GetSize()){
865 if(createdMinusculeStack1.GetSize()>0) RemoveMinusculeStack(createdMinusculeStack1);
866 minusculeStack=createdMinusculeStack2;
867 }
868 if(MHS_debug) std::cout<<"MinusculeStack after removal "+minusculeStack.Print()<<endl;
869 }
870 }
871 int removeCatalogIdx=GetRemoveCatalogIndex(idx,i);//remove_idx);
872 RemoveFromPairTableWithLog(first,second,removeCatalogIdx);
873
874 }
875 //if to_add is -1, all has already been added, so what comes below is skipped.
876 if(conflict_group[idx][0]==-1) {
877 //it needs to be recorded in the trail, so the conflict_group can be emptied.
878 currentPartialPathTrail.push_back(-1+BP_ADD_CONST);
879 return;
880 }
881 //the element at given index inthe conflictgroup
882 int first=GetFirstFromConflictGroupAdd(idx);
883 int second=GetSecondFromConflictGroupAdd(idx);
884 int addCatalogIdx=GetAddCatalogIndex(idx);
885 if(BPAdditionCreatesSecondMinusculeStack(first,second)) {
886 RemoveMinusculeStack();
887 minusculeStack.AddOutside(first,second,sequence[first-1],sequence[second-1]);
888 }
889 //add the base pair to pair_table unless it is already part of it;
890 if(!PairIsInPairtable(first,second)) AddToPairTableWithLog(first,second,addCatalogIdx);
891
892 }
893
894
895 /*
896 std::vector<Stack> newMinusculeStacks=GetMinusculeStacksForRemoval(first,second);
897
898 if((int)newMinusculeStacks.size()>=minusculeMaxCount){
899 //1.A second stack jsut appeared via pushback
900 //2.remove the original stack, i.e. the first one in newMinusculeStacks
901 Stack firstMinuscule=minusculeStacks[0];
902 //get rid of all elements in the minuscule Stack.
903 //if its elements will be part of the final structure, we have to add bp until the stack is larger than minuscule
904 //if they won't be part, we have to remove all of its elements.
905 if(firstMinuscule.persists){
906
907 }
908 else{
909
910
911 }
912 minusculeStacks.erase(minusculeStacks.begin());
913 }
914 */
915
916
917
PairIsInPairtable(int first,int second)918 bool PartialPath::PairIsInPairtable(int first,int second){
919 if(first<=0) return false;
920 bool firstPaired=(pair_table[first]==second);
921 bool secondPaired=(pair_table[second]==first);
922 return (firstPaired && secondPaired);
923 }
924
925
926
927 /*
928 bool PartialPath::RemovePairIsPresent(int first,int second){
929 bool firstPaired=(pair_table[first]==second);
930 bool secondPaired=(pair_table[second]==first);
931 return (firstPaired && secondPaired);
932 }
933 */
934
935 /*
936 bool PartialPath::RemovePairIsPresent(int group_idx,int remove_idx){
937 int first=GetFirstFromConflictGroupRemove(group_idx,remove_idx);
938 int second=GetSecondFromConflictGroupRemove(group_idx,remove_idx);
939 bool firstPaired=(pair_table[first]==second);
940 bool secondPaired=(pair_table[second]==first);
941 return (firstPaired && secondPaired);
942 }
943 */
944
945
946
GetFirstFromConflictGroupRemove(int group_idx,int remove_idx)947 int PartialPath::GetFirstFromConflictGroupRemove(int group_idx,int remove_idx){
948 bool debug=false;
949 if(debug){
950 cout<<"GetFirstFromConflictGroupRemove group member "+Str(group_idx)+" idx in list "+Str(remove_idx)<<endl;
951 cout<<"yields index in remove catalog "+Str(conflict_group[group_idx][remove_idx])<<endl;
952 cout<<"bp "+PrintBasePair(remove_catalog[conflict_group[group_idx][remove_idx]])<<endl;
953 }
954 return remove_catalog[conflict_group[group_idx][remove_idx]].first;
955 }
956
GetSecondFromConflictGroupRemove(int group_idx,int remove_idx)957 int PartialPath::GetSecondFromConflictGroupRemove(int group_idx,int remove_idx){
958 bool debug=false;
959 if(debug){
960 cout<<"GetSecondFromConflictGroupRemove group member "+Str(group_idx)+" idx in list "+Str(remove_idx)<<endl;
961 cout<<"yields index in remove catalog "+Str(conflict_group[group_idx][remove_idx])<<endl;
962 cout<<"bp "+PrintBasePair(remove_catalog[conflict_group[group_idx][remove_idx]])<<endl;
963 }
964 return remove_catalog[conflict_group[group_idx][remove_idx]].second;
965 }
966
967
GetAddCatalogIndex(int idx)968 int PartialPath::GetAddCatalogIndex(int idx){
969 return conflict_group[idx][0];
970 }
971
972
973
GetRemoveCatalogIndex(int group_idx,int remove_idx)974 int PartialPath::GetRemoveCatalogIndex(int group_idx,int remove_idx){
975 if(MHS_debug) cout<<"GetRemoveCatalogIndex group member "+Str(group_idx)+" idx in list "+Str(remove_idx)+" ";
976 if(MHS_debug) cout<<"yields index in remove catalog "+Str(conflict_group[group_idx][remove_idx])<<endl;
977 return conflict_group[group_idx][remove_idx];
978 }
979
980
981
982
GetFirstFromConflictGroupAdd(int group_idx)983 int PartialPath::GetFirstFromConflictGroupAdd(int group_idx){
984 return add_catalog[conflict_group[group_idx][0]].first;
985 }
986
GetSecondFromConflictGroupAdd(int group_idx)987 int PartialPath::GetSecondFromConflictGroupAdd(int group_idx){
988 return add_catalog[conflict_group[group_idx][0]].second;
989 }
990
AddToPairTable(std::pair<int,int> bp)991 void PartialPath::AddToPairTable(std::pair<int,int> bp){
992 // cout<<"AddToPairTable"+PrintBasePair(bp) <<std::endl;
993 pair_table[bp.first]=bp.second;
994 pair_table[bp.second]=bp.first;
995 }
996
997
RemoveFromPairTable(std::pair<int,int> bp)998 void PartialPath::RemoveFromPairTable(std::pair<int,int> bp){
999 // cout<<"RemoveFromPairTable" +PrintBasePair(bp)<<std::endl;
1000 pair_table[bp.first]=0;
1001 pair_table[bp.second]=0;
1002 }
1003
1004
AddToPairTable(int first,int second)1005 void PartialPath::AddToPairTable(int first,int second){
1006 // cout<<"AddToPairTable" +PrintBasePair(std::make_pair(first,second))<<std::endl;
1007 pair_table[first]=second;
1008 pair_table[second]=first;
1009 }
1010
1011
1012
RemoveFromPairTable(int first,int second)1013 void PartialPath::RemoveFromPairTable(int first,int second){
1014 // cout<<"RemoveFromPairTable"+PrintBasePair(std::make_pair(first,second)) <<std::endl;
1015 pair_table[first]=0;
1016 pair_table[second]=0;
1017 }
1018
RemoveFromPairTableWithLog(int first,int second,int removeCatalogIdx)1019 void PartialPath::RemoveFromPairTableWithLog(int first,int second,int removeCatalogIdx){
1020 // cout<<"RemoveFromPairTableWithLog idx "+Str(removeCatalogIdx)<<endl;
1021 currentPartialPathTrail.push_back(removeCatalogIdx);
1022 RemoveFromPairTable(first,second);
1023 }
1024
AddToPairTableWithLog(int first,int second,int addCatalogIdx)1025 void PartialPath::AddToPairTableWithLog(int first,int second,int addCatalogIdx){
1026 // cout<<"AddToPairTableWithLog idx "+Str(addCatalogIdx)<<endl;
1027 currentPartialPathTrail.push_back(addCatalogIdx+BP_ADD_CONST);
1028 AddToPairTable(first,second);
1029 }
1030
1031
1032
PartialPath(std::vector<std::pair<int,int>> src,std::vector<std::pair<int,int>> target,std::string sequence,int lookahead,const std::vector<std::pair<int,int>> & remove_catalog,const std::vector<std::pair<int,int>> & add_catalog)1033 PartialPath::PartialPath(std::vector<std::pair<int,int> > src,std::vector<std::pair<int,int> > target,std::string sequence,int lookahead,const std::vector<std::pair<int,int> > & remove_catalog,const std::vector<std::pair<int,int> > & add_catalog){
1034 this->src=src;
1035 this->target=target;
1036 this->sequence=sequence;
1037 this->lookahead=lookahead;
1038 this->add_catalog =add_catalog;
1039 this->remove_catalog =remove_catalog;
1040 }
1041
1042
WalkPartialPath(std::vector<int> combination)1043 double PartialPath::WalkPartialPath( std::vector<int> combination ){
1044
1045 if(MHS_debug) cout<<"WalkPartialPath \n";
1046 // std::string current_structure=sequence;
1047 //std::string highest_structure;
1048
1049 // double highest_energy = std::numeric_limits<double>::min();
1050 double highest_energy = -INF;//std::numeric_limits<double>::min();
1051 //double current_value=std::numeric_limits<double>::min();
1052
1053
1054
1055
1056 for (size_t k=0; k<combination.size(); k++) {
1057 int idx = combination[k]-1;
1058 TreatConflictGroupIndex(idx);
1059 double current_energy=FastEvalEnergy(sequence);
1060 if(MHS_debug) cout<<"current_energy "+Str(current_energy)<<endl;
1061 if(current_energy>highest_energy) highest_energy=current_energy;
1062 // UndoPartialPathTrail();
1063 }
1064 if(MHS_debug) cout<<"currentPartialPathTrail after WalkPartialPath "+PrintCombination(currentPartialPathTrail)+"\n";
1065 return highest_energy;
1066 }
1067 /**
1068 std::map<int,std::vector<int> >::const_iterator it=conflict_group.begin();
1069 advance(it,idx);
1070 std::pair<int,std::vector<int> > conflict=*it;
1071 //XXXXXXXXXXXXXXXXXXXXXXXXX DO WE NEED TO TREAT TO AD HERE?
1072 // int to_add=conflict.first;
1073 //if(to_add==1) continue;
1074 std::vector<int> to_remove=conflict.second;
1075 // remove what impedes adding the pair in combination[k], then
1076 // add it. TREAT ONE ELEMENT IN THE CONFLICT GROUP
1077 for (size_t l=0; l<to_remove.size(); l++) {
1078
1079 std::pair<int,int> bp_to_remove=only_in_base_pairs[to_remove[l]]
1080 std::vector<Stack> newMinusculeStacks= PartialPath::GetMinusculeStacksForRemoval(bp_to_remove);
1081 if(newMinusculeStacks.size()>=minusculeMaxCount){
1082 //1.A second stack jsut appeared via pushback
1083 //2.remove the original stack, i.e. the first one in newMinusculeStacks
1084 Stack firstMinuscule=minusculeStacks.pop_front();//[0];
1085 //get rid of all elements in the minuscule Stack.
1086 //if its elements will be part of the final structure, we have to add bp until the stack is larger than minuscule
1087 //if they won't be part, we have to remove all of its elements.
1088 if(firstMinuscule.persists){
1089
1090 }
1091 else{
1092
1093
1094 }
1095 }
1096
1097
1098
1099
1100
1101
1102
1103
1104 //===========================================================
1105 //std::vector<std::pair<int,int> >::iterator it = remove(current.begin(),current.end(),only_in_base_pairs[to_remove[l]]);
1106 //if only_in_base_pairs[to_remove[l] hasn't been removed before
1107 if(it!=current.end()){
1108 std::vector<std::pair<int,int> > current_bp_after_remove= std::vector<std::pair<int,int> >(current.begin(),it);
1109
1110 std::vector<std::pair<int,int> > lonely_bp=GetMinusculeStacks();
1111 // std::vector<std::pair<int,int> > lonely_bp=GetMinusculeStacks(current_bp_after_remove,base_pairs2,sequence);
1112
1113 int lonely_bp_count=lonely_bp.size();
1114 //if removing the requested base pair results in too many lonely base pairs, we have to get rid of the initial lonely base pair first
1115 if(lonely_bp_count>minusculeMaxCount){
1116 //need to keep the original lonely_bp in memeory
1117 }
1118
1119 pair_table[only_in_base_pairs[to_remove[l]].first]=0;
1120 pair_table[only_in_base_pairs[to_remove[l]].second]=0;
1121 removed_pairs.push_back(to_remove[l]);
1122 }
1123
1124 current.erase(it, current.end());
1125 //===========================================================
1126
1127
1128 }
1129 */
1130 //}
1131
1132
1133
1134
1135 /*
1136 void PartialPath::RemoveFromConflictGroup(std::vector<int> added_regular,std::vector<std::pair<int,int> > added_correctives){
1137
1138 for(size_t i=0;i<added_regular.size();i++){
1139 conflict_group.erase(added_regular[i]);
1140 }
1141 //bp that are added are in only_in_base_pairs2, so this is the container that is searched for the elements of added_correctives;
1142 for(size_t i=0;i<added_correctives.size();i++){
1143 std::vector<std::pair<int,int> >::iterator added_corrective=find(only_in_base_pairs2.begin(),only_in_base_pairs2.end(),added_correctives[i]);
1144 if(added_corrective!=only_in_base_pairs2.end()){
1145 int idx=added_corrective-only_in_base_pairs2.begin();
1146 conflict_group.erase(idx);
1147 }
1148 }
1149
1150 }
1151 */
1152 /*
1153 // CONSIDER THE POSSIBLE COMBINATIONS OF THE GIVEN CONFLICT GROUP IN TURN.
1154 for (size_t k=0; k<combination.size(); k++) {
1155 int idx = combination[k]-1;
1156 std::map<int,std::vector<int> >::const_iterator it=conflict_group.begin();
1157 advance(it,idx);
1158 std::pair<int,std::vector<int> > conflict=*it;
1159 int to_add=conflict.first;
1160 //if(to_add==1) continue;
1161 std::vector<int> to_remove=conflict.second;
1162 // remove what impedes adding the pair in combination[k], then
1163 // add it. TREAT ONE ELEMENT IN THE CONFLICT GROUP
1164 for (size_t l=0; l<to_remove.size(); l++) {
1165 size_t old_size=current_pairs.size();
1166 // TREAT ONE BASE PAIR
1167 #ifdef _DEBUG_MH_
1168 std::cout<<"remove pair ("+Str(only_in_base_pairs[to_remove[l]].first)+","+Str(only_in_base_pairs[to_remove[l]].second)+")"<<std::endl;
1169 #endif
1170 std::vector<std::pair<int,int> >::iterator it = remove(current_pairs.begin(),current_pairs.end(),only_in_base_pairs[to_remove[l]]);
1171
1172
1173 //if only_in_base_pairs[to_remove[l] hasn't been removed before
1174 if(it!=current_pairs.end()){
1175 std::vector<std::pair<int,int> > current_bp_after_remove= std::vector<std::pair<int,int> >(current_pairs.begin(),it);
1176
1177 std::vector<std::pair<int,int> > lonely_bp=GetMinusculeStacks(current_bp_after_remove,base_pairs2,sequence);
1178 int lonely_bp_count=lonely_bp.size();
1179 //if removing the requested base pair results in too many lonely base pairs, we have to get rid of the initial lonely base pair first
1180 if(lonely_bp_count>max_lonely_bp){
1181 //need to keep the original lonely_bp in memeory
1182 }
1183
1184 pair_table[only_in_base_pairs[to_remove[l]].first]=0;
1185 pair_table[only_in_base_pairs[to_remove[l]].second]=0;
1186 removed_pairs.push_back(to_remove[l]);
1187 }
1188 current_pairs.erase(it, current_pairs.end());
1189 */
AddBestPartialPathTrailToPathTrail()1190 void PartialPath::AddBestPartialPathTrailToPathTrail(){
1191 if(MHS_debug) cout<<"AddBestPartialPathTrailToPathTrail \n";
1192
1193 for(size_t i=0;i<bestPartialPathTrail.size();i++){
1194 int idx=bestPartialPathTrail[i];
1195 pathTrail.push_back(idx);
1196 if(idx>=BP_ADD_CONST-1){
1197 idx-=BP_ADD_CONST;
1198 //this is new
1199 if(idx==-1) continue;
1200 //add the bp to the pair_table and latest structure
1201 std::pair<int,int> bp=add_catalog[idx];
1202 pair_table[bp.first]=bp.second;
1203 pair_table[bp.second]=bp.first;
1204 }
1205 else{
1206 //remove the bp from the pair_table and latest structure
1207 std::pair<int,int> bp=remove_catalog[idx];
1208 pair_table[bp.first]=0;
1209 pair_table[bp.second]=0;
1210 }
1211
1212 }
1213 // bestPartialPathTrail.clear();
1214 if(MHS_debug) cout<<"yields pathTrail "+ PrintCombination(pathTrail)+"\n";
1215 }
1216
1217
FindOptimalPath(std::vector<std::pair<double,std::string>> & structures)1218 void PartialPath::FindOptimalPath(std::vector<std::pair<double,std::string> >& structures){
1219 bool debug=false;
1220 if(MHS_debug)cout<<"FindOptimalPath\n";
1221
1222
1223
1224
1225 std::vector<int> combination= std::vector<int>();
1226
1227
1228 GetConflictGroup();
1229
1230 while(conflict_group.size()>0){
1231 double best_combination_saddle_energy = INF;//std::numeric_limits<double>::max();
1232 long int n_combinations = N_take_k( conflict_group.size(),lookahead);
1233
1234
1235 std::vector<Stack> minusculeStacks=GetMinusculeStacks();
1236 //if there is more than one minuscule Stack, rremove all but the largest.
1237 int greatestMStackIndex=-1;
1238 int maxsize=0;
1239 for(size_t i=0;i<minusculeStacks.size();i++){
1240 int size=minusculeStacks[i].GetSize();
1241 if(size>maxsize) {
1242 maxsize=size;
1243 greatestMStackIndex=i;
1244 }
1245 }
1246 Stack originalMinusculeStack;
1247 if(greatestMStackIndex==-1) originalMinusculeStack=Stack();
1248 else originalMinusculeStack=minusculeStacks[greatestMStackIndex];
1249 //get rid of the smaller stacks
1250 for(size_t i=0;i<minusculeStacks.size();i++){
1251 if((int)i==greatestMStackIndex ) continue;
1252 RemoveMinusculeStack(minusculeStacks[i]);
1253 }
1254
1255
1256 // cout<<"Treat conflict group of size"+Str((int)conflict_group.size())+"\n";
1257 //try all combinations and record combination idx with the best energy.Keep track of saddle point for the entire function all, that is throughout all groups.
1258 for (long int j=0; j<n_combinations; j++) {
1259 minusculeStack=originalMinusculeStack;
1260
1261 if(MHS_debug)cout<<"combination "+Str((int)j)+" out of "+Str((int)n_combinations)<<endl;
1262 combination = GetCombination(conflict_group.size(),lookahead,j);
1263 double saddle_energy=WalkPartialPath(combination);
1264 if(MHS_debug)cout<<"obtains saddle "+Str(saddle_energy)+"\n";
1265 if(saddle_energy<best_combination_saddle_energy) {
1266 if(debug) cout<<"UpdateBestPartialPathTrail from current Trail of size "+Str((int)currentPartialPathTrail.size())+"\n";
1267 best_combination_saddle_energy=saddle_energy;
1268 bestPartialPathTrail=currentPartialPathTrail;
1269 }
1270 UndoPartialPathTrail();
1271 }
1272 if(debug) cout<<"AddBestPartialPathTrail of size "+Str((int)bestPartialPathTrail.size())+" ToPathTrail"<<std::endl;
1273 //updates pair_table
1274 AddBestPartialPathTrailToPathTrail();
1275 //remove the used up elements from the conflict group
1276 if(MHS_debug)cout<<"remove the used up elements from the conflict group of size "+Str((int)conflict_group.size())<<std::endl;
1277 if(MHS_debug) std::cout<<PrintConflictGroup(conflict_group,remove_catalog,add_catalog)<<std::endl;
1278
1279 for(size_t i=0;i<bestPartialPathTrail.size();i++){
1280 if(debug) cout<<"bestPartialPathTrail ["+Str((int)i)+"] "+Str(bestPartialPathTrail[i])<<std::endl;
1281 if(bestPartialPathTrail[i]<BP_ADD_CONST-1) continue;
1282
1283 //do not change horses midstream: ain't happening, as we break as soon as an element is deleted
1284 for(size_t j=0;j<conflict_group.size();j++){
1285 if(conflict_group[j][0]==bestPartialPathTrail[i]-BP_ADD_CONST){
1286 conflict_group.erase(conflict_group.begin()+j);
1287 if(debug)std::cout<<"erase "+Str((int)j)<<endl;
1288 break;
1289 }
1290 }
1291 if(debug)cout<<"conflict_group has now size "+Str((int)conflict_group.size())<<std::endl;
1292 }
1293
1294 }
1295 //call this before UpdateBacktrackBase, while add_catalog and remove_catalog are not emptied yet.
1296 if(debug) cout<<"AddPathTrailToStructureList"<<std::endl;
1297 AddPathTrailToStructureList(structures);
1298 UpdateBacktrackBase();
1299
1300 //cout<<std::endl;
1301 pathTrail.clear();
1302 }
1303
AddPathTrailToStructureList(std::vector<std::pair<double,std::string>> & structures)1304 void PartialPath::AddPathTrailToStructureList(std::vector<std::pair<double,std::string> >& structures){
1305 bool debug=false;
1306 if(debug) cout<<PrintPairTable()<<endl;
1307 std::string latest_structure=structures.back().second;
1308 MakePairTable(const_cast<char*>(latest_structure.c_str()));
1309 //turn latest structure into pair_table
1310 for(size_t i=0;i<pathTrail.size();i++){
1311 int idx=pathTrail[i];
1312 if(debug) cout<<"idx "<<idx<<endl;
1313 if(idx>=BP_ADD_CONST-1){
1314 idx-=BP_ADD_CONST;
1315 //-1 indicates nothing was added, it only holds the place in the conflict group
1316 if(idx==-1) continue;
1317 //add the bp to the pair_table and latest structure
1318 std::pair<int,int> bp=add_catalog[idx];
1319 latest_structure[bp.first-1]='(';
1320 latest_structure[bp.second-1]=')';
1321 pair_table[bp.first]=bp.second;
1322 pair_table[bp.second]=bp.first;
1323 double energy=FastEvalEnergy(sequence);
1324 structures.push_back(std::make_pair(energy,latest_structure));
1325 }
1326 else{
1327 //remove the bp from the pair_table and latest structure
1328 std::pair<int,int> bp=remove_catalog[idx];
1329 latest_structure[bp.first-1]='.';
1330 latest_structure[bp.second-1]='.';
1331 pair_table[bp.first]=0;
1332 pair_table[bp.second]=0;
1333 double energy=FastEvalEnergy(sequence);
1334 structures.push_back(std::make_pair(energy,latest_structure));
1335
1336 }
1337 if(debug) cout<<"becomes idx "<<idx<<endl;
1338 }
1339 if(debug){
1340 cout<<"New Structure List:"<<endl;
1341 for(size_t l=0;l<structures.size() ;l++){
1342 cout<<structures[l].second+" "+Str(structures[l].first)<<endl;
1343 }
1344 cout<<PrintPairTable()<<endl;
1345 }
1346 // pathTrail.clear();
1347 }
1348
1349
1350
1351 //erase every index listed in pathTrail from the add/remove catalog
UpdateBacktrackBase()1352 void PartialPath::UpdateBacktrackBase(){
1353 bool debug=false;
1354 if(MHS_debug) cout<<"UpdateBacktrackBase with pathTrail of size "+Str((int)pathTrail.size())<<std::endl;
1355
1356 for (int i=0; i<(int)pathTrail.size(); i++) {
1357 if(debug) cout<<"pathTrail["+Str(i)+"] "+Str(pathTrail[i])<<endl;
1358 }
1359 if(MHS_debug) cout<<"add_catalog size "+Str((int)add_catalog.size())+" remove_catalog size "+Str((int)remove_catalog.size())<<std::endl;
1360 //the lowest index comes at the end
1361 sort(pathTrail.begin(),pathTrail.end(),std::greater<int>());
1362 for (int i=0; i<(int)pathTrail.size(); i++) {
1363 int idx = pathTrail[i];
1364 if(MHS_debug) cout<<"idx "+Str(idx)+" ";
1365 if(idx>=BP_ADD_CONST-1){
1366 idx-=BP_ADD_CONST;
1367 //-1 is place holder in conflict group but does not affect add_catalog or src, as it is not associated with a base pair
1368 if(idx==-1) continue;
1369 std::vector<std::pair<int,int> >::iterator it=add_catalog.begin();
1370 advance(it,idx);
1371 if(MHS_debug) cout<<"erase "+PrintBasePair(*it)<<endl;
1372 add_catalog.erase(it);
1373 src.push_back(add_catalog[idx]);
1374
1375 }
1376 else {
1377 std::vector<std::pair<int,int> >::iterator it=remove_catalog.begin();
1378 advance(it,idx);
1379 if(MHS_debug) cout<<"erase "+PrintBasePair(*it)<<endl;
1380 remove_catalog.erase(it);
1381 std::vector<std::pair<int,int> >::iterator it2=src.begin();
1382 advance(it2,idx);
1383 src.erase(it2);
1384
1385 }
1386 }
1387 if(MHS_debug) cout<<"become add_catalog size "+Str((int)add_catalog.size())+" remove_catalog size "+Str((int)remove_catalog.size())<<endl;
1388 }
1389
1390
1391
1392
1393
1394
1395
1396
1397
1398
1399
1400 /*
1401 Returns lonely basepairs in the current conformation that are not also contained in the target confromation
1402 For stacks of length two that are not GC stackings, the outer base pair is returned.
1403 Assumes that base pairs are ordered increasingly in their first digits.
1404 */
1405
1406
GetMinusculeStacks()1407 std::vector<Stack> PartialPath::GetMinusculeStacks(){
1408 bool debug=false;
1409
1410 if(debug) std::cout<<"GetMinusculeStacks"<<endl;
1411 std::vector<Stack> ret= std::vector<Stack>();
1412 if(debug) cout<<PrintPairTable()<<endl;
1413 std::vector< std::pair<int,int> > current=PairTableToBasePairList(pair_table);
1414 if(debug) std::cout<<"obtained base pair list "+PrintBasePairList(current)<<std::endl;
1415
1416
1417
1418
1419 for(size_t i=0;i<current.size();i++){
1420 std::pair<int,int> bp=current[i];
1421 bool done=false;
1422 //continue of the current base pair (or a neighboring pair) is already listed in ret
1423 //(bc it is the outer bp ina stack, for which the inner bp was already treated)
1424 for(size_t j=0;j<ret.size();j++){
1425 if(ret[j].Contains(bp) || ret[j].Contains(bp.first-1,bp.second+1) || ret[j].Contains(bp.first+1,bp.second-1)) {
1426 done=true;
1427 break;
1428 }
1429 }
1430 if(done) continue;
1431 //ignore if this base pair is also part of the final structure
1432 if(find(target.begin(),target.end(),bp) !=target.end()) continue;
1433
1434 //build stacks by checking whether bases nieghboring bp are paired in the current structure
1435 bool GC=IsGCPair(bp,sequence);
1436 int stack_size=1;
1437
1438 std::pair<int,int> bp_outer= std::pair<int,int>(bp.first-1,bp.second+1);
1439 if(debug) std::cout<<"orig bp_outer "+PrintBasePair(bp_outer)<<endl;
1440 while(Stack::IsMinuscule(stack_size,GC) ){
1441 //finds base pair outside of bp
1442 if(find(current.begin(),current.end(),bp_outer)!=current.end()){
1443 stack_size++;
1444 if(!IsGCPair(bp_outer,sequence)) GC=false;
1445 bp_outer= std::pair<int,int>(bp_outer.first-1,bp_outer.second+1);
1446 if(debug) std::cout<<"then bp_outer "+PrintBasePair(bp_outer)<<endl;
1447 }
1448 else break;
1449
1450 }
1451
1452 std::pair<int,int> bp_inner= std::pair<int,int>(bp.first+1,bp.second-1);
1453 if(debug) std::cout<<"orig bp_inner "+PrintBasePair(bp_inner)<<endl;
1454 while(Stack::IsMinuscule(stack_size,GC)){
1455 //finds base pair outside of bp
1456 if(find(current.begin(),current.end(),bp_inner)!=current.end()){
1457 stack_size++;
1458 if(!IsGCPair(bp_inner,sequence)) GC=false;
1459 bp_inner= std::pair<int,int>(bp_inner.first+1,bp_inner.second-1);
1460 if(debug) std::cout<<"then bp_inner "+PrintBasePair(bp_inner)<<endl;
1461 }
1462 else break;
1463 }
1464 if(debug) std::cout<<"stack_size "<<stack_size<<endl;
1465 if(!Stack::IsMinuscule(stack_size,GC)) continue;
1466 if(debug) std::cout<<"is minuscule "<<stack_size<<endl;
1467 //we actually have a minuscule stack
1468 Stack mStack=Stack();
1469 bp_inner.first--;
1470 bp_inner.second++;
1471 if(debug) std::cout<<"bp_inner "+PrintBasePair(bp_inner)<<endl;
1472
1473 //need to record the outermost pair, which is obtained by moving back in from the last attempt to find an outer
1474 //base pair, which was too far out.
1475 // bp_outer= std::pair<int,int>(bp_outer.first+1,bp_outer.second-1);
1476 // std::cout<<"bp_outer "+PrintBasePair(bp_outer)<<endl;
1477 while(bp_inner!=bp_outer){
1478 mStack.AddOutside(bp_inner.first,bp_inner.second,sequence[bp_inner.first-1],sequence[bp_inner.second-1]);
1479 //bp_inner now graduall moves OUTSIDE (it moved insde in the loop further up)
1480 bp_inner= std::pair<int,int>(bp_inner.first-1,bp_inner.second+1);
1481 if(debug) std::cout<<"new bp_inner "+PrintBasePair(bp_inner)<<endl;
1482
1483 }
1484 if(debug) std::cout<<"add stack "+mStack.Print()<<endl;
1485 //add the stack if it is minuscule
1486 ret.push_back(mStack);
1487 }
1488 //std::cout<<" "<<endl;
1489 if(debug) std::cout<<"GetMinusculeStacks returns"<<endl;
1490 return ret;
1491
1492 }
1493
1494
1495
1496
1497 /*
1498 Returns lonely basepairs in the current conformation that are not also contained in the target confromation
1499 For stacks of length two that are not GC stackings, the outer base pair is returned.
1500 Assumes that base pairs are ordered increasingly in their first digits.
1501
1502
1503 std::vector<std::pair<int,int> > PartialPath::GetMinusculeStacks(){
1504
1505 std::vector<std::pair<int,int> > ret= std::vector<std::pair<int,int> >();
1506 for(size_t i=0;i<current.size();i++){
1507 std::pair<int,int> bp=current[i];
1508 //continue of the current base pair is already listed in ret (bc it is the outer bp ina stack, for which the inner bp was already treated)
1509 if(find(ret.begin(),ret.end(),bp)!=ret.end()) continue;
1510 //ignore if this base pair is also part of the final structure
1511 if(find(target.begin(),target.end(),bp) !=target.end()) continue;
1512
1513 bool GC=IsGCPair(bp,sequence);
1514 int stack_size=1;
1515
1516 std::pair<int,int> bp_outer= std::pair<int,int>(bp.first-1,bp.second+1);
1517 while(true){
1518 //finds base pair outside of bp
1519 if(find(current.begin(),current.end(),bp_outer)!=current.end()){
1520 stack_size++;
1521 if(!IsGCPair(bp_outer,sequence)) GC=false;
1522 bp_outer= std::pair<int,int>(bp_outer.first-1,bp_outer.second+1);
1523 }
1524 else break;
1525 }
1526
1527 std::pair<int,int> bp_inner= std::pair<int,int>(bp.first+1,bp.second-1);
1528 while(true){
1529 //finds base pair outside of bp
1530 if(find(current.begin(),current.end(),bp_inner)!=current.end()){
1531 stack_size++;
1532 if(!IsGCPair(bp_inner,sequence)) GC=false;
1533 bp_inner= std::pair<int,int>(bp_inner.first+1,bp_inner.second-1);
1534 }
1535 else break;
1536 }
1537 //need to record the outermost pair, which is obtained by moving back in from the last attempt to find an outer
1538 //base pair, which was too far out.
1539 bp_outer= std::pair<int,int>(bp_outer.first+1,bp_outer.second-1);
1540
1541 //if the stack is of size 2, we might already have added it to ret, when the other basepair in the stack was considered
1542 //in this case we don't add it again.
1543 if(find(ret.begin(),ret.end(),bp_outer)!=ret.end()) continue;
1544 //GC stacks need minimal size of 2
1545 if(GC && stack_size>=2) ret.push_back(bp_outer);
1546 //other stackings require a size of at least 3
1547 else if(!GC && stack_size>=3) ret.push_back(bp_outer);
1548
1549
1550 }
1551 return ret;
1552
1553 }
1554 */
1555
1556 std::vector<std::pair<int,int> > base_pairs2=std::vector<std::pair<int,int> > ();
1557
1558 int max_lonely_bp=1;
1559
1560 //std::vector<std::pair<double,std::string> >
DoPartialPath(std::vector<std::pair<double,std::string>> & path,const std::vector<int> & combination,std::string sequence,const std::vector<std::pair<int,int>> & backtrack_base,const std::map<int,std::vector<int>> & conflict_group,const std::vector<std::pair<int,int>> & only_in_base_pairs,const std::vector<std::pair<int,int>> & only_in_base_pairs2)1561 void DoPartialPath(std::vector<std::pair<double,std::string> > & path, const std::vector<int> & combination, std::string sequence, const std::vector<std::pair<int,int> > &
1562 backtrack_base, const std::map<int,std::vector<int> > & conflict_group, const std::vector<std::pair<int,int> > & only_in_base_pairs,
1563 const std::vector<std::pair<int,int> > & only_in_base_pairs2){
1564 /*
1565 void PartialPath(std::vector<std::pair<double,std::string> > & path,const std::vector<int> & combination, std::string sequence,const std::vector<std::pair<int,int> > & backtrack_base,
1566 const std::map<int,std::vector<int> > & conflict_group,const std::vector<std::pair<int,int> > & only_in_base_pairs,const std::vector<std::pair<int,int> > & only_in_base_pairs2){
1567 */
1568 path.clear();
1569 // std::vector<std::pair<double,std::string> > path=std::vector<std::pair<double,std::string> > ();
1570
1571 // std::cout<<"PartialPath Original PairTable "+PrintPairTable()<<std::endl;
1572 //bool debug=true;//false;
1573 std::string highest_structure;
1574 double highest = -INF;//std::numeric_limits<double>::min();
1575 double current_value=-INF;//std::numeric_limits<double>::min();
1576 std::string current_structure=sequence;
1577 std::vector<std::pair<int,int> > current_pairs =backtrack_base;
1578 std::vector<int> added_pairs= std::vector<int> ();
1579 std::vector<int> removed_pairs= std::vector<int> ();
1580 // CONSIDER THE POSSIBLE COMBINATIONS OF THE GIVEN CONFLICT GROUP IN TURN.
1581 for (size_t k=0; k<combination.size(); k++) {
1582 int idx = combination[k]-1;
1583 std::map<int,std::vector<int> >::const_iterator it=conflict_group.begin();
1584 advance(it,idx);
1585 std::pair<int,std::vector<int> > conflict=*it;
1586 int to_add=conflict.first;
1587 //if(to_add==1) continue;
1588 std::vector<int> to_remove=conflict.second;
1589 // remove what impedes adding the pair in combination[k], then
1590 // add it. TREAT ONE ELEMENT IN THE CONFLICT GROUP
1591 for (size_t l=0; l<to_remove.size(); l++) {
1592 size_t old_size=current_pairs.size();
1593 // TREAT ONE BASE PAIR
1594 #ifdef _DEBUG_MH_
1595 std::cout<<"remove pair ("+Str(only_in_base_pairs[to_remove[l]].first)+","+Str(only_in_base_pairs[to_remove[l]].second)+")"<<std::endl;
1596 #endif
1597 std::vector<std::pair<int,int> >::iterator it = remove(current_pairs.begin(),current_pairs.end(),only_in_base_pairs[to_remove[l]]);
1598
1599
1600 //if only_in_base_pairs[to_remove[l] hasn't been removed before
1601 if(it!=current_pairs.end()){
1602 std::vector<std::pair<int,int> > current_bp_after_remove= std::vector<std::pair<int,int> >(current_pairs.begin(),it);
1603
1604 /*
1605 std::vector<std::pair<int,int> > lonely_bp=GetMinusculeStacks(current_bp_after_remove,base_pairs2,sequence);
1606 int lonely_bp_count=lonely_bp.size();
1607 //if removing the requested base pair results in too many lonely base pairs, we have to get rid of the initial lonely base pair first
1608 if(lonely_bp_count>max_lonely_bp){
1609 //need to keep the original lonely_bp in memeory
1610 }
1611 */
1612 pair_table[only_in_base_pairs[to_remove[l]].first]=0;
1613 pair_table[only_in_base_pairs[to_remove[l]].second]=0;
1614 removed_pairs.push_back(to_remove[l]);
1615 }
1616 current_pairs.erase(it, current_pairs.end());
1617 if(current_pairs.size()<old_size){
1618 current_value = FastEvalEnergy(sequence);
1619 std::string current_structure= BasePairListToStructure1(sequence.length(),current_pairs);
1620 path.push_back(std::pair<double,std::string>(current_value,current_structure));
1621 }
1622 if ( current_value > highest ) {
1623 highest = current_value;
1624 highest_structure = current_structure;
1625 }
1626 }
1627 //-1 means that there is nothing to add any more so only the final contacts are removed
1628 if(to_add!=-1){
1629 #ifdef _DEBUG_MH_
1630 std::cout<<"add pair ("+Str(only_in_base_pairs2[to_add].first)+","+Str(only_in_base_pairs2[to_add].second)+")"<<std::endl;
1631 #endif
1632 current_pairs.push_back(only_in_base_pairs2[to_add]);
1633 pair_table[only_in_base_pairs2[to_add].first]=only_in_base_pairs2[to_add].second;
1634 pair_table[only_in_base_pairs2[to_add].second]=only_in_base_pairs2[to_add].first;
1635 added_pairs.push_back(to_add);
1636 current_value = FastEvalEnergy(sequence);
1637 std::string current_structure= BasePairListToStructure1(sequence.length(),current_pairs);
1638 path.push_back(std::pair<double,std::string>(current_value,current_structure));
1639 if ( current_value > highest ) {
1640 highest = current_value;
1641 highest_structure = current_structure;
1642 }
1643 }
1644 }
1645 //revert basepair additions and deletions
1646 // std::cout<<"PairTable after Partial Path "+PrintPairTable()+"\n";
1647
1648 for(size_t i=0;i<added_pairs.size();i++){
1649 pair_table[only_in_base_pairs2[added_pairs[i]].first]=0;
1650 pair_table[only_in_base_pairs2[added_pairs[i]].second]=0;
1651 }
1652 for(size_t i=0;i<removed_pairs.size();i++){
1653 pair_table[only_in_base_pairs[removed_pairs[i]].first]=only_in_base_pairs[removed_pairs[i]].second;
1654 pair_table[only_in_base_pairs[removed_pairs[i]].second]=only_in_base_pairs[removed_pairs[i]].first;
1655 }
1656
1657 // std::cout<<"Restored PairTable "+PrintPairTable()+"\n";
1658 path.push_back(std::pair<double,std::string>(highest,highest_structure));
1659 //return path;
1660 }
1661
1662 /*
1663 determine the number of conflicts for each bp in only_in_base_pairs2.Build a group out of the set of bp sharing the lowest # of conflicts
1664 */
1665
1666 //std::map<int,std::vector<int> >
GetConflictGroup(std::map<int,std::vector<int>> & conflict_group,const std::vector<std::pair<int,int>> & only_in_base_pairs,const std::vector<std::pair<int,int>> & only_in_base_pairs2)1667 void GetConflictGroup(std::map<int,std::vector<int> > & conflict_group, const std::vector<std::pair<int,int> > & only_in_base_pairs,const std::vector<std::pair<int,int> > & only_in_base_pairs2) {
1668 conflict_group.clear();
1669 //bool debug=false;//true;
1670 std::vector<int> conflicts=std::vector<int>(only_in_base_pairs2.size());
1671 // std::map<int,std::vector<int> > conflict_group=std::map<int,std::vector<int> >();
1672 if(only_in_base_pairs2.size()>0){
1673 size_t minsize=only_in_base_pairs.size();
1674 for (size_t i=0; i<only_in_base_pairs2.size();i++) {
1675 conflicts[i]=0;
1676 for (size_t j=0; j<only_in_base_pairs.size();j++) {
1677 if(BasePairConflict(only_in_base_pairs2[i],only_in_base_pairs[j])) conflicts[i]++;
1678 }
1679 #ifdef _DEBUG_MH_
1680 std::cout<<"only_in_base_pairs2["+Str((int)i)+"]: ("+Str(only_in_base_pairs2[i].first)+","+Str(only_in_base_pairs2[i].second)+")\n";
1681 std::cout<<"conflicts[i]:"<<conflicts[i]<<"\n";
1682 #endif
1683 if(conflicts[i]<(int)minsize) minsize=conflicts[i];
1684 }
1685 #ifdef _DEBUG_MH_
1686 std::cout<<"minsize:"<<minsize<<"\n";
1687 #endif
1688 for (size_t i=0; i<only_in_base_pairs2.size();i++) {
1689 if(conflicts[i]==(int)minsize){
1690 conflict_group[i]=std::vector<int>();
1691 for (size_t j=0; j<only_in_base_pairs.size();j++) {
1692 if(BasePairConflict(only_in_base_pairs2[i],only_in_base_pairs[j])) conflict_group[i].push_back(j);
1693 }
1694 }
1695 }
1696 }
1697 else{
1698 conflict_group[-1]=std::vector<int>();
1699 for (size_t i=0; i<only_in_base_pairs.size(); i++) conflict_group[-1].push_back(i);
1700 }
1701 // return conflict_group;
1702 }
1703
1704
1705
SetDifference(std::vector<std::pair<int,int>> minuend,std::vector<std::pair<int,int>> subtrahend)1706 std::vector<std::pair<int,int> > SetDifference(std::vector<std::pair<int,int> > minuend,std::vector<std::pair<int,int> > subtrahend){
1707 std::vector<std::pair<int,int> > ret=std::vector<std::pair<int,int> > ();
1708 for (size_t i=0; i<minuend.size(); i++) {
1709 if (find(subtrahend.begin(),subtrahend.end(),minuend[i]) == subtrahend.end()) ret.push_back(minuend[i]);
1710 }
1711 return ret;
1712 }
1713
1714 //std::vector<int>
FindBestPartialPathCombination(std::vector<int> & best_combination,int lookahead,std::string sequence,const std::vector<std::pair<int,int>> & backtrack_base,const std::map<int,std::vector<int>> & conflict_group,const std::vector<std::pair<int,int>> & only_in_base_pairs,const std::vector<std::pair<int,int>> & only_in_base_pairs2)1715 void FindBestPartialPathCombination(std::vector<int> & best_combination,int lookahead,std::string sequence,const std::vector<std::pair<int,int> > & backtrack_base,
1716 const std::map<int,std::vector<int> > & conflict_group,const std::vector<std::pair<int,int> > & only_in_base_pairs,
1717 const std::vector<std::pair<int,int> > & only_in_base_pairs2){
1718 #ifdef _DEBUG_MH_
1719 std::cout<<"FindBestPartialPathCombination"<<std::endl;
1720 #endif
1721 best_combination.clear();
1722 long int n_combinations = N_take_k( conflict_group.size(),lookahead);
1723 std::string best_combination_saddle=std::string();
1724 double best_combination_saddle_energy = INF;//<double>::max();
1725 //std::cout<<"n_combinations: "+Str((int)n_combinations)<<std::endl;
1726 best_combination.clear();
1727
1728
1729 std::vector<int> combination= std::vector<int>();
1730 std::vector<std::pair<double,std::string> > partial_path=std::vector<std::pair<double,std::string> > ();
1731 //try all combinations and record combination idx with the best energy.Keep track of saddle point for the entire function all, that is throughout all groups.
1732 for (long int j=0; j<n_combinations; j++) {
1733 combination = GetCombination(conflict_group.size(),lookahead,j);
1734 #ifdef _DEBUG_MH_
1735 std::cout<<"obtained combination for partial_path"<<std::endl;
1736 for(size_t r=0;r<combination.size();r++) std::cout<<combination[r];
1737 std::cout<<std::endl;
1738 #endif
1739
1740 DoPartialPath(partial_path,combination,sequence,backtrack_base,conflict_group,only_in_base_pairs,only_in_base_pairs2);
1741 double current_combination_saddle_energy=partial_path.back().first;
1742 std::string current_combination_saddle=partial_path.back().second;
1743 #ifdef _DEBUG_MH_
1744 std::cout<<"current_combination_saddle "+current_combination_saddle+" "+Str(current_combination_saddle_energy)<<std::endl;
1745 #endif
1746 // take the minimum value of the permutations of this group achieved so far.
1747 if (current_combination_saddle_energy < best_combination_saddle_energy ) {
1748 best_combination_saddle_energy = current_combination_saddle_energy;
1749 best_combination_saddle =current_combination_saddle;
1750 best_combination=combination;
1751 #ifdef _DEBUG_MH_
1752 std::cout<<"new best_combination_saddle "+best_combination_saddle+" "+Str(best_combination_saddle_energy)<<std::endl;
1753 std::cout<<"new best combination ";
1754 for(size_t r=0;r<best_combination.size();r++) std::cout<<best_combination[r];
1755 std::cout<<std::endl;
1756 #endif
1757 }
1758 }
1759 // return best_combination;
1760 }
1761
1762 /*
1763 Updates basepairs to reflect the base pairs that have been added to the current structure on the Morgan Higgs path
1764 */
1765
1766 //std::vector<std::pair<int,int> >
UpdateBacktrackBase(std::vector<std::pair<int,int>> & base_pairs,const std::map<int,std::vector<int>> & conflict_group,const std::vector<int> & remove,const std::vector<std::pair<int,int>> & only_in_base_pairs,const std::vector<std::pair<int,int>> & only_in_base_pairs2)1767 void UpdateBacktrackBase(std::vector<std::pair<int,int> >& base_pairs,const std::map<int,std::vector<int> > & conflict_group,const std::vector<int> & remove,
1768 const std::vector<std::pair<int,int> > & only_in_base_pairs,const std::vector<std::pair<int,int> > & only_in_base_pairs2){
1769 for (size_t i=0; i<remove.size(); i++) {
1770 int idx = remove[i]-1;
1771 std::map<int,std::vector<int> >::const_iterator it=conflict_group.begin();
1772 advance(it,idx);
1773 std::pair<int,std::vector<int> > conflict=*it;
1774 int to_add=conflict.first;
1775 //if(to_add==1) continue;
1776 std::vector<int> to_remove=conflict.second;
1777
1778 for (size_t j=0; j<to_remove.size(); j++) {
1779 std::pair<int,int> p=only_in_base_pairs[to_remove[j]];
1780 for (size_t k=0; k<base_pairs.size();k++) {
1781 if(base_pairs[k]==p) {
1782 base_pairs.erase(base_pairs.begin()+k);
1783 break;
1784 }
1785 }
1786 }
1787 //-1 means that there is nothing to add any more
1788 if(to_add!=-1){
1789 base_pairs.push_back(only_in_base_pairs2[to_add]);
1790 }
1791 }
1792 // return base_pairs;
1793 }
1794
1795 /*
1796 bool StackNotMadeFromBases(std::vector<std::pair<int,int> > stack,std::vector<int> bases){
1797 std::cout<<"StackNotMadeFromBases\n";
1798 std::cout<<"Base:";
1799 for(size_t j=0;j<bases.size();j++){
1800 std::cout<<bases[j]<<" ";
1801 }
1802 std::cout<<"\n";
1803 for(size_t j=0;j<stack.size();j++){
1804 int base1=stack[j].first;
1805 int base2=stack[j].second;
1806 std::cout<<"base1:"+Str(base1)+"\n";
1807 std::cout<<"base2:"+Str(base2)+"\n";
1808 //break if base1 was paired
1809 if(find( bases.begin(), bases.end(),base1)!= bases.end()) return false;
1810 //break if base2 was paired
1811 if(find( bases.begin(), bases.end(),base2)!= bases.end()) return false;
1812 }
1813 return true;
1814 }
1815 */
1816
1817 static std::vector<std::pair<double,std::string> > path;
SeedPath(std::string sequence,std::string src)1818 void SeedPath(std::string sequence,std::string src){
1819
1820 path=std::vector<std::pair<double,std::string> >(1,std::pair<double,std::string>(FastEvalEnergy(sequence),src));
1821
1822 }
1823
1824 /**
1825 *
1826 */
1827
1828
1829 //std::pair<double,std::string>
MorganHiggsStudlaEnergy(std::string sequence,std::string src,std::string tgt,double saddlE,int lookahead,std::string grouping)1830 std::vector<std::pair<double,std::string> > MorganHiggsStudlaEnergy(std::string sequence,std::string src,std::string tgt,double saddlE,int lookahead,std::string grouping){
1831 int interrupt=-1;
1832 bool debug=false;
1833 #ifdef _DEBUG_MH_
1834 std::cout<<"MorganHiggsStudlaEnergy with lookahead "+Str(lookahead)+" and grouping "+grouping+"\n";
1835 std::cout<<"src:\n"+src+":"+Str(FastEvalEnergy(sequence))+"\n";
1836 std::cout<<"tgt:\n"+tgt+":"+Str(EvalEnergy(sequence,tgt))+"\n";
1837 #endif
1838
1839 // Obtain the vector of base pairs with values in [1,Node::matrix_size]
1840 std::vector<std::pair<int,int> > base_pairs = MakeBasePairList1(src);
1841 // std::vector<std::pair<int,int> >
1842 base_pairs2 = MakeBasePairList1(tgt);
1843 std::vector<std::pair<int,int> > remove_catalog =SetDifference(base_pairs,base_pairs2);
1844 std::vector<std::pair<int,int> > add_catalog =SetDifference(base_pairs2,base_pairs);
1845 if(MHS_debug) std::cout<<"add_catalog: "+PrintBasePairList(add_catalog)<<endl;
1846 if(MHS_debug) std::cout<<"remove_catalog: "+PrintBasePairList(remove_catalog)<<endl;
1847
1848
1849
1850
1851 //src is the first element in the path
1852 // std::vector<std::pair<double,std::string> > path=new std::vector<std::pair<double,std::string> >();
1853
1854
1855 //path.push_back(std::pair<double,std::string>(FastEvalEnergy(sequence),src));
1856
1857 SeedPath(sequence,src);
1858
1859
1860
1861 PartialPath partial=PartialPath(base_pairs,base_pairs2,sequence,lookahead,remove_catalog,add_catalog);
1862
1863 while(partial.add_catalog.size()>0 || partial.remove_catalog.size()>0){
1864
1865 //treat all combinations in a conflict group
1866 int prevPathSize=path.size();
1867 // MakePairTableFromBasePairs(partial.src,src.size());
1868
1869 MakePairTable(const_cast<char*>(path.back().second.c_str()));
1870 if(MHS_debug) cout<<"MorganHiggsEnergy calls FindOptimalPath with pair_table "+PrintPairTable()<<endl;
1871 partial.FindOptimalPath(path);
1872 if(MHS_debug) cout<<"pair_table after FindOptimalPath:"+PrintPairTable()<<endl;
1873 //handle inerrupt
1874 for(size_t k=prevPathSize;k<path.size();k++){
1875 if(path[k].first>saddlE && interrupt==-1) interrupt=k;//path.size();
1876 }
1877 //save varialbes between partialpaths
1878 }
1879
1880
1881 if(interrupt==-1) interrupt=path.size();
1882 path.push_back(make_pair((double)interrupt,std::string()));
1883
1884 // cout<<"Obtained path:"<<endl;
1885 //for(size_t l=0;l<path.size() ;l++){
1886 // cout<<path[l].second+" "+Str(path[l].first)<<endl;
1887 // }
1888 if(MHS_debug) cout<<PrintPairTable()<<endl;
1889 return path;
1890 }
1891
1892
1893
1894 //std::pair<double,std::string>
MorganHiggsEnergy(std::string sequence,std::string src,std::string tgt,double saddlE,int lookahead,std::string grouping)1895 std::vector<std::pair<double,std::string> > MorganHiggsEnergy(std::string sequence,std::string src,std::string tgt,double saddlE,int lookahead,std::string grouping){
1896 int interrupt=-1;
1897
1898 #ifdef _DEBUG_MH_
1899 std::cout<<"MorganHiggsEnergy with lookahead "+Str(lookahead)+" and grouping "+grouping+"\n";
1900 std::cout<<"src:\n"+src+":"+Str(FastEvalEnergy(sequence))+"\n";
1901 std::cout<<"tgt:\n"+tgt+":"+Str(EvalEnergy(sequence,tgt))+"\n";
1902 #endif
1903
1904 // Obtain the vector of base pairs with values in [1,Node::matrix_size]
1905 std::vector<std::pair<int,int> > base_pairs = MakeBasePairList1(src);
1906 // std::vector<std::pair<int,int> >
1907 base_pairs2 = MakeBasePairList1(tgt);
1908 std::vector<std::pair<int,int> > only_in_base_pairs =SetDifference(base_pairs,base_pairs2);
1909 std::vector<std::pair<int,int> > only_in_base_pairs2 =SetDifference(base_pairs2,base_pairs);
1910
1911 //Nukleationszentrum: Habe x zusammenhaengende Basenspaare in only_in_base_pairs2, von denen keine einzige Base in only_in_base_pairs
1912 //war. xist die maximale helixgroesse in base_pairs aber hoechstens 3.
1913
1914 // std::vector<std::vector<std::pair<int,int> > > front_stacks=ConformationToStacks(base_pairs);
1915 // std::vector<int> paired_bases_in_front=std::vector<int>();
1916 // int helix_max=0;
1917 // for(int i=0;i<front_stacks.size();i++){
1918 // std::vector<std::pair<int,int> > stack= front_stacks[i];
1919 // for(int j=0;j<stack.size();j++){
1920 // paired_bases_in_front.push_back(stack[j].first);
1921 // paired_bases_in_front.push_back(stack[j].second);
1922 // }
1923 // if(stack.size()>helix_max) helix_max=stack.size();
1924 // }
1925 // if(helix_max>3) helix_max=3;
1926 // std::cout<<"helix_max:"+Str(helix_max )+"\n";
1927
1928
1929 // std::cout<<"# elts in only_in_base_pairs2:"+Str((int)only_in_base_pairs2.size())+" \n";
1930 // std::vector<std::vector<std::pair<int,int> > > new_stacks=ConformationToStacks(only_in_base_pairs2);
1931 // std::cout<<"# new stacks:"+Str((int)new_stacks.size())+" \n";
1932 // bool have_nucleation=false;
1933 // for(int i=0;i<new_stacks.size();i++){
1934 // std::vector<std::pair<int,int> > stack= new_stacks[i];
1935 // if(stack.size()<helix_max) continue;
1936 // //the stack is large enough. Ensure that none of its bases are in a base pair in base_pairs.
1937 // if(StackNotMadeFromBases(stack,paired_bases_in_front)){
1938 // std::cout<<"Have nucleation\n";
1939 // have_nucleation=true;
1940 // break;
1941 // }
1942 // }
1943 // if(!have_nucleation){
1944 // std::cout<<"Cannot fold form src to tgt because there is no nucleation center\n";
1945 // std::cout<<src+"\n";
1946 // std::cout<<tgt+"\n";
1947 // std::vector<std::pair<double,std::string> > path= std::vector<std::pair<double,std::string> >();
1948 // path.push_back(std::pair<double,std::string>(1000,src));
1949 // return path;
1950 // }
1951
1952 //src is the first element in the path
1953 std::vector<std::pair<double,std::string> > path=std::vector<std::pair<double,std::string> >();
1954 path.push_back(std::pair<double,std::string>(FastEvalEnergy(sequence),src));
1955 std::vector<std::pair<double,std::string> > partial_path= std::vector<std::pair<double,std::string> >();
1956 std::map<int,std::vector<int> > conflict_group= std::map<int,std::vector<int> >();
1957 std::vector<int> combination= std::vector<int>();
1958 while(only_in_base_pairs2.size()>0 || only_in_base_pairs.size()>0){
1959
1960 #ifdef _DEBUG_MH_
1961 std::cout<<"base_pairs:\n"<<BasePairListToStructure1(sequence.length(),base_pairs)<<"\n";
1962 std::cout<<"base_pairs2:\n"<<BasePairListToStructure1(sequence.length(),base_pairs2)<<"\n";
1963 std::cout<<"only_in_base_pairs:\n"<<BasePairListToStructure1(sequence.length(),only_in_base_pairs)<<"\n";
1964 std::cout<<"only_in_base_pairs2:\n"<<BasePairListToStructure1(sequence.length(),only_in_base_pairs2)<<"\n";
1965 #endif
1966
1967 GetConflictGroup(conflict_group,only_in_base_pairs,only_in_base_pairs2);
1968 #ifdef _DEBUG_MH_
1969 std::cout<<"got conflict_group"<<"\n";
1970 std::cout<<"PrintConflictGroup\n";
1971 std::cout<<PrintConflictGroup(conflict_group,only_in_base_pairs,only_in_base_pairs2)+"\n";
1972 #endif
1973 //treat all combinations in a conflict group
1974 while(conflict_group.size()>0){
1975 #ifdef _DEBUG_MH_
1976 std::cout<<"conflict_group.size "+Str((int)conflict_group.size())<<std::endl;
1977 std::cout<<"Look for a path combination"<<std::endl;
1978 #endif
1979
1980 FindBestPartialPathCombination(combination,lookahead,sequence,base_pairs,conflict_group,only_in_base_pairs,only_in_base_pairs2);
1981 #ifdef _DEBUG_MH_
1982 std::cout<<"PrintCombination()"<<std::endl;
1983 std::cout<<PrintCombination(combination)<<std::endl;
1984 std::cout<<"take its partial path"<<std::endl;
1985 #endif
1986
1987 DoPartialPath(partial_path,combination,sequence,base_pairs,conflict_group,only_in_base_pairs,only_in_base_pairs2);
1988
1989 //Stop one element short as to not push group saddle back here. It is done right before MorganHiggsEnergy returns!
1990 #ifdef _DEBUG_MH_
1991 std::cout<<"Print partial path"<<std::endl;
1992 #endif
1993 for(size_t k=0;k<partial_path.size()-1;k++) {
1994 #ifdef _DEBUG_MH_
1995 std::cout<<partial_path[k].second+":"+Str(partial_path[k].first)<<std::endl;
1996 #endif
1997 path.push_back(partial_path[k]);
1998 //XXXXXXXXXXXXXX
1999 //can only interrupt once
2000 if(partial_path[k].first>saddlE && interrupt==-1) interrupt=path.size();
2001 }
2002
2003
2004 //update the elements in base_pairs.
2005 #ifdef _DEBUG_MH_
2006 std::cout<<"UpdateBacktrackBase"<<std::endl;
2007 std::cout<<"old backtrackbase:\n"<<BasePairListToStructure1(sequence.length(),base_pairs)<<std::endl;
2008 #endif
2009 UpdateBacktrackBase(base_pairs,conflict_group,combination,only_in_base_pairs,only_in_base_pairs2);
2010 MakePairTableFromBasePairs(base_pairs,src.size());
2011 #ifdef _DEBUG_MH_
2012 std::cout<<"new backtrackbase:\n"<<BasePairListToStructure1(sequence.length(),base_pairs)<<std::endl;
2013 //find new conflict group after each constructed partial path
2014 std::cout<<"grouping is _"+grouping+"_"<<std::endl;
2015 #endif
2016 if(grouping=="regroup") break;
2017 else if(grouping=="standard"){
2018
2019 #ifdef _DEBUG_MH_REGROUP_
2020 std::cout<<"Regroup ConflictGroup"<<std::endl;
2021 std::cout<<PrintConflictGroup(conflict_group,only_in_base_pairs,only_in_base_pairs2)+"\n";
2022 //std::cout<<PrintConflictGroup(conflict_group)<<std::endl;
2023 #endif
2024 sort(combination.begin(),combination.end());
2025 for (size_t k=0; k<combination.size(); k++) {
2026 //delete elements in the combination, i.e. those that have not been used to construct the partial path
2027 //add 1 to k, as combinations start at 1, not at 0
2028 //remove the ones that _were_ in the combination
2029 //int k=it->first+1;
2030 // combination 1 2
2031 //conflict group
2032 //0: 0 1
2033 //1: 0 1
2034 //3: 1 2
2035 //need:
2036 //1 -> 0: 0 1
2037 //2 -> 1: 0 1
2038 std::map<int,std::vector<int> >::iterator it=conflict_group.begin();
2039 //the -k compensates for the deletions from conflict_group in previous iterations
2040 advance(it,combination[k]-1-k);
2041 #ifdef _DEBUG_MH_REGROUP_
2042 //the index of the base pair in only_in_base_pairs2==oibp2 that k refers to in the combination and that hence has been added to the path
2043 size_t idx_oibp2=it->first;
2044 std::cout<<"before erasing "+Str(idx_oibp2)<<std::endl;
2045 std::cout<<PrintConflictGroup(conflict_group,only_in_base_pairs,only_in_base_pairs2)+"\n";
2046 //std::cout<<PrintConflictGroup(conflict_group)<<std::endl;
2047 #endif
2048 conflict_group.erase(it);
2049 #ifdef _DEBUG_MH_REGROUP_
2050 std::cout<<"after erasing "+Str(idx_oibp2)<<std::endl;
2051 std::cout<<PrintConflictGroup(conflict_group,only_in_base_pairs,only_in_base_pairs2)+"\n";
2052 //std::cout<<PrintConflictGroup(conflict_group)<<std::endl;
2053 #endif
2054 }
2055 #ifdef _DEBUG_MH_
2056 std::cout<<"New ConflictGroup"<<std::endl;
2057 std::cout<<PrintConflictGroup(conflict_group,only_in_base_pairs,only_in_base_pairs2)+"\n";
2058 //std::cout<<PrintConflictGroup(conflict_group)<<std::endl;
2059 #endif
2060 }
2061 }
2062 #ifdef _DEBUG_MH_
2063 std::cout<<"recalculate only_in_base_pairs"<<std::endl;
2064 #endif
2065 only_in_base_pairs=SetDifference(base_pairs,base_pairs2);
2066 #ifdef _DEBUG_MH_
2067 std::cout<<"recalculate only_in_base_pairs2"<<std::endl;
2068 #endif
2069 only_in_base_pairs2=SetDifference(base_pairs2,base_pairs);
2070 }
2071
2072 // //the global saddle, the max across the final path
2073 // std::pair<double,std::string> saddle=path[0];
2074 // //std::cout<<"MH path"<<std::endl;
2075 // for(size_t i=1;i<path.size();i++){
2076 // if(path[i].first>saddle.first) saddle=path[i];
2077 // //std::cout<<path[i].second+" "+Str(path[i].first)<<std::endl;
2078 // }
2079 // path.push_back(saddle);
2080
2081 if(interrupt==-1) interrupt=path.size();
2082 path.push_back(make_pair((double)interrupt,std::string()));
2083 return path;
2084 }
2085
2086
PrintConflictGroup(std::vector<std::vector<int>> conflict_group,std::vector<std::pair<int,int>> only_in_base_pairs,std::vector<std::pair<int,int>> only_in_base_pairs2)2087 std::string PrintConflictGroup(std::vector<std::vector<int> > conflict_group,std::vector<std::pair<int,int> > only_in_base_pairs,
2088 std::vector<std::pair<int,int> > only_in_base_pairs2){
2089 std::string s;
2090 for(std::vector<std::vector<int> >::iterator it=conflict_group.begin();it!=conflict_group.end();it++){
2091 std::vector<int> v=*it;
2092 s+=Str(v[0])+" "+PrintBasePair(only_in_base_pairs2[v[0]])+":";
2093 for(size_t j=1;j<v.size();j++) s+=Str(v[0])+" "+PrintBasePair(only_in_base_pairs[v[j]])+" ";
2094 s+="\n";
2095 }
2096 return s;
2097
2098 // int to_add=conflict.first;
2099
2100 // std::vector<int> to_remove=conflict.second;
2101 }
2102 //ORIGINAL
PrintConflictGroup(std::map<int,std::vector<int>> conflict_group,std::vector<std::pair<int,int>> only_in_base_pairs,std::vector<std::pair<int,int>> only_in_base_pairs2)2103 std::string PrintConflictGroup(std::map<int,std::vector<int> > conflict_group,std::vector<std::pair<int,int> > only_in_base_pairs,
2104 std::vector<std::pair<int,int> > only_in_base_pairs2){
2105 std::string s;
2106 for(std::map<int,std::vector<int> >::iterator it=conflict_group.begin();it!=conflict_group.end();it++){
2107 s+=PrintBasePair(only_in_base_pairs2[it->first])+":";
2108 std::vector<int> v=it->second;
2109 for(size_t j=0;j<v.size();j++) s+=PrintBasePair(only_in_base_pairs[v[j]])+" ";
2110 s+="\n";
2111 }
2112 return s;
2113
2114 // int to_add=conflict.first;
2115
2116 // std::vector<int> to_remove=conflict.second;
2117 }
2118
PrintCombination(std::vector<int> combination)2119 std::string PrintCombination(std::vector<int> combination){
2120 std::string s;
2121 for(std::vector<int>::iterator it=combination.begin();it!=combination.end();it++){
2122 s+=Str(*it)+" ";
2123 }
2124 s+="\n";
2125 return s;
2126
2127 }
2128
MakePairTableFromBasePairs(const std::vector<std::pair<int,int>> & base_pairs,int length)2129 void MakePairTableFromBasePairs(const std::vector<std::pair<int,int> >& base_pairs,int length){
2130 for(int i=1;i<=length;i++){
2131 pair_table[i]=0;
2132 }
2133 for(size_t i=0;i<base_pairs.size();i++){
2134 pair_table[base_pairs[i].first]=base_pairs[i].second;
2135 pair_table[base_pairs[i].second]=base_pairs[i].first;
2136 }
2137
2138 }
2139
2140
2141 #ifdef _TEST_MORGANHIGGS_
2142 extern "C" {
2143 #include "fold.h"
2144 #include "fold_vars.h"
2145 #include "utils.h"
2146 #include "pair_mat.h"
2147 }
2148 //extern
2149 short *S;
2150 //extern
2151 short *S1;
2152 //extern
2153 short *pair_table;
2154 // End of file
main(int argc,char * argv[])2155 int main(int argc, char *argv[]) {
2156
2157 double T=-2.2;
2158 double mod= fmod(T, 1);
2159 std::cout<<mod<<std::endl;
2160
2161 OptionS* OptS;
2162 OptS = decodeCML(argc, argv);
2163 double saddlE=10000;
2164 std::cout<<"Enter sequence, start structure and target structure, separated by new lines"<<std::endl;
2165 std::string sequence;
2166 std::string src;
2167 std::string tgt;
2168
2169 std::cin>>sequence;
2170 std::cin>>src;
2171 std::cin>>tgt;
2172 OptS->transcribed=src.size();
2173 InitializeEnergyModel(OptS,sequence);
2174 //InitializeViennaRNA(sequence,OptS->dangle,src.size());
2175 MakePairTable(const_cast<char*>(src.c_str()));
2176 std::cout<<"new:"+PrintPairTable()<<std::endl;
2177
2178 std::cout<<"S "<<S[0]<<std::endl;
2179 std::cout<<"S1 "<<S[1]<<std::endl;
2180
2181 std::vector<std::pair<double,std::string> > path;
2182 if(OptS->barrier_heuristic=='M'){
2183 path=MorganHiggsEnergy(sequence,src,tgt,saddlE,OptS->lookahead,OptS->grouping);
2184 }
2185 else if(OptS->barrier_heuristic=='S'){
2186 path=MorganHiggsStudlaEnergy(sequence,src,tgt,saddlE,OptS->lookahead,OptS->grouping);
2187 }
2188 for(size_t l=0;l<path.size() ;l++){
2189 cout<<path[l].second+" "+Str(path[l].first)<<endl;
2190 }
2191 return 0;
2192 }
2193 #endif
2194