1 /*
2 Last changed Time-stamp: <2007-07-10 19:27:22 xtof>
3 $Id: Node.cpp,v 1.35 2007/11/03 16:45:58 Kinwalker Exp $
4 */
5
6 #include <algorithm>
7 #include <iostream>
8 #include <iterator>
9 #include <utility>
10 #include "Node.h"
11
12 #include "template_utils.c"
13
14 extern "C" {
15 #include "findpath.h"
16 }
17
18 #define MS_PER_TIME_UNIT .0001
19 #define TIME_VS_DELTAG_DY_DX (8.0/11.0)
20 #define EPSILON .00000000001
21
22 #define MIN_ENERGY_DIFF .01
23
24 double BARRIER_TOO_HIGH=10000;//std::numeric_limits<int>::max();
25 // class variables
26 int Node::verbose;
27 int Node::lookahead;
28 int Node::matrix_size;
29 std::string Node::grouping;
30 std::string Node::sequence;
31 std::string Node::front_structure;
32 double Node::front_energy;
33 std::string Node::mfe_structure;
34 double Node::mfe;
35 double Node::energy_barrier;
36 double Node::max_barrier;
37 std::vector<std::vector<bool> > Node::front =std::vector<std::vector<bool> >();
38 std::vector<Node*> Node::front_extrema = std::vector<Node*>() ;
39 std::vector<Node*> Node::extrema = std::vector<Node*>() ;
40 std::vector<double> Node::extension_cost = std::vector<double>() ;
41
42
43 std::string Node::constraint_string;
44 int Node::ineligible_count=0;
45 int Node::transcribed;
46 int Node::initially_transcribed;
47 std::string Node::front_trajectory_ps;
48 bool Node::print_front_trajectory=false;
49 int Node::interrupt_trajectory;
50 int Node::windowsize;
51
52 bool Node::transcribed_front_maximal=false;
53 double Node::transcription_rate;
54 double Node::time;
55 OptionS* Node::OptS=NULL;
56 std::vector<std::string> Node::trajectory= std::vector<std::string>();
57 std::vector<std::pair<double,std::string> > Node::path=std::vector<std::pair<double,std::string> > ();
58
59 short *S;
60 short *S1;
61 short *pair_table;
62
63
64
65 struct BasePairConflict :public std::binary_function<std::pair<int,int>,std::pair<int,int>,bool>{
operator ()BasePairConflict66 bool operator()(std::pair<int,int> p1,std::pair<int,int> p2) const{
67 if(p1.first==p2.first && p1.second==p2.second) return false;
68 if(p1.first==p2.first || p1.first==p2.second || p1.second==p2.first || p1.second==p2.second) return true;
69 if(p1.first<=p2.first && p1.second>= p2.first && p1.second<=p2.second) return true;
70 if(p1.first>=p2.first && p1.first<=p2.second && p1.second>=p2.second) return true;
71 else return false;
72 }
73 };
74
75
DeltaGToTimeUnits(double delta_G)76 double Node::DeltaGToTimeUnits(double delta_G){
77 //Cout("delta_G:"+Str(delta_G));
78 double units= pow(10.0,TIME_VS_DELTAG_DY_DX*delta_G);
79
80 //Cout("units:"+Str(units));
81 return units;
82 }
83
TimeUnitsToDeltaG(double units)84 double Node::TimeUnitsToDeltaG(double units){
85 return log10(units)/TIME_VS_DELTAG_DY_DX;
86 }
87
TimeToDeltaG(double time)88 double Node::TimeToDeltaG(double time){
89 if(time<=EPSILON) return 0.0;
90 double units=time*1000/MS_PER_TIME_UNIT;
91 double delta_G= Node::TimeUnitsToDeltaG(units);
92 if(delta_G<0){
93 Cout("Error in TimeToDelta G. Delta G is "+Str(delta_G)+" for time "+Str(time));
94 exit(0);
95 }
96 return delta_G;
97 }
98
TimePassedFolding(double delta_G)99 double Node::TimePassedFolding(double delta_G){
100 double units=Node::DeltaGToTimeUnits(delta_G);
101 // Cout("units:"+Str(units)+"\n");
102 double msec=units*MS_PER_TIME_UNIT;
103 // Cout("msec:"+Str(msec)+"\n");
104 if(verbose>=2) Cout(Str(msec*.001)+" secs passed folding from dG "+Str(delta_G)+"\n");
105 return msec*.001;
106 }
107
108
109
ProcessOptions(OptionS * OptS)110 void Node::ProcessOptions(OptionS* OptS){
111 // Allocate memory
112 //obtain input sequence
113 Node::OptS=OptS;
114 if (OptS->testseq) Node::sequence = "ACAGGUUCGCCUGUGUUGCGAACCUGCGGGUUCG";
115 else std::cin >> Node::sequence;
116
117
118 Node::print_front_trajectory=OptS->printfront;
119 Node::grouping = std::string(OptS->grouping);
120 Node::verbose = OptS->verbose;
121 Node::lookahead = OptS->lookahead;
122 Node::matrix_size = Node::sequence.size();
123 if(OptS->windowsize<=0) Node::windowsize= Node::sequence.size();
124 else {
125 Node::windowsize=OptS->windowsize;
126 if(Node::windowsize>(int)Node::sequence.size()) Node::windowsize=Node::sequence.size();
127 }
128 Node::transcribed=OptS->transcribed;
129 Node::initially_transcribed=Node::transcribed;
130 Node::interrupt_trajectory=OptS->interrupt_trajectory;
131 if(Node::transcribed<0) Node::transcribed=Node::matrix_size;
132
133 Node::front_structure=std::string(Node::transcribed,'.');
134
135 if(OptS->init_structure) {
136 Cout("Please enter starting structure\n");
137 std::cin >> Node::front_structure;
138 }
139 // fold_constrained=OptS->fold_constrained;
140 // if(fold_constrained) {
141 // Cout("Please enter folding constraints\n");
142 // std::cin >> Node::constraint_string;
143 // }
144
145 Node::max_barrier=0.0;
146 Node::energy_barrier=0.0;
147 if(Node::verbose>=1) std::cout<<"Initial number of bases transcribed:"+Str(Node::transcribed)+"\n";
148 Node::transcription_rate=(double)OptS->transcription_rate;
149 if(Node::verbose>=1) std::cout<<"Transcription rate:"+Str(Node::transcription_rate)+"\n";
150 Node::time=0.0;
151 Node::CalculateMaxBarrier();
152
153
154 }
155
CalculateMaxBarrier()156 void Node::CalculateMaxBarrier(){
157 double t_transcribe=(double)1.0/(double)Node::transcription_rate;
158 Node::max_barrier=Node::TimeToDeltaG(t_transcribe);
159 }
160
161
162
163
164 /**
165 * Constructor
166 */
Node(int i,int j,double energy)167 Node::Node(int i, int j, double energy)
168 {
169 this->i = i;
170 this->j = j;
171 this->free_energy = energy;
172 this->is_eligible=true;
173 this->is_included=false;
174 }
175
176
FoldingPathToString(std::vector<std::pair<double,std::string>> path)177 std::string Node::FoldingPathToString(std::vector<std::pair<double,std::string> > path){
178 std::string ret="";
179 for(size_t i=0;i<path.size();i++)
180 ret+=path[i].second+" "+Str(path[i].first)+" "+Str(Node::energy_barrier)+"\n";
181 return ret;
182 }
183
GetMorganHiggsPath(std::string target)184 void Node::GetMorganHiggsPath(std::string target){
185
186 int t=Node::transcribed;
187
188 Node::path = MorganHiggsEnergy(Node::sequence.substr(0,t),
189 Node::front_structure.substr(0,t),
190 target.substr(0,t),
191 Node::front_energy+Node::energy_barrier,
192 Node::lookahead,Node::grouping);//,Node::interrupt_trajectory);
193 }
194
GetMorganHiggsStudlaPath(std::string target)195 void Node::GetMorganHiggsStudlaPath(std::string target){
196
197 int t=Node::transcribed;
198
199 Node::path = MorganHiggsStudlaEnergy(Node::sequence.substr(0,t),
200 Node::front_structure.substr(0,t),
201 target.substr(0,t),
202 Node::front_energy+Node::energy_barrier,
203 Node::lookahead,Node::grouping);//,Node::interrupt_trajectory);
204 }
205
206 /**
207 counts from 0.
208 */
FindPathMinimum(int last_idx_within_barrier)209 int Node::FindPathMinimum(int last_idx_within_barrier){
210
211 double min=INF;
212 int min_idx=-1;
213 for(size_t i=0;i<=(size_t)last_idx_within_barrier;i++){
214 if(Node::path[i].first<min){
215 min= Node::path[i].first;
216 min_idx=i;
217 }
218 }
219 //Cout("FindPathMinimum "+Str(Node::path[min_idx].first)+"\n");
220 return min_idx;
221 }
222
223
224 /**
225 counts from 0.
226 */
FindPathSaddle(int min_idx)227 int Node::FindPathSaddle(int min_idx){
228 bool debug=false;
229 if(debug) Cout("FindPathSaddle got min_idx"+Str(min_idx)+"\n");
230 double max=-INF;
231 int max_idx=-1;
232 for(int i=0;i<=min_idx;i++){
233 if(debug) Cout("i "+Str(i)+" "+Str(Node::path[i].first)+" max "+Str(max)+"\n");
234 if(Node::path[i].first>max){
235
236 max=Node:: path[i].first;
237 max_idx=i;
238 if(debug)Cout("new max "+Str(max)+" max_idx "+Str(max_idx)+"\n");
239 }
240 }
241 if(debug) Cout("FindPathSaddle idx"+Str(Node:: path[max_idx].first)+"\n");
242 return max_idx;
243 }
244
245
GetSaddleFromPath(std::pair<double,std::string> & saddle,std::pair<double,std::string> & final_structure)246 void Node::GetSaddleFromPath(std::pair<double,std::string> & saddle,std::pair<double,std::string> & final_structure){
247 //the last element is the dummy element, subtract it here,subtract another 1 for counting from 0
248 int path_size=(int)path.size()-2;
249 if(Node::interrupt_trajectory){
250 int last_idx_of_partial_path=(int)path.back().first;
251 int min_idx;
252 while(true){
253 //Cout("last_idx_of_partial_path "+Str(last_idx_of_partial_path)+" path.size "+Str(path_size));
254 min_idx=FindPathMinimum(last_idx_of_partial_path);
255 //if this energy of the min within reach is not low enough, we take the next min
256 if(path[min_idx].first+MIN_ENERGY_DIFF<Node::front_energy) break;
257 while(last_idx_of_partial_path<=path_size && path[last_idx_of_partial_path].first+MIN_ENERGY_DIFF>=Node::front_energy) last_idx_of_partial_path++;
258 //if we have increased to the size of the path, it means there is no structure that is lower, in that case retrun the front structure,
259 //as this trajectory is worthless, even when the energy barrier is higher
260 if(last_idx_of_partial_path>path_size){
261 final_structure=std::make_pair(Node::front_energy,Node::front_structure);
262 return;
263 }
264 }
265 int saddle_idx = FindPathSaddle(min_idx);
266 saddle=path[saddle_idx];
267 final_structure=path[min_idx];
268 }
269 else{
270 int saddle_idx=FindPathSaddle(path_size);
271 saddle=path[saddle_idx];
272 final_structure=path[path_size];
273 }
274 // if(verbose>=4) Cout("GetSaddleFromPath obtains saddle and final structue\n"+saddle.second+"\n"+final_structure.second);
275 }
276
277 /**
278 Returns a pair consisting of the barrier height to overcome to fold into the new structure from the front as first element
279 and the new structure as the second element.
280 */
281
282
283
CalculateFoldingPath(Node * extremum,std::string integrated_structure)284 void Node::CalculateFoldingPath(Node* extremum,std::string integrated_structure){
285 if(verbose>=3) std::cout<<"Folding "<<OptS->barrier_heuristic<<std::endl;
286 MakePairTableFromFrontStructure();
287 if(OptS->barrier_heuristic=='M')
288 GetMorganHiggsPath(integrated_structure);
289 else if(OptS->barrier_heuristic=='S')
290 GetMorganHiggsStudlaPath(integrated_structure);
291 else { /* other heuristic */
292 path_t *p;
293 int p_len = 0;
294 double maxE = -INF;//std::numeric_limits<double>::max();
295 int maxE_idx = 0;
296 int t = Node::transcribed;
297 std::vector<std::pair<double,std::string> > v;
298 p = get_path(const_cast<char*>(sequence.substr(0,t).c_str()),
299 const_cast<char*>(Node::front_structure.c_str()),
300 const_cast<char*>(integrated_structure.substr(0,t).c_str()),
301 Node::OptS->maxkeep);
302 for (int i=0; p[i].s != NULL; i++) {
303 // memorize idx of structure with highest energy seen so far
304 if(p[i].en > maxE){
305 maxE = p[i].en;
306 maxE_idx = i+1;
307 }
308 v.push_back(std::make_pair(p[i].en, p[i].s));
309 }
310 // add dummy entry with idx of structure with highest energy
311 v.push_back(std::make_pair(maxE_idx, ""));
312 /* clean up space of path */
313 for (int i=0; p[i].s != NULL; i++) free(p[i].s);
314 free(p);
315 Node::path = v;
316
317 }
318
319 if(verbose>=3) {
320 Cout(Node::front_structure+"\n");
321 Cout(integrated_structure+"\n");
322 Cout(Node::FoldingPathToString(Node::path));
323 }
324 }
325
CalculatePathAndSaddle(Node * extremum,std::string integrated_structure,std::pair<double,std::string> & saddle,std::pair<double,std::string> & final_structure)326 void Node::CalculatePathAndSaddle(Node* extremum,std::string integrated_structure,std::pair<double,std::string> & saddle,std::pair<double,std::string> & final_structure){
327
328 //get path and saddle for each heuristic separately. Keep the one that produced the lowest saddle
329 char heuristic=OptS->barrier_heuristic;
330
331 OptS->barrier_heuristic='M';
332 Node::CalculateFoldingPath(extremum,integrated_structure);
333 Node::GetSaddleFromPath(saddle,final_structure);
334 std::pair<double,std::string> new_saddle=std::pair<double,std::string> ();
335 std::pair<double,std::string> new_final_structure=std::pair<double,std::string> ();
336 OptS->barrier_heuristic='S';
337
338 Node::CalculateFoldingPath(extremum,integrated_structure);
339 Node::GetSaddleFromPath(new_saddle,new_final_structure);
340 if(new_saddle.first<saddle.first && StructureIsFrontExtension(new_final_structure)){
341 saddle=new_saddle;
342 final_structure=new_final_structure;
343 }
344 OptS->barrier_heuristic='F';
345 Node::CalculateFoldingPath(extremum,integrated_structure);
346 Node::GetSaddleFromPath(new_saddle,new_final_structure);
347 if(new_saddle.first<saddle.first && StructureIsFrontExtension(new_final_structure)){
348 saddle=new_saddle;
349 final_structure=new_final_structure;
350 }
351
352 OptS->barrier_heuristic=heuristic;
353 }
354
355
CombineFrontAndNode(std::string node_substructure)356 std::string Node::CombineFrontAndNode(std::string node_substructure){
357 bool debug=false;
358 if(debug) Cout("CombineFrontAndNode\n front "+Node::front_structure+"\n node "+node_substructure+"\n");
359
360 std::vector<std::pair<int,int> >bp_front=MakeBasePairList1(Node::front_structure);
361 std::vector<std::pair<int,int> >bp_node=MakeBasePairList1(node_substructure);
362 for(size_t i=0;i<bp_front.size();i++){
363 std::pair<int,int> bp=bp_front[i];
364 //do nothing if the base pair is already part of basepairs
365 if(find(bp_node.begin(),bp_node.end(),bp)!=bp_node.end()) continue;
366 if(!Conflict(bp_node,bp)) {
367 if(debug) Cout("added bp "+PrintBasePair(bp)+"\n");
368 node_substructure[bp.first-1]='(';
369 node_substructure[bp.second-1]=')';
370 }
371 }
372 if(debug) Cout("combined "+node_substructure);
373 return node_substructure;
374 }
375
376
IsWithinWindow()377 bool Node::IsWithinWindow(){
378 if(this->j-this->i<=Node::windowsize) return true;
379 else return false;
380 }
381
382 /**
383 * Add the next extremum that does not conflict with the front and is
384 * within the limits imposed by energy_barrier.
385 */
386 bool
FindExtremum()387 Node::FindExtremum(){
388 std::pair<double,std::string> saddle= std::pair<double,std::string>();
389 std::pair<double,std::string> final_structure= std::pair<double,std::string>();
390 Node::transcribed_front_maximal=false;
391 //return if no extrema are transcribed
392 if(find_if(extrema.begin(),extrema.end(),std::mem_fun(&Node::IsTranscribed))==extrema.end()){
393 Node::transcribed_front_maximal=true;
394 return false;
395 }
396 extension_cost.clear();
397 MakePairTableFromFrontStructure();
398 std::vector<std::pair<int,int> >bp_front=MakeBasePairList1(front_structure);
399 for (size_t i=0; i<extrema.size(); i++) {
400 //make sure the value from the last iteration is not reused
401 std::string combined_structure="";
402 if(!extrema[i]->IsTranscribed() || !extrema[i]->IsEligible() || (!AllTranscribed() && !extrema[i]->IsWithinWindow()) ) {
403 //if(verbose>=2) Cout("Not transcribed.\n");
404 extension_cost.push_back(BARRIER_TOO_HIGH);
405 continue;
406 }
407 if(verbose>=4) Cout("Try extending to Node "+Print(extrema[i])+".\n");
408 if(verbose>=3) Cout("Front is\n"+Node::front_structure+":"+Str(Node::front_energy)+"\n");
409 //try adding terminal basepairs if backtracked structure doesn't add anything to front
410 bool debug=false;
411 double barrier=BARRIER_TOO_HIGH;
412 std::string node_substructure=BacktrackNode(extrema[i]);
413 if(debug) Cout("node_substructure\n"+node_substructure);
414 std::string integrated_structure=CombineFrontAndNode(node_substructure);
415 bool skip=false;
416 if(!Node::interrupt_trajectory){
417 if(integrated_structure==Node::front_structure || Evaluate(integrated_structure)>Node::front_energy) skip=true;
418 }
419
420 if(verbose>=3) Cout("Combination of Node and Front\n"+integrated_structure+"\n");
421 if(!skip && Node::front_structure!=integrated_structure){
422 if(OptS->barrier_heuristic=='A') CalculatePathAndSaddle(extrema[i],integrated_structure,saddle,final_structure);
423 else{
424 Node::CalculateFoldingPath(extrema[i],integrated_structure);
425 Node::GetSaddleFromPath(saddle,final_structure);
426 }
427 //we can only accept the final structure if it is different from the front structure and has a better energy
428 if(StructureIsFrontExtension(final_structure)) {
429 barrier=saddle.first-Node::front_energy;
430 combined_structure=final_structure.second;
431 }
432 }
433 if(debug) Cout("Current combined structure and barrier\n"+combined_structure+" "+Str(barrier));
434 //needs to be set to a value that gives this one a second chance below once all is transcribed
435 //else combined_structure=Node::front_structure;
436 //only if everything is transcribed and we are moving towards the mfE
437 //done by the function CalculateFoldingPath
438 skip=false;
439
440 if(!Node::interrupt_trajectory){
441 if(node_substructure==Node::front_structure || Evaluate(node_substructure)>Node::front_energy) skip=true;
442 }
443
444 if(!skip && barrier>Node::energy_barrier && combined_structure!=node_substructure){
445 if(OptS->barrier_heuristic=='A') CalculatePathAndSaddle(extrema[i],node_substructure,saddle,final_structure);
446 else{
447 Node::CalculateFoldingPath(extrema[i],node_substructure);
448 Node::GetSaddleFromPath(saddle,final_structure);
449 }
450 if(StructureIsFrontExtension(final_structure)) {
451 double barrier2=saddle.first-Node::front_energy;
452 if(barrier2<barrier){
453 barrier=barrier2;
454 combined_structure=final_structure.second;
455 if(debug) Cout("Have valid front extension with\n"+combined_structure+" "+Str(barrier));
456 }
457 }
458 }
459 if(debug) Cout("Current combined structure and barrier\n"+combined_structure+" "+Str(barrier));
460 if(verbose>=3) Cout("Actually obtained combination:\n"+combined_structure+"\n");
461 extension_cost.push_back(barrier);
462
463
464 if(verbose>=2) {
465 std::cout<<"Node "+Print(extrema[i])+" has extension cost "+Str(extension_cost.back())+" ("+Str(Node::energy_barrier)+" "+Str(Node::transcribed)+")\n";
466 }
467 if ( barrier <= Node::energy_barrier ) {
468 //1.First structure if that is suitable.
469 //2.Otherwise seconds structure, if suitable, or we won't get here
470 extrema[i]->AddToFront(combined_structure,barrier);
471 if(find_if(extrema.begin()+i,extrema.end(),std::mem_fun(&Node::IsTranscribed))==extrema.end()) Node::transcribed_front_maximal=true;
472 return true;
473 }
474 //Everything is eligible
475 else if(barrier > Node::max_barrier) extrema[i]->SetIneligible();
476 else{
477 if(verbose>=3)std::cout<<"Extremum"+Print(extrema[i])+" has extension cost "+Str(barrier)+" while "+Str(Node::energy_barrier)+" is allowed and max is "+Str(Node::max_barrier)+"\n";
478 }
479 }
480 return (false);
481 }
482
StructureIsFrontExtension(const std::pair<double,std::string> & struc)483 bool Node::StructureIsFrontExtension( const std::pair<double,std::string> & struc){
484 if(struc.second!=Node::front_structure && struc.first<Node::front_energy) return true;
485 else return false;
486 }
487
488
489 static int front_count=1;
490 void
AddToFront(std::string new_front_structure,double barrier)491 Node::AddToFront(std::string new_front_structure,double barrier){
492 this->Reeligify(Node::front_structure,new_front_structure);
493 double new_front_energy =Evaluate(new_front_structure);
494 if ( verbose >= 1 ) {
495 std::cout<<"Add Node ("+Print(this)+" "+Str(extension_cost.back())+" ("+Str(Node::energy_barrier)+" "+Str(Node::transcribed)+")\n";
496 std::cout<<"src:"+Node::front_structure+" "+Str(Node::front_energy)+"\n";
497 std::cout<<"tgt:"+new_front_structure+" "+Str(new_front_energy)+"\n";
498 }
499
500 //add the extremum to the front
501 for (int i=this->i; i<=this->j; i++) {
502 for (int j=i; j<=this->j; j++) front[i][j] = true;
503 }
504 front_extrema.push_back(this);
505 Node::front_structure=new_front_structure;
506 Node::front_energy=new_front_energy;
507
508 if(barrier>0.0) Node::IncreaseTime(TimePassedFolding(barrier));
509 std::string trajectory_entry=std::string();
510 trajectory_entry+=Node::front_structure+" ";
511 trajectory_entry+=Str(Node::front_energy)+" ";
512 trajectory_entry+=Str(Node::time)+" ";
513 trajectory_entry+=Str(barrier)+" ";
514 trajectory_entry+=Str(Node::energy_barrier)+" ";
515 trajectory_entry+=Str(Node::transcribed)+" ";
516 trajectory.push_back(trajectory_entry);
517
518 //and remove it and the extrema it dominates from the extrema vector
519 int count=0;
520 for(size_t i=0;i<extrema.size();i++){
521 if(!extrema[i]->is_included && extrema[i]->i>=this->i && extrema[i]->j<=this->j){
522 if(extrema[i]->IsMfE()) continue;
523 extrema[i]->SetIncluded(true);
524 extrema[i]->SetIneligible();
525 count++;
526 }
527 }
528
529 //print new front
530 if ( verbose >= 1 ) {
531 std::cout<<"Path to new front:\n";
532 std::cout<<FoldingPathToString(this->path)<<std::endl;
533 Cout("New Front\n");
534 std::cout<<front_structure+" "+Str(front_energy)<<std::endl;
535 }
536
537 if ( verbose >= 5 ) Cout(Node::PrintFront()+"\n");
538 //make all extrema eligible again.
539 if(Node::print_front_trajectory) {
540 std::vector<std::pair<int,int> > ext= std::vector<std::pair<int,int> >();
541 for(size_t k=0;k<front_extrema.size();k++)ext.push_back(std::pair<int,int> (front_extrema[k]->i,front_extrema[k]->j));
542 Node::front_trajectory_ps+=PSFrontPlot(Node::sequence,ext);
543
544 // /*
545 Node::front_trajectory_ps=PSFrontPlot(Node::sequence,ext);
546 Node::front_trajectory_ps+="end \n";
547 std::string filename="front_trajectory"+Str(front_count)+".ps";
548 std::ofstream outfile(filename.c_str());
549 outfile << Node::front_trajectory_ps;
550 outfile.close();
551 front_count++;
552 //*/
553
554 }
555 }
556
557
558 /**
559 * Extends the front by as many non-conflicting extrema as possible
560 * within the limits imposed by Node::energy_barrier.
561 */
562 //void
563 bool
ExtendFront()564 Node::ExtendFront()
565 {
566 bool extended= FindExtremum();
567 return extended;
568 }
569
570
571 /**
572 * Find the local extrema in the max_matching matrix and sort them
573 * comparing with Node::LessThan
574 */
575
576 extern "C" {
577 void export_fold_arrays(int **f5_p, int **c_p, int **fML_p, int **fM1_p,
578 int **indx_p, char **ptype_p);
579 }
580 void
FindLocalExtrema()581 Node::FindLocalExtrema()
582 {
583 int *f5, *c, *fML, *fM1, *indx; char *ptype;
584 export_fold_arrays(&f5,&c,&fML,&fM1,&indx,&ptype);
585 if(verbose>=2) Cout("#extrema: "+Str((int)extrema.size())+"\n");
586 int n = matrix_size;
587 for (int i=1; i<=n-TURN-1; i++) {
588 for (int j=i+TURN+1; j<=n; j++) {
589 double val = c[indx[j]+i];
590 //i and j pair AND no lonely pair
591 if(val < INF) {
592 //Node node=Node(i, j,val);
593 extrema.push_back(new Node(i, j,val));
594 if(verbose>=2) Cout(Print(extrema.back())+" "+Str(val)+"\n");
595 }
596 }
597 }
598
599 double energy = f5[Node::sequence.size()];
600 // (1,n) is in general not a pair. You can't include it in the list
601 //Node node2=Node(1,n,energy);
602 extrema.push_back(new Node(1,n,energy));//& node2);
603 if(verbose>=3) Cout("#extrema: "+Str((int)Node::extrema.size())+"\n");
604 stable_sort(extrema.begin(), extrema.end(), Node::LessThan);
605 }
606
607
608 /**
609 * Counts the number of squares by which the Node increases the
610 * current front.
611 */
612 int
FrontIncrease()613 Node::FrontIncrease()
614 {
615 //WE COUNT substructures FROM 1, but front starts at front[0][0]
616 // std::cout<<"FrontIncrease\n";
617 int count = 0;
618 for (int i=this->i; i<=this->j; i++) {
619 for (int j=i; j<=this->j; j++) {
620 // std::cout<<"i: "+Str(i)+" j:"+Str(j)+"\n";
621 if ( !front[i-1][j-1] ) count++;
622 }
623 }
624 return count;
625 }
626
627
TrulyContainsFront()628 bool Node::TrulyContainsFront(){
629 for (int i=1; i<=Node::matrix_size; i++) {
630 if (front[i][this->j] ) return false;
631 }
632 for (int j=1; j<=Node::matrix_size; j++) {
633 if (front[this->i][j] ) return false;
634 }
635 return true;
636 }
637
638 /**
639 * Determines whether Node::front increases when this is added to
640 * it. Used to filter extrema, that will not extend the front.
641 */
642 bool
FrontIncreases()643 Node::FrontIncreases()
644 {
645 return ( FrontIncrease() > 0 );
646 }
647
648 /**
649 * Comparison for STL's sort. Comparison criteria: Distance from main
650 * diagonal, distance to left/right edge of the diagonal a Node is on,
651 * 5' before 3'.
652 */
653 inline bool
LessThan(Node * n1,Node * n2)654 Node::LessThan(Node* n1,Node* n2)
655 {
656 // sort left/right edge of diagonal first
657 if ( n1->j-n1->i == n2->j-n2->i ) {
658 // Distance to left/right edge of diagonal
659 int edge_dist = std::min(n1->i-1,Node::matrix_size+1-n1->j);
660 int edge_dist2 = std::min(n2->i-1,Node::matrix_size+1-n2->j);
661
662 if ( edge_dist == edge_dist2 ) {
663 // Take the point on the 5' side. That will be this, if edge_dist==i
664 return ( edge_dist == n1->i-1 );
665 }
666 else return ( edge_dist < edge_dist2 );
667 }
668 else return ( (n1->j-n1->i) < (n2->j-n2->i) );
669 }
670
671
672 /**
673 * Initialize the front
674 */
675 void
NewFront()676 Node::NewFront()
677 {
678 for (int i=0; i<matrix_size+1; i++) Node::front.push_back(std::vector<bool>(matrix_size+1, false));
679
680 Node::front_energy=EvalEnergy(Node::sequence.substr(0,Node::front_structure.size()),Node::front_structure);
681
682 }
683
684 /**
685 * Print matrix coordinates.
686 */
687 inline std::string
Print(Node * n)688 Node::Print(Node* n)
689 {
690 return ("(" + Str(n->i) + "," + Str(n->j) + ")");
691 }
692
693 /**
694 * Print a vector of Nodes.
695 */
696 std::string
Print(std::vector<Node * > nodes)697 Node::Print(std::vector<Node*> nodes)
698 {
699 std::string s;
700 for (unsigned int i=0; i<nodes.size(); i++) s += Print(nodes[i]);
701
702 return (s);
703 }
704
705 /**
706 * Print the front as a n+1xn matrix with 1's indicating squares that
707 * are covered by the front.
708 */
709 std::string
PrintFront()710 Node::PrintFront()
711 {
712 std::string s;
713 for (size_t i=0; i<front.size(); i++) {
714 std::string nix(i, ' ');
715
716 if ( i > 0 )s += nix;
717 for (size_t j=i; j<front[i].size()-1; j++)
718 s += Str(front[i][j]);
719 s += "\n";
720 }
721
722 return (s);
723 }
724
725 /**
726 * Removes all elements from Node::extrema that do not extend the
727 * energy front.
728 */
729
730 void
PruneExtrema()731 Node::PruneExtrema()
732 {
733 std::vector<Node*> new_extrema=std::vector<Node*>();
734 for(size_t i=0;i<extrema.size();i++){
735 if(extrema[i]->FrontIncreases()){
736 new_extrema.push_back(extrema[i]);
737 }
738 }
739 extrema=new_extrema;
740
741 }
742
Contains(Node * n)743 bool Node::Contains(Node* n) { return ((i <= n->i) && (j >= n->j));}
744
745
ConstructFrontResolution(std::string & structure,std::vector<std::pair<int,int>> & basepairs,std::pair<int,int> bp_to_add)746 void Node::ConstructFrontResolution(std::string & structure,std::vector<std::pair<int,int> > & basepairs, std::pair<int,int> bp_to_add){
747 bool debug_pt=false;
748 if(debug_pt) {
749 Cout("ConstructFrontResolution"); //std::cout<<std::endl;
750 std::cout<<"Basepairs "+PrintBasePairList(basepairs);
751 std::string old_pair_table=PrintPairTable();
752 Cout("old_pair_table:"+old_pair_table);
753 }
754 for(std::vector<std::pair<int,int> >::iterator it2=basepairs.begin();it2!=basepairs.end();it2++) {
755
756 if(debug_pt) Cout("Conflict between "+PrintBasePair(*it2)+" and "+PrintBasePair(bp_to_add)+"?\n");
757 if( BasePairConflict()(*it2, bp_to_add)){
758 if(debug_pt) Cout("Remove "+PrintBasePair(*it2)+"\n");//std::cout<<std::endl;
759 int first=it2->first;
760 int second=it2->second;
761 pair_table[first]=0;
762 pair_table[second]=0;
763 structure[first-1]='.';
764 structure[second-1]='.';
765 // bp_removed.push_back(*it2);
766 }
767 }
768
769
770 std::vector<std::pair<int,int> >::iterator it=remove_if(basepairs.begin(),basepairs.end(),std::bind2nd(BasePairConflict(),bp_to_add));
771 basepairs.erase(it,basepairs.end());
772 if(debug_pt) Cout(PrintBasePairList(basepairs)+"\n");
773 if(debug_pt) Cout(PrintPairTable());
774
775 //add the basepair in stack, which now does not conflict with the front any more.
776 basepairs.push_back(bp_to_add);
777 if(debug_pt) Cout("Add "+PrintBasePair(bp_to_add)+"\n");
778 int first=bp_to_add.first;
779 int second=bp_to_add.second;
780 pair_table[first]=second;
781 pair_table[second]=first;
782 structure[first-1]='(';
783 structure[second-1]=')';
784 if(debug_pt) Cout(PrintPairTable());
785 if(debug_pt) Cout("new_pair_table:"+PrintPairTable());
786 }
787
788
BacktrackFront(std::pair<double,std::string> & ret,std::string node_substructure)789 void Node::BacktrackFront(std::pair<double,std::string> & ret,std::string node_substructure)
790 {
791
792 bool debug_pt=false;//true;//false;
793 if(debug_pt) Cout("BacktrackFront with PairTable "+PrintPairTable());
794 if(debug_pt) Cout("node_substructure\n "+node_substructure+"\n");
795 std::string front_structure=Node::front_structure;
796 //Cout("have front_structure "+front_structure+" sequence "+sequence);
797 //initialized later when front_structure with resolved conflicts is evaluated
798 double front_energy;
799 //create a substructure s
800 //1.For a given Node n, add as much of the suboptimal structure s(n) of n
801 //2.Add more of it, if it improves E(s)
802 //3.s=s+s(n)
803 std::vector<std::pair<int,int> >basepairs=MakeBasePairList1(front_structure);
804 std::vector<std::pair<int,int> > bp_node=MakeBasePairList1(node_substructure);
805 //take maximal possible substructure of node_substructure that does not conflict with front_structure
806 std::vector<std::pair<int,int> >bp_node_conflict=std::vector<std::pair<int,int> >();
807 for(size_t i=0;i<bp_node.size();i++){
808 //do nothing if the base pair is already part of basepairs
809 if(find(basepairs.begin(),basepairs.end(),bp_node[i])!=basepairs.end()) continue;
810 if(!Conflict(basepairs,bp_node[i])) {
811 //if (debug) {
812 if(debug_pt) Cout("added bp "+PrintBasePair(bp_node[i])+"\n");//}
813 basepairs.push_back(bp_node[i]);
814 int first=bp_node[i].first;
815 int second=bp_node[i].second;
816 pair_table[first]=second;
817 pair_table[second]=first;
818 //manually change front_structure to circumvent a call to BasePairListToStructure1
819 front_structure[first-1]='(';
820 front_structure[second-1]=')';
821 }
822 else bp_node_conflict.push_back(bp_node[i]);
823 }
824 if(debug_pt) Cout("PairTable after adding nc bp \n"+PrintPairTable());
825
826
827 //now basepairs is the maximal substructure without conflicts.
828 //current substructure for the front
829 //front_structure=BasePairListToStructure1(Node::sequence.size(),basepairs);
830 front_energy=FastEvaluate();
831 if(debug_pt) Cout("Front energy after adding non-conflicting base pairs is "+Str(front_energy)+" with PairTable \n"+PrintPairTable());
832 if(verbose >= 3) Cout("front after adding non conflicting base pairs\n"+front_structure+":"+Str(front_energy)+"\n");
833
834 //now add the baspairs in added_node that conflict with the front (and remove those basepairs in the front that conflict with them).
835 //front after those basepairs in conflict with a basepair in added_note have been removed
836 std::vector<std::pair<int,int> > new_basepairs= std::vector<std::pair<int,int> >();
837 int min_stacksize=1;
838 std::vector<std::vector<std::pair<int,int> > > bp_node_conflict_stacks=std::vector<std::vector<std::pair<int,int> > >();
839 ConformationToStacks(bp_node_conflict_stacks,bp_node_conflict,min_stacksize);
840 if(bp_node_conflict_stacks.size()>0){
841 std::vector<std::pair<int,int> > bp_removed;
842 for(size_t i=0;i<bp_node_conflict_stacks.size();i++){
843 new_basepairs=basepairs;
844 std::vector<std::pair<int,int> > stack=bp_node_conflict_stacks[i];
845 //remove the basepairs in the stack that are already contained in the front
846 for(int j=stack.size()-1;j>=0;j--) {
847 if(ConformationHasPair(basepairs,stack[j])) stack.erase(stack.begin()+j);
848 }
849
850 std::vector<std::pair<double,std::string> > front_resolutions= std::vector<std::pair<double,std::string> >();
851 //std::vector<std::pair<double,std::vector<std::pair<int,int> > > > front_resolutions= std::vector<std::pair<double,std::vector<std::pair<int,int> > > >();
852
853 //front_resolutions.push_back(make_pair(front_energy,basepairs));
854 front_resolutions.push_back(make_pair(front_energy,front_structure));//basepairs));
855
856 //ensure that the base pair in front at first_idx_stack does not come from the node that stack is constructued from.
857 std::string resolved_structure=front_structure;
858 for(size_t j=0;j<stack.size();j++){
859 //last_insertion=j;
860 //double resolution_energy=
861 ConstructFrontResolution(resolved_structure,new_basepairs,stack[j]);
862 front_resolutions.push_back(make_pair(FastEvaluate(),resolved_structure));
863 //front_resolutions.push_back(make_pair(resolution_energy,BasePairListToStructure1(Node::transcribed,new_basepairs)));
864 }
865 //revert changes in pair_table: remove stack and add those elts of basepairs back that were removed because they conflicted with elements in stack
866 MakePairTable(const_cast<char*>(front_structure.substr(0,Node::transcribed).c_str()));
867 if(debug_pt) std::cout<<"Pairtable after reverting changes of outside in "+PrintPairTable();
868 //add stack back to front as much as possible
869 new_basepairs=basepairs;
870 resolved_structure=front_structure;
871
872 for(int j=stack.size()-1;j>=0;j--){
873 ConstructFrontResolution(resolved_structure,new_basepairs,stack[j]);
874 front_resolutions.push_back(make_pair(FastEvaluate(),resolved_structure));
875 }
876
877 //now accept the best solution in front_resolutions and update structure, energy and bp
878 front_energy=INF;
879 front_structure=std::string(Node::transcribed,'.');
880 for(std::vector<std::pair<double,std::string> >::iterator it=front_resolutions.begin();it!=front_resolutions.end();it++){
881 if(it->first<front_energy){
882 front_energy=it->first;
883 front_structure=it->second;
884 }
885 }
886 basepairs=MakeBasePairList1(front_structure);
887 //sync pair_table with latest front unless this was the last iteration or the latest front is already in pair_table
888 if(i<bp_node_conflict_stacks.size()-1){
889 MakePairTable(const_cast<char*>(front_structure.substr(0,Node::transcribed).c_str()));
890 if(debug_pt) std::cout<<"Pairtable after syncing with front resolution "+PrintPairTable();
891 if(debug_pt) std::cout<<"Basepairs "+PrintBasePairList(basepairs);
892 }
893 //repeat for all conflict stacks
894 }
895 }
896 if(debug_pt) Cout("BacktrackFront returns "+front_structure+" "+Str(front_energy)+"\n");
897 ret= make_pair(front_energy,front_structure);
898 }
899
900
901
BacktrackNode(Node * n)902 std::string Node::BacktrackNode(Node* n){
903 if(n->IsMfE()) {
904 Node::CalculateMfe();
905 return Node::mfe_structure;
906 }
907 else {
908 char *s = backtrack_fold_from_pair(const_cast<char*>(Node::sequence.c_str()),
909 n->i, n->j);
910 std::string ss(s);
911 free(s);
912 return (ss.substr(0,Node::transcribed));
913 }
914 }
915
CalculateMfe()916 void Node::CalculateMfe(){
917
918 char *sequence = new char[Node::matrix_size+1];
919 char *structure = new char[Node::matrix_size+1];
920 strcpy(sequence, Node::sequence.c_str());
921 // if(fold_constrained) strcpy(structure,Node::constraint_string.c_str());
922 Node::mfe = fold(sequence, structure);
923 Node::mfe_structure=std::string(structure);
924 // clean up memory
925 delete[] sequence;
926 delete[] structure;
927 }
928
IsMfE()929 bool Node::IsMfE(){
930 if(this->i==1 && this->j==(int)Node::sequence.size()) return true;
931 else return false;
932
933 }
934
935
936
Transcribe()937 void Node::Transcribe(){
938 Node::transcribed++;
939 if(Node::transcribed==Node::matrix_size ) Node::SetAllEligible();
940 if(verbose>=2) Cout("Transcribed to "+Str(Node::transcribed)+"\n");
941 S[0]=Node::transcribed;
942 S1[0]=Node::transcribed;
943 pair_table[0]=Node::transcribed;
944 Node::front_structure+=".";
945 MakePairTableFromFrontStructure();
946 Node::front_energy=Evaluate(Node::front_structure);
947 }
948
IsTranscribed()949 bool Node::IsTranscribed(){
950 return j<=Node::transcribed;
951
952 }
953
954 /**
955 TO DO:CAN IT BE THAT STRUCTURES SMALLR THAN MFE MAY STILL CONTRIBUTE TO FRONT WHILE THE MFE CANT
956 */
957
StopCriterionReached()958 bool Node::StopCriterionReached(){
959 if(front_energy-MIN_ENERGY_DIFF<mfe){
960 if(verbose>=1) Cout("StopCriterion reached: MfE is part of the front\n");
961 return true;
962 }
963 return false;
964 }
965
MinimalExtensionCost()966 double Node::MinimalExtensionCost(){
967 if(extension_cost.size()==0) return 0.0;
968 else return *min_element(Node::extension_cost.begin(),Node::extension_cost.end());
969 }
970
RaiseEnergyBarrier()971 void Node::RaiseEnergyBarrier(){
972 Node::energy_barrier += 1.0;
973 double minimial_extension_cost=MinimalExtensionCost();
974 if(Node::energy_barrier < minimial_extension_cost && minimial_extension_cost>=0.0) Node::energy_barrier = ceil(minimial_extension_cost);
975 Node * n;
976 int count=0;
977 for(size_t i=0;i<extrema.size();i++){
978 n=extrema[i];
979 if(!n->is_included && !n->is_eligible && extension_cost[i]<=Node::energy_barrier){
980 n->is_eligible=true;
981 count++;
982 }
983 }
984 }
985
986
IncreaseTime(double inc)987 void Node::IncreaseTime(double inc){
988 double oldtime=Node::time;
989 Node::time+=inc;
990 if(verbose>=1) std::cout<<"Increase time from "<<oldtime<<" to "<<Node::time<<std::endl;
991 }
992
SetEnergyBarrier()993 void Node::SetEnergyBarrier(){
994 //time in seconds it takes to transcribe one base
995 double t_transcribe=1.0/Node::transcription_rate;
996 //number of bases transribed since start of algorithm
997 double transcribed=Node::transcribed-Node::initially_transcribed;
998 //time when next base is transcribed
999 double new_time=(transcribed+1)*t_transcribe;
1000 double delta_time=new_time-Node::time;
1001 /*
1002 Cout("t_transcribe :"+Str(t_transcribe ));
1003 Cout("transcribed :"+Str(transcribed ));
1004 Cout("new_time :"+Str(new_time ));
1005 std::cout<<"SetEnergyBarrier for delta time "<<delta_time<<std::endl;
1006 */
1007 Node::SetEnergyBarrier(delta_time);
1008 }
1009
SetEnergyBarrier(double delta_time)1010 void Node::SetEnergyBarrier(double delta_time){
1011 Node::energy_barrier=Node::TimeToDeltaG(delta_time);
1012 //Cout("delta_time: "+Str(delta_time));
1013 //Cout("energy barrier: "+Str(Node::energy_barrier)+"\n");
1014 }
1015
1016
AdvanceTimeToNextTranscription()1017 void Node::AdvanceTimeToNextTranscription(){
1018 double t_transcribe=(double)1.0/(double)Node::transcription_rate;
1019 double transcribed=Node::transcribed-Node::initially_transcribed;
1020 double new_time=(transcribed+1)*t_transcribe;
1021 double delta_time=new_time-Node::time;
1022 /*
1023 Cout("t_transcribe: "+Str(t_transcribe));
1024 Cout("transcribed: "+Str(transcribed));
1025 Cout("new_time: "+Str(new_time));
1026 Cout("delta_time: "+Str(delta_time));
1027 */
1028 if(delta_time<EPSILON) delta_time+=t_transcribe;
1029 // Cout("AdvanceTimeToNextTranscription:"+Str(delta_time));
1030 Node::IncreaseTime(delta_time);
1031 Node::Transcribe();
1032 }
1033
1034
Evaluate(std::string struc)1035 double Node::Evaluate(std::string struc){
1036 struc=struc.substr(0,Node::transcribed);
1037 std::string sequence=Node::sequence.substr(0,Node::transcribed);
1038 //Cout("Evaluate:\n"+sequence+"\n"+struc+"\n");
1039 return EvalEnergy(sequence,struc);
1040 }
1041
1042
FastEvaluate()1043 double Node::FastEvaluate(){
1044 std::string sequence=Node::sequence.substr(0,Node::transcribed);
1045 // Cout("Evaluate:\n"+sequence+"\n"+struc+"\n");
1046 return FastEvalEnergy(sequence);
1047
1048 }
CleanUpNode(void)1049 void Node::CleanUpNode(void)
1050 {
1051 // free allocated memory of extrema array
1052 for (std::vector<Node*>::iterator it = Node::extrema.begin();
1053 it != Node::extrema.end();
1054 it++)
1055 delete *it;
1056
1057 delete [] pair_table;
1058 delete [] S;
1059 delete [] S1;
1060 }
1061
1062
IsEligible()1063 bool Node::IsEligible(){
1064 return is_eligible;
1065 }
1066
1067
IsIncluded()1068 bool Node::IsIncluded(){
1069 return is_included;
1070 }
1071
SetIncluded(bool val)1072 void Node::SetIncluded(bool val){
1073 is_included=val;
1074 }
1075
1076
SetAllEligible()1077 void Node::SetAllEligible(){
1078 Node::ineligible_count=0;
1079 for(size_t i=0;i<extrema.size();i++){
1080 Node * n=extrema[i];
1081 if(!n->is_included) n->is_eligible=true;
1082 else Node::ineligible_count++;
1083 }
1084 }
1085
HaveOverlap(const std::vector<std::pair<int,int>> & bp_removed,const std::pair<int,int> & extent)1086 bool Node::HaveOverlap(const std::vector<std::pair<int,int> >& bp_removed,const std::pair<int,int> & extent){
1087
1088 for(size_t i=0;i<bp_removed.size();i++){
1089 if(extent.second>=bp_removed[i].first && extent.first<=bp_removed[i].second ) return true;
1090 }
1091 return false;
1092
1093 }
1094
Reeligify(std::string old_structure,std::string new_structure)1095 void Node::Reeligify(std::string old_structure,std::string new_structure){
1096 int count=0;
1097 std::vector<std::pair<int,int> >bp_old=MakeBasePairList1(old_structure);
1098 std::vector<std::pair<int,int> >bp_new=MakeBasePairList1(new_structure);
1099
1100 std::vector<std::pair<int,int> > bp_removed=std::vector<std::pair<int,int> > ();
1101 bp_removed.push_back(std::pair<int,int>(this->i,this->j));
1102 for(size_t i=0;i<bp_old.size();i++){
1103 if(find(bp_new.begin(),bp_new.end(),bp_old[i])==bp_new.end()) bp_removed.push_back(bp_old[i]);
1104 }
1105 Node * n;
1106 for(size_t i=0;i<extrema.size();i++){
1107 n=extrema[i];
1108 if(n->is_included || n->IsEligible()) continue;
1109 std::pair<int,int> extent(n->i,n->j);
1110 if(HaveOverlap(bp_removed,extent)) {
1111 n->is_eligible=true;
1112 count++;
1113 }
1114 }
1115 }
1116
SetIneligible()1117 void Node::SetIneligible(){
1118 is_eligible=false;
1119 Node::ineligible_count++;
1120 }
1121
MakePairTableFromFrontStructure()1122 void Node::MakePairTableFromFrontStructure(){
1123 // std::string old_pair_table=PrintPairTable();
1124 MakePairTable(const_cast<char*>(Node::front_structure.substr(0,Node::transcribed).c_str()));
1125 // pair_table[0]=Node::transcribed;
1126 // std::cout<<"MakePairTableFromFrontStructure "+Node::front_structure<<std::endl;
1127 //std::cout<<"old:"+old_pair_table<<std::endl;
1128 //std::cout<<"new:"+PrintPairTable()<<std::endl;
1129 }
1130
AllTranscribed()1131 bool Node::AllTranscribed(){
1132 return (Node::transcribed==Node::matrix_size);
1133 }
1134