1 #include <stdio.h>
2 #include <stdlib.h>
3 #include <math.h>
4 #include <time.h>
5 #include <limits.h>
6 #include <string.h>
7 
8 #include <string>
9 #include <set>
10 #include <map>
11 #include <vector>
12 #include <algorithm>
13 #include <memory>
14 
15 extern "C" {
16   #include "fold.h"
17   #include "findpath.h"
18   #include "RNAlocmin_cmdline.h"
19   #include "utils.h"
20   #include "read_epars.h"
21   #include "fold_vars.h"
22   #include "params.h"
23   #include "move_set_inside.h"
24 }
25 
26 #include "findpath_pk.h"
27 
28 #include "hash_util.h"
29 #include "globals.h"
30 #include "RNAlocmin.h"
31 #include "flood.h"
32 #include "RNAlocmin.h"
33 #include "move_set_pk.h"
34 #include "neighbourhood.h"
35 
36 #include "barrier_tree.h"
37 
38 using namespace std;
39 
40 // dirty hack to "allegiance" stuff
41  //to be filled by 1st run
42 map<struct_en, struct_en, comps_entries> str_to_LM;
43 vector<struct_en> structures;
44  // to be filled after
45 map<struct_en, int, comps_entries> LM_to_LMnum;
46 static bool allegiance = false;
47 
48 
isSeq(char * p)49 inline bool isSeq(char *p)
50 {
51   // check first two chars - should be enough
52   char seq[]="ACGTUactgu";
53   int ok = 0;
54   for (unsigned int i=0; i<strlen(seq); i++) {
55     if (p[0]==seq[i]) {
56       ok++;
57     }
58     if (p[1]==seq[i]) {
59       ok++;
60     }
61     if (ok==2) break;
62   }
63 
64   return (ok==2);
65 }
66 
isStruct(char * p)67 inline bool isStruct(char *p)
68 {
69   // check first two chars - should be enough
70   if ((p[0]=='.' || p[0]=='(') && (p[1]=='.' || p[1]=='(' || p[1]=='[')) return true;
71   else return false;
72 }
73 
isEnergy(char * p,float & energy)74 inline bool isEnergy(char *p, float &energy)
75 {
76   if (sscanf(p, "%f", &energy) == 1) return true;
77   else return false;
78 }
79 
en_fltoi(float en)80 inline int en_fltoi(float en)
81 {
82   if (en < 0.0) return (int)(en*100 - 0.5);
83   else return (int)(en*100 + 0.5);
84 }
85 
86 struct barr_info { // info taken from barriers
87   int father;
88   int e_diff;
89   int bsize;
90   int fbsize;
91   // unused, unmodified
92   float fen;
93   int grad;
94   float feng;
95 
operator +barr_info96   barr_info &operator+(barr_info &second) {
97     bsize += second.bsize;
98     if (second.father!=father) father = -1;
99     fbsize += second.fbsize;
100     grad += second.grad;
101     return *this;
102   }
103 };
104 
105 // functions that are down in file ;-)
106 char *read_seq(char *seq_arg, char **name_out);
107 int move(unordered_map<struct_en, gw_struct, hash_fncts, hash_eq> &structs, map<struct_en, int, comps_entries> &output, set<struct_en, comps_entries> &output_shallow, SeqInfo &sqi, bool pure_output);
108 char *read_previous(char *previous, map<struct_en, int, comps_entries> &output);
109 char *read_barr(char *previous, map<struct_en, barr_info, comps_entries> &output);
110 
main(int argc,char ** argv)111 int main(int argc, char **argv)
112 {
113   clock_t clck1 = clock();
114 
115   // testing
116   //test();
117   //exit(EXIT_SUCCESS);
118 
119   // parse arguments
120   gengetopt_args_info args_info;
121   if (cmdline_parser(argc, argv, &args_info) != 0) {
122     fprintf(stderr, "ERROR: argument parsing problem.\n");
123     exit(EXIT_FAILURE);
124   }
125 
126   //check for bad arguments
127   if (Opt.Init(args_info) !=0 ) {
128     fprintf(stderr, "ERROR: one or more bad arguments, exiting...\n");
129     exit(EXIT_FAILURE);
130   }
131 
132   // adjust temperature
133   if (args_info.temp_given) {
134     temperature = args_info.temp_arg;
135   }
136 
137   // read parameter file
138   if (args_info.paramFile_given) {
139     read_parameter_file(args_info.paramFile_arg);
140   }
141 
142   // dangle setup
143   if (args_info.dangles_given) {
144     dangles = args_info.dangles_arg;
145     model_detailsT  md;
146     set_model_details(&md);
147     md.dangles = dangles;
148   }
149 
150   // adjust dependencies:
151   if (args_info.barr_name_given) {args_info.bartree_flag = true;}
152   if (args_info.rates_file_given) {args_info.rates_flag = true;}
153 
154   // degeneracy setup
155   if (args_info.degeneracy_off_flag) {
156     degeneracy_handling(0);
157     Neighborhood::SwitchOffDegen();
158   }
159 
160   //try_pk();
161   //exit(0);
162 
163   // keep track of structures & statistics
164   map<struct_en, int, comps_entries> output; // structures plus energies to output (+ how many hits has this minima)
165   map<struct_en, barr_info, comps_entries> output_barr; // structures plus energies to output (+ barr_info)
166   set<struct_en, comps_entries> output_shallow; // shallow structures (if minh specified)
167   char *seq = NULL;
168   char *name = NULL;
169 
170   // read previous LM or/and sequence
171   if (args_info.fix_barriers_given) {
172     seq = read_barr(args_info.fix_barriers_arg, output_barr);
173   } else {
174     if (args_info.previous_given) {
175       seq = read_previous(args_info.previous_arg, output);
176     } else {
177       seq = read_seq(args_info.seq_arg, &name);
178     }
179   }
180 
181   if (args_info.verbose_lvl_arg>1) fprintf(stderr, "%s\n", seq);
182 
183   seq_len = strlen(seq);
184 
185   // allegiance:
186   FILE *alleg = NULL;
187   if (args_info.allegiance_given) {
188     alleg = fopen(args_info.allegiance_arg, "w");
189     if (alleg) {
190       allegiance = true;
191       fprintf(alleg, "       %s\n", seq);
192     }
193   }
194 
195   // time?
196   if (args_info.verbose_lvl_arg>0) {
197     fprintf(stderr, "Time to initialize: %.2f secs.\n", (clock() - clck1)/(double)CLOCKS_PER_SEC);
198     clck1 = clock();
199   }
200 
201   // ########################## begin main loop - reads structures from RNAsubopt and process them
202   int count = max(output.size(), output_barr.size());  //num of local minima
203   SeqInfo sqi;
204   sqi.Init(seq);
205 
206   int not_canonical = 0;
207 
208   if (!args_info.fix_barriers_given) {
209 
210     // if direct output:
211     if (args_info.just_output_flag) printf("%s\n", seq);
212 
213     // hash
214     unordered_map<struct_en, gw_struct, hash_fncts, hash_eq> structs (HASHSIZE); // structures to minima map
215     while ((!args_info.find_num_given || count != args_info.find_num_arg) && !args_info.just_read_flag) {
216       int res = move(structs, output, output_shallow, sqi, args_info.just_output_flag);
217 
218       // print out
219       //if (Opt.verbose_lvl>0 && num_moves%10000==0) fprintf(stderr, "processed %d, minima %d, time %f secs.\n", num_moves, count, (clock()-clck1)/(double)CLOCKS_PER_SEC);
220       if (Opt.verbose_lvl>0 && num_moves%(Opt.pknots?1000:10000)==0 && num_moves!=0) fprintf(stderr, "processed %d, minima %d, time %f secs.\n", num_moves, (int)output.size(), (clock()-clck1)/(double)CLOCKS_PER_SEC);
221 
222       // evaluate results
223       if (res==0)   continue; // same structure has been processed already
224       if (res==-1)  break; // error or end
225       if (res==-2)  not_canonical++;
226       if (res==1)   count=output.size();
227     }
228 
229     if (args_info.just_output_flag) {
230       // end it
231 
232       // release resources
233       if (seq!=NULL) free(seq);
234       if (name!=NULL) free(name);
235       cmdline_parser_free(&args_info);
236       freeP();
237       free_arrays();
238 
239       // time?
240       if (args_info.verbose_lvl_arg>0) {
241         fprintf(stderr, "Printing results + freeing args: %.2f secs.\n", (clock() - clck1)/(double)CLOCKS_PER_SEC);
242         clck1 = clock();
243       }
244       return 0;
245     }
246 
247 
248     // ########################## end main loop - reads structures from RNAsubopt and process them
249 
250     //int num_of_structures = hash_size();
251     if (args_info.verbose_lvl_arg>0) print_stats(structs);
252     add_stats(structs, output);
253 
254     // time?
255     if (args_info.verbose_lvl_arg>0) {
256       fprintf(stderr, "Main loop (deepest descent from RNAsubopt): %.2f secs.\n", (clock() - clck1)/(double)CLOCKS_PER_SEC);
257       clck1 = clock();
258     }
259 
260     // vectors for rates computation
261     int num = ((count > args_info.min_num_arg && args_info.min_num_arg!=0) ? args_info.min_num_arg : count);
262     vector<string> output_str;
263     output_str.resize(num);
264     vector<struct_en> output_he;
265     struct_en h;
266     h.structure = NULL;
267     output_he.resize(num, h);
268     vector<int> output_en;
269     output_en.resize(num);
270     vector<int> output_num;
271     output_num.resize(num);
272 
273     // threshold for flooding
274     int threshold;
275 
276     int i=0;
277     int ii=0;
278     for (map<struct_en, int, comps_entries>::iterator it=output.begin(); it!=output.end(); it++) {
279       ii++;
280       // if not enough minima
281       if (i<num) {
282         // first check if the output is not shallow
283         if (Opt.minh>0) {
284           int saddle;
285           struct_en *escape = flood(it->first, sqi, saddle, Opt.minh, args_info.pseudoknots_flag, !args_info.minh_lite_flag);
286 
287           if (args_info.verbose_lvl_arg>0 && ii%100 == 0) {
288             fprintf(stderr, "non-shallow remained: %d / %d; time: %.2f secs.\n", i, ii, (clock()-clck1)/(double)CLOCKS_PER_SEC);
289           }
290 
291           // shallow
292           if (escape) {
293             if (args_info.verbose_lvl_arg>1) {
294               fprintf(stderr, "shallow: %s %6.2f (saddle: %s %6.2f)\n", pt_to_str_pk(it->first.structure).c_str(), it->first.energy/100.0, pt_to_str_pk(escape->structure).c_str(), escape->energy/100.0);
295             }
296             free(escape->structure);
297             free(escape);
298             free(it->first.structure);
299             continue;
300           }
301         }
302 
303         if (args_info.verbose_lvl_arg > 2) fprintf(stderr, "%4d %s %6.2f %6d\n", i+1, pt_to_str_pk(it->first.structure).c_str(), it->first.energy/100.0, it->second);
304         output_str[i]=pt_to_str_pk(it->first.structure);
305         output_num[i]=it->second;
306         output_he[i]=it->first;
307         output_en[i]=it->first.energy;
308         i++;
309 
310         // allegiance:
311         if (allegiance) LM_to_LMnum[it->first] = i;
312 
313       } else { // we have enough minima
314         free(it->first.structure);
315       }
316     }
317     output.clear();
318 
319     // allegiance:
320     if (allegiance) {
321       for (int i=0; i<(int)structures.size(); i++) {
322         fprintf(alleg, "%6d %s %6.2f %6d\n", i+1, pt_to_str_pk(structures[i].structure).c_str(), structures[i].energy/100.0, LM_to_LMnum[str_to_LM[structures[i]]]);
323         //free(structures[i].structure);
324       }
325       structures.clear();
326       LM_to_LMnum.clear();
327       str_to_LM.clear();
328     }
329 
330     // erase possible NULL elements...
331     /*if (args_info.noSort_flag) {
332       for (int j=output_he.size()-1; j>=0; j--) {
333         if (output_he[j].structure==NULL) {
334           output_he.erase(output_he.begin()+j);
335           output_str.erase(output_str.begin()+j);
336           output_en.erase(output_en.begin()+j);
337           output_num.erase(output_num.begin()+j);
338         }
339       }
340     } else {*/
341       output_he.resize(i);
342       output_str.resize(i);
343       output_en.resize(i);
344       output_num.resize(i);
345     //}
346 
347     // time?
348     if (Opt.minh>0 && args_info.verbose_lvl_arg>0) {
349       fprintf(stderr, "Discarding shallow minima: %.2f secs.\n", (clock() - clck1)/(double)CLOCKS_PER_SEC);
350       clck1 = clock();
351     }
352 
353     // array of energy barriers
354     float *energy_barr = NULL;
355     bool *findpath_barr = NULL;
356 
357     // find saddles - fill energy barriers
358     if (args_info.rates_flag || args_info.bartree_flag || args_info.barrier_file_given) {
359       // threshold for flooding
360       vector<int> tmp = output_num;
361       sort(tmp.begin(), tmp.end());
362       int thr = num*args_info.floodPortion_arg;
363       thr--;
364       threshold = (thr<0 ? 0 : tmp[thr]);
365 
366       // nodes
367       std::unique_ptr<nodeT []> nodes(new nodeT[num]);
368       energy_barr = (float*) malloc(num*num*sizeof(float));
369       for (int i=0; i<num*num; i++) energy_barr[i]=1e10;
370       findpath_barr = (bool*) malloc(num*num*sizeof(bool));
371       for (int i=0; i<num*num; i++) findpath_barr[i]=false;
372 
373       // fill nodes
374       for (int i=0; i<num; i++) {
375         nodes[i].father = -1;
376         nodes[i].height = output_en[i]/100.0;
377         nodes[i].label = NULL;
378         nodes[i].color = 0.0;
379         nodes[i].saddle_height = 1e10;
380       }
381 
382       int flooded = 0;
383       // init union-findset
384       init_union(num);
385       // first try to flood the highest bins
386       for (int i=num-1; i>=0; i--) {
387         // flood only if low number of walks ended there
388         if (output_num[i]<=threshold && Opt.floodMax>0) {
389           //copy_arr(Enc.pt, output_he[i].structure);
390           if (args_info.verbose_lvl_arg>2) fprintf(stderr,   "flooding  (%3d): %s %.2f\n", i+1, output_str[i].c_str(), output_he[i].energy/100.0);
391 
392           int saddle;
393           struct_en *he = flood(output_he[i], sqi, saddle, Opt.minh, args_info.pseudoknots_flag);
394 
395           // print info
396           if (args_info.verbose_lvl_arg>1) {
397             if (he) {
398               fprintf(stderr, "below     (%3d): %s %.2f\n"
399                               "en: %7.2f  is: %s %.2f\n", i,
400                       output_str[i].c_str(), output_he[i].energy/100.0, saddle/100.0,
401                       pt_to_str_pk(he->structure).c_str(), he->energy/100.0);
402             } else {
403               fprintf(stderr, "unsucesful(%3d): %s %.2f\n", i,
404                       output_str[i].c_str(), output_he[i].energy/100.0);
405             }
406           }
407           // if flood succesfull - walk down to find father minima
408           if (he) {
409             // walk down
410             move_set(*he, sqi);
411 
412             // now check if we have the minimum already (hopefuly yes ;-) )
413             vector<struct_en>::iterator it;
414             it = lower_bound(output_he.begin(), output_he.end(), *he, compf_entries2);
415 
416             if (args_info.verbose_lvl_arg>1) fprintf(stderr, "minimum: %s %.2f\n", pt_to_str_pk(he->structure).c_str(), he->energy/100.0);
417             // we dont need it again
418 
419             hash_eq heq;
420             if (it!=output_he.end() && heq(&*it, he)) {
421               int pos = (int)(it-output_he.begin());
422               if (args_info.verbose_lvl_arg>1) fprintf(stderr, "found father at pos: %d\n", pos);
423 
424               flooded++;
425               energy_barr[i*num+pos] = energy_barr[pos*num+i] = saddle/100.0;
426 
427               // union set
428               //fprintf(stderr, "join: %d %d\n", min(i, pos), max(i, pos));
429               union_set(min(i, pos), max(i, pos));
430             }
431             free(he->structure);
432             free(he);
433           }
434         }
435       }
436 
437       // time?
438       if (args_info.verbose_lvl_arg>0) {
439         fprintf(stderr, "Flood(%d(%d)/%d): %.2f secs.\n", flooded, (int)(num*args_info.floodPortion_arg), num, (clock() - clck1)/(double)CLOCKS_PER_SEC);
440         clck1 = clock();
441       }
442 
443       // for others, just do findpath
444       int findpath = 0;
445       set<int> to_findpath;
446       for (int i=0; i<num; i++) to_findpath.insert(find(i));
447 
448       if (args_info.verbose_lvl_arg>1) {
449         fprintf(stderr, "Minima left to findpath (their father = -1): ");
450         for (set<int>::iterator it=to_findpath.begin(); it!=to_findpath.end(); it++) {
451           fprintf(stderr, "%d ", *it);
452         }
453         fprintf(stderr, "\n");
454       }
455 
456       // findpath:
457       for (set<int>::iterator it=to_findpath.begin(); it!=to_findpath.end(); it++) {
458         set<int>::iterator it2=it;
459         it2++;
460         for (; it2!=to_findpath.end(); it2++) {
461           if (args_info.pseudoknots_flag) energy_barr[(*it2)*num+(*it)] = energy_barr[(*it)*num+(*it2)] = find_saddle_pk(seq, output_str[*it].c_str(), output_str[*it2].c_str(), args_info.depth_arg)/100.0;
462           else energy_barr[(*it2)*num+(*it)] = energy_barr[(*it)*num+(*it2)] = find_saddle(seq, output_str[*it].c_str(), output_str[*it2].c_str(), args_info.depth_arg)/100.0;
463           findpath_barr[(*it2)*num+(*it)] = findpath_barr[(*it)*num+(*it2)] = true;
464           if (args_info.verbose_lvl_arg>0 && findpath %10000==0){
465             fprintf(stderr, "Findpath:%7d/%7d\n", findpath, (int)(to_findpath.size()*(to_findpath.size()-1)/2));
466           }
467           findpath++;
468         }
469       }
470 
471       // debug output
472       if (args_info.verbose_lvl_arg>2) {
473         fprintf(stderr, "Energy barriers:\n");
474         //bool symmetric = true;
475         for (int i=0; i<num; i++) {
476           for (int j=0; j<num; j++) {
477             fprintf(stderr, "%8.2g%c ", energy_barr[i*num+j], (findpath_barr[i*num+j]?'~':' '));
478             //if (energy_barr[i*num+j] != energy_barr[j*num+i]) symmetric = false;
479           }
480           fprintf(stderr, "\n");
481         }
482         fprintf(stderr, "\n");
483         //fprintf(stderr, "%s", (symmetric? "":"non-symmetric energy barriers!!\n"));
484       }
485 
486       // time?
487       if (args_info.verbose_lvl_arg>0) {
488         fprintf(stderr, "Findpath(%d/%d): %.2f secs.\n", findpath, num*(num-1)/2, (clock() - clck1)/(double)CLOCKS_PER_SEC);
489         clck1 = clock();
490       }
491 
492       // create rates for treekin
493       if (args_info.rates_flag) {
494         print_rates(args_info.rates_file_arg, args_info.temp_arg, num, energy_barr, output_en);
495       }
496 
497       // saddles for evaluation
498       if (args_info.barrier_file_given) {
499         print_rates(args_info.barrier_file_arg, args_info.temp_arg, num, energy_barr, output_en, true);
500       }
501 
502       // generate barrier tree?
503       if (args_info.bartree_flag) {
504 
505         //PS_tree_plot(nodes, num, "tst.ps");
506 
507         // make tree (fill missing nodes)
508         make_tree(num, energy_barr, findpath_barr, nodes.get());
509 
510         // plot it!
511         PS_tree_plot(nodes.get(), num, args_info.barr_name_arg);
512       }
513 
514       // time?
515       if (args_info.verbose_lvl_arg>0) {
516         fprintf(stderr, "Rates + barrier tree generation: %.2f secs.\n", (clock() - clck1)/(double)CLOCKS_PER_SEC);
517         clck1 = clock();
518       }
519 
520       // ############### actual output
521       // printf output with fathers! maybe
522       printf("     %s\n", seq);
523       for (unsigned int i=0; i<output_str.size(); i++) {
524         if (args_info.eRange_given) {
525           if ((output_en[i] - output_en[0]) >  args_info.eRange_arg*100 ) {
526             break;
527           }
528         }
529         printf("%4d %s %6.2f", i+1, output_str[i].c_str(), output_en[i]/100.0);
530         if (args_info.bartree_flag) printf(" %4d %6.2f\n", nodes[i].father+1, nodes[i].saddle_height-nodes[i].height);
531         printf(" %6d\n", output_num[i]);
532       }
533 
534     } else {
535 
536       // printf output without fathers!
537       printf("     %s\n", seq);
538       for (unsigned int i=0; i<output_str.size(); i++) {
539         if (args_info.eRange_given) {
540           if ((output_en[i] - output_en[0]) >  args_info.eRange_arg*100 ) {
541             break;
542           }
543         }
544         printf("%4d %s %6.2f %6d", i+1, output_str[i].c_str(), output_en[i]/100.0, output_num[i]);
545         printf("\n");
546       }
547     }
548 
549     // release smth
550     for(unsigned int i=0; i<output_he.size(); i++) {
551       free(output_he[i].structure);
552     }
553     // free hash
554     free_hash(structs);
555 
556     // release res:
557     if (energy_barr!=NULL) free(energy_barr);
558     if (findpath_barr!=NULL) free(findpath_barr);
559 
560   } else { // fix-barrier part (just print output):
561     int mfe = output_barr.begin()->first.energy;
562     printf("     %s\n", seq);
563     int i=1;
564     for (map<struct_en, barr_info, comps_entries>::iterator it=output_barr.begin(); it!=output_barr.end(); it++) {
565       if (args_info.eRange_given) {
566         if ((it->first.energy - mfe) >  args_info.eRange_arg*100 ) {
567           break;
568         }
569       }
570       const struct_en &he = it->first;
571       barr_info &bi = it->second;
572       printf("%4d %s %6.2f", i, pt_to_str_pk(he.structure).c_str(), he.energy/100.0);
573       printf(" %4d %6.2f %6d %6d %10.6f %6d %10.6f\n", bi.father+1, bi.e_diff/100.0, bi.bsize, bi.fbsize, bi.fen, bi.grad, bi.feng);
574       i++;
575     }
576   }
577 
578   // release resources
579   if (seq!=NULL) free(seq);
580   if (name!=NULL) free(name);
581   cmdline_parser_free(&args_info);
582   freeP();
583   free_arrays();
584 
585   // time?
586   if (args_info.verbose_lvl_arg>0) {
587     fprintf(stderr, "Printing results + freeing args: %.2f secs.\n", (clock() - clck1)/(double)CLOCKS_PER_SEC);
588     clck1 = clock();
589   }
590 
591   return 0;
592 }
593 
read_previous(char * previous,map<struct_en,int,comps_entries> & output)594 char *read_previous(char *previous, map<struct_en, int, comps_entries> &output)
595 {
596   char *seq = NULL;
597   FILE *fprev;
598   fprev = fopen(previous, "r");
599   if (fprev == NULL) {
600     fprintf(stderr, "Cannot open file \"%s\".\n", previous);
601     exit(EXIT_FAILURE);
602   }
603   char *line = my_getline(fprev);
604   char *p = strtok(line, " \t\n");
605   while (p) {
606     if (isSeq(p)) {
607       seq = (char*)malloc((strlen(p)+1)*sizeof(char));
608       strcpy(seq, p);
609       break;
610     }
611     p = strtok(NULL, " \t\n");
612   }
613   free(line);
614 
615   if (seq == NULL) {
616     fprintf(stderr, "Couldn't find sequence on first line of file \"%s\"\n", previous);
617     fclose(fprev);
618     exit(EXIT_FAILURE);
619   }
620   // read previously found minima:
621   struct_en he;
622   while ((line = my_getline(fprev))) {
623     p = strtok(line, " \t\n");
624 
625     he.structure = NULL;
626     he.energy = INT_MAX;
627 
628     // read the stuff
629     int num;
630     sscanf(p, "%d", &num);
631     p = strtok(NULL, " \t\n");
632     if (p && isStruct(p)) {
633       he.structure = make_pair_table_PK(p);
634     }
635     p = strtok(NULL, " \t\n");
636     if (p && he.structure && he.energy==INT_MAX) {
637       float en;
638       if (sscanf(p, "%f", &en)==1) {
639         he.energy = en_fltoi(en);
640       }
641     }
642     // 3 aternatives: read father, e_diff and count; or read only count; or read nothing
643     int father;
644     float e_diff;
645     int count;
646     p = strtok(NULL, " \t\n");
647     if (p && he.structure && he.energy!=INT_MAX) {
648       sscanf(p, "%d", &father);
649       p = strtok(NULL, " \t\n");
650       if (p && he.structure && he.energy!=INT_MAX) {
651         sscanf(p, "%f", &e_diff);
652         p = strtok(NULL, " \t\n");
653         if (p && he.structure && he.energy!=INT_MAX) {
654           sscanf(p, "%d", &count);
655           p = strtok(NULL, " \t\n");
656           if (p && he.structure && he.energy!=INT_MAX) {
657             count = 0; // because we are reading barriers file without info about count...
658           }
659         } else count = 0;
660 
661       } else {
662         count = father;
663       }
664     } else count = 0;
665     // insert
666     output[he]=count;
667     free(line);
668   }
669 
670   fclose(fprev);
671 
672   return seq;
673 }
674 
read_barr(char * barr_arg,map<struct_en,barr_info,comps_entries> & output)675 char *read_barr(char *barr_arg, map<struct_en, barr_info, comps_entries> &output)
676 {
677   char *seq = NULL;
678   FILE *fbarr;
679   fbarr = fopen(barr_arg, "r");
680   if (fbarr == NULL) {
681     fprintf(stderr, "Cannot open file \"%s\".\n", barr_arg);
682     exit(EXIT_FAILURE);
683   }
684   char *line = my_getline(fbarr);
685   char *p = strtok(line, " \t\n");
686   while (p) {
687     if (isSeq(p)) {
688       seq = (char*)malloc((strlen(p)+1)*sizeof(char));
689       strcpy(seq, p);
690       break;
691     }
692     p = strtok(NULL, " \t\n");
693   }
694   free(line);
695 
696   if (seq == NULL) {
697     fprintf(stderr, "Couldn't find sequence on first line of file \"%s\"\n", barr_arg);
698     fclose(fbarr);
699     exit(EXIT_FAILURE);
700   }
701   // read previously found minima:
702   while ((line = my_getline(fbarr))) {
703     p = strtok(line, " \t\n");
704 
705     struct_en he;
706     he.structure = NULL;
707     he.energy = INT_MAX;
708 
709     // read the stuff
710     int num;
711     sscanf(p, "%d", &num);
712     p = strtok(NULL, " \t\n");
713     if (p && isStruct(p)) {
714       he.structure = make_pair_table_PK(p);
715     }
716     p = strtok(NULL, " \t\n");
717     if (p && he.structure && he.energy==INT_MAX) {
718       float en;
719       if (sscanf(p, "%f", &en)==1) {
720         he.energy = en_fltoi(en);
721       }
722     }
723     barr_info bi;
724     int i =0;
725     float en;
726     while (p && he.structure && he.energy!=INT_MAX) {
727       p = strtok(NULL, " \t\n");
728       i++;
729       switch (i){
730         case 1: sscanf(p, "%d", &bi.father); break;
731         case 2: sscanf(p, "%f", &en); bi.e_diff = en_fltoi(en); break;
732         case 3: sscanf(p, "%d", &bi.bsize); break;
733         case 4: sscanf(p, "%d", &bi.fbsize); break;
734         case 5: sscanf(p, "%f", &bi.fen); break;
735         case 6: sscanf(p, "%d", &bi.grad); break;
736         case 7: sscanf(p, "%f", &bi.feng); break;
737       }
738     }
739 
740     // try to move it:
741     SeqInfo sqi;
742     sqi.Init(seq);
743     he.energy = Opt.pknots ? energy_of_struct_pk(seq, he.structure, sqi.s0, sqi.s1, 0): energy_of_structure_pt(seq, he.structure, sqi.s0, sqi.s1, 0);
744     int last_en = he.energy;
745     move_set(he, sqi);
746     /*
747     //fprintf(stderr, "%f\n", he.energy);
748 
749     while ((Opt.rand? move_rand(he) : move_set(he))!=0) {
750       Deg.Clear();
751     }
752     Deg.Clear();*/
753 
754     // print changes:
755     if (last_en != he.energy) {
756       fprintf(stderr, "%6.2f -> %6.2f %s\n", last_en/100.0, he.energy/100.0, pt_to_str_pk(he.structure).c_str());
757     }
758 
759     // if we have it already:
760     map<struct_en, barr_info, comps_entries>::iterator it;
761     if ((it = output.find(he))!=output.end()) {
762       it->second = it->second + bi;
763       it->second.e_diff += last_en - he.energy;
764       free(he.structure);
765     } else {
766       output[he] = bi;
767     }
768 
769     free(line);
770   }
771 
772   fclose(fbarr);
773   return seq;
774 }
775 
read_seq(char * seq_arg,char ** name_out)776 char *read_seq(char *seq_arg, char **name_out)
777 {
778   char *name = NULL;
779   char *seq = NULL;
780   bool have_seq = false;
781   // read sequence
782   FILE *fseq;
783   fseq = fopen(seq_arg, "r");
784   if (fseq == NULL) {
785     fprintf(stderr, "WARNING: Cannot open file \"%s\".\n", seq_arg);
786     // try to read it from 1st line of input:
787     seq = my_getline(stdin);
788     int j = 0;
789     for (unsigned int i=0; i<strlen(seq); i++) {
790       if (seq[i]=='A' || seq[i]=='C' || seq[i]=='T' || seq[i]=='G' || seq[i]=='a' || seq[i]=='c' || seq[i]=='t' || seq[i]=='g' || seq[i]=='U' || seq[i]=='u') seq[j++] = seq[i];
791     }
792     seq[j]='\0';
793     seq = (char*)realloc(seq, sizeof(char)*(j+1));
794     if (j<5) {
795       fprintf(stderr, "ERROR: Sequence not found in input nor in \"%s\" file.\n", seq_arg);
796       exit(EXIT_FAILURE);
797     } else {
798       have_seq = true;
799     }
800   }
801   if (!have_seq) {
802     name = my_getline(fseq);
803     if (name == NULL) {
804       fprintf(stderr, "ERROR: File \"%s\" empty.\n", seq_arg);
805       fclose(fseq);
806       exit(EXIT_FAILURE);
807     }
808     seq = my_getline(fseq);
809     if (seq == NULL || name[0]!='>') {
810       //fprintf(stderr, "WARNING: File \"%s\" not in FASTA format. Using it as a sequence.\n", seq_arg);
811       if (seq!=NULL) free(seq);
812       seq = name;
813       name = NULL;
814     }
815     // seq on more lines??
816     char *seq2;
817     while ((seq2=my_getline(fseq))!=NULL && isSeq(seq2)) {
818       seq = (char*) realloc(seq, sizeof(char)*(strlen(seq)+strlen(seq2)+1));
819       strcpy(seq+strlen(seq), seq2);
820       free(seq2);
821       //fprintf(stderr, "%s %d\n", seq, (int)strlen(seq));
822     }
823 
824     fclose(fseq);
825     if (name) (*name_out) = name;
826   }
827 
828   // covert seq's T's to U's
829   for (int i=0; i<(int)strlen(seq); i++) {
830     if (seq[i]=='T') seq[i]='U';
831     if (seq[i]==' ') seq[i]='\0';
832   }
833   return seq;
834 }
835 
836 
move(unordered_map<struct_en,gw_struct,hash_fncts,hash_eq> & structs,map<struct_en,int,comps_entries> & output,set<struct_en,comps_entries> & output_shallow,SeqInfo & sqi,bool pure_output)837 int move(unordered_map<struct_en, gw_struct, hash_fncts, hash_eq> &structs, map<struct_en, int, comps_entries> &output, set<struct_en, comps_entries> &output_shallow, SeqInfo &sqi, bool pure_output)
838 {
839   // read a line
840   char *line = my_getline(stdin);
841   if (line == NULL) return -1;
842   if (line[0]=='>') {
843     free(line);
844     return 0;
845   }
846 
847   float energy=1e10;
848 
849   // process lines
850   char *p = line;
851   char *sep = " \t\n";
852   char *temp = NULL;
853 
854   bool struct_found = false;
855   bool energy_found = false;
856 
857   // read the structure
858   p = strtok(line, sep);
859   while(p!=NULL && !(struct_found && energy_found)) {
860     //fprintf(stderr, "%s\n", p);
861     if (isStruct(p)) {
862       if (struct_found) fprintf(stderr, "WARNING: On line \"%s\" two structure-like strings found!\n", line);
863       else {
864         temp = p;
865       }
866       struct_found = true;
867     } else {
868       if (isEnergy(p, energy)) {
869         energy_found = true;
870       }
871     }
872 
873     p = strtok(NULL, sep);
874   }
875   p = temp;
876   if (!struct_found) {
877     fprintf(stderr, "WARNING: On line \"%s\" no structure-like string found!\n", line);
878     free(line);
879     return 0;
880   }
881 
882   // count moves
883   num_moves++;
884 
885   // find length of structure
886   int len=0;
887   while (p[len]!='\0' && p[len]!=' ') len++;
888 
889   if (len!=seq_len) {
890     fprintf(stderr, "WARNING: Unequal lengths:\n(structure) %s\n (sequence) %s\n", p, sqi.seq);
891     free(line);
892     return -0;
893   }
894 
895   // make make_pair
896   struct_en str;
897   str.structure = Opt.pknots? make_pair_table_PK(p):make_pair_table(p);
898 
899   // only H,K,L,M types allowed:
900   if (!str.structure) {
901     free(line);
902     return 0;
903   } else {
904     str.energy = Opt.pknots? energy_of_struct_pk(sqi.seq, str.structure, sqi.s0, sqi.s1, Opt.verbose_lvl>3):energy_of_structure_pt(sqi.seq, str.structure, sqi.s0, sqi.s1, 0);
905     free(line);
906   }
907 
908   // if pure, just do descend and print it:
909   if (pure_output) {
910     //is it canonical (noLP)
911     if (Opt.noLP && find_lone_pair(str.structure)!=-1) {
912       if (Opt.verbose_lvl>0) fprintf(stderr, "WARNING: structure \"%s\" has lone pairs, skipping...\n", pt_to_str_pk(str.structure).c_str());
913       free(str.structure);
914       return -2;
915     }
916 
917     //debugging
918     if (Opt.verbose_lvl>1) fprintf(stderr, "proc(pure): %d %s\n", num_moves, pt_to_str_pk(str.structure).c_str());
919 
920     // descend
921     int gw_length = move_set(str, sqi);
922     // only some types of PK allowed!!!
923     if (Opt.pknots && str.energy == INT_MAX) {
924       free(str.structure);
925       return 0;
926     }
927 
928     if (Opt.verbose_lvl>2) fprintf(stderr, "\n  %s %d %d\n", pt_to_str_pk(str.structure).c_str(), str.energy, gw_length);
929     printf("%s %6.2f %4d\n", pt_to_str_pk(str.structure).c_str(), str.energy/100.0, gw_length);
930     free(str.structure);
931     return 1;
932   }
933 
934   // check if it was before
935   unordered_map<struct_en, gw_struct, hash_fncts, hash_eq>::iterator it_s = structs.find(str);
936 
937   // if it was - release memory + get another
938   if (it_s != structs.end()) {
939     it_s->second.count++;
940     free(str.structure);
941     return 0;
942   } else {
943     // find energy only if not in input (not working - does energy_of_move require energy_of_struct run first???)
944     //str.energy = Enc.Energy(str);
945     //printf("\n%s\n%s %7.2f\n\n", Enc.seq, pt_to_str_pk(str.structure).c_str(), str.energy/100.0);
946     /*if (1 || !energy_found) str.energy = Enc.Energy(str);
947     else str.energy = (int)(energy*100.0+(energy<0.0 ? -0.5 : 0.5));*/
948 
949     // allegiance hack:
950     struct_en he_str = str;
951     if (allegiance) {
952       structures.push_back(he_str);
953     }
954 
955     //is it canonical (noLP)
956     if (Opt.noLP && find_lone_pair(str.structure)!=-1) {
957       if (Opt.verbose_lvl>0) fprintf(stderr, "WARNING: structure \"%s\" has lone pairs, skipping...\n", pt_to_str_pk(str.structure).c_str());
958       free(str.structure);
959       return -2;
960     }
961 
962     // copy it anew
963     struct_en old = str;
964     str.structure = allocopy(str.structure);
965 
966     //debugging
967     if (Opt.verbose_lvl>1) fprintf(stderr, "processing: %d %s\n", num_moves, pt_to_str_pk(str.structure).c_str());
968 
969     // descend
970     move_set(str, sqi);
971     // only some types of PK allowed!!!
972     if (Opt.pknots && str.energy == INT_MAX) {
973       free(str.structure);
974       free(old.structure);
975       return 0;
976     }
977 
978     // insert into hash (memory is here only on left side)
979     gw_struct &lm = structs[old];
980     lm.count = 1;
981     /*
982     int i;
983     while ((i = (Opt.rand? move_rand(str) : move_set(str)))!=0) {
984       Deg.Clear();
985     }
986     Deg.Clear();*/
987 
988     if (Opt.verbose_lvl>2) fprintf(stderr, "\n  %s %d\n", pt_to_str_pk(str.structure).c_str(), str.energy);
989 
990     // save for output
991     map<struct_en, int, comps_entries>::iterator it;
992     if ((it = output.find(str)) != output.end()) {
993       it->second++;
994       lm.he = it->first;
995       free(str.structure);
996       // allegiance hack:
997       if (allegiance) str_to_LM[he_str] = it->first;
998     } else {
999       //str.num = output.size();
1000       lm.he = str;
1001       output.insert(make_pair(str, 1));
1002       // allegiance hack:
1003       if (allegiance) str_to_LM[he_str] = str;
1004     }
1005   }
1006 
1007   return 1;
1008 }
1009