1 #include <stdio.h>
2 #include <stdlib.h>
3 #include <math.h>
4 #include <time.h>
5 #include <limits.h>
6 #include <string.h>
7
8 #include <string>
9 #include <set>
10 #include <map>
11 #include <vector>
12 #include <algorithm>
13 #include <memory>
14
15 extern "C" {
16 #include "fold.h"
17 #include "findpath.h"
18 #include "RNAlocmin_cmdline.h"
19 #include "utils.h"
20 #include "read_epars.h"
21 #include "fold_vars.h"
22 #include "params.h"
23 #include "move_set_inside.h"
24 }
25
26 #include "findpath_pk.h"
27
28 #include "hash_util.h"
29 #include "globals.h"
30 #include "RNAlocmin.h"
31 #include "flood.h"
32 #include "RNAlocmin.h"
33 #include "move_set_pk.h"
34 #include "neighbourhood.h"
35
36 #include "barrier_tree.h"
37
38 using namespace std;
39
40 // dirty hack to "allegiance" stuff
41 //to be filled by 1st run
42 map<struct_en, struct_en, comps_entries> str_to_LM;
43 vector<struct_en> structures;
44 // to be filled after
45 map<struct_en, int, comps_entries> LM_to_LMnum;
46 static bool allegiance = false;
47
48
isSeq(char * p)49 inline bool isSeq(char *p)
50 {
51 // check first two chars - should be enough
52 char seq[]="ACGTUactgu";
53 int ok = 0;
54 for (unsigned int i=0; i<strlen(seq); i++) {
55 if (p[0]==seq[i]) {
56 ok++;
57 }
58 if (p[1]==seq[i]) {
59 ok++;
60 }
61 if (ok==2) break;
62 }
63
64 return (ok==2);
65 }
66
isStruct(char * p)67 inline bool isStruct(char *p)
68 {
69 // check first two chars - should be enough
70 if ((p[0]=='.' || p[0]=='(') && (p[1]=='.' || p[1]=='(' || p[1]=='[')) return true;
71 else return false;
72 }
73
isEnergy(char * p,float & energy)74 inline bool isEnergy(char *p, float &energy)
75 {
76 if (sscanf(p, "%f", &energy) == 1) return true;
77 else return false;
78 }
79
en_fltoi(float en)80 inline int en_fltoi(float en)
81 {
82 if (en < 0.0) return (int)(en*100 - 0.5);
83 else return (int)(en*100 + 0.5);
84 }
85
86 struct barr_info { // info taken from barriers
87 int father;
88 int e_diff;
89 int bsize;
90 int fbsize;
91 // unused, unmodified
92 float fen;
93 int grad;
94 float feng;
95
operator +barr_info96 barr_info &operator+(barr_info &second) {
97 bsize += second.bsize;
98 if (second.father!=father) father = -1;
99 fbsize += second.fbsize;
100 grad += second.grad;
101 return *this;
102 }
103 };
104
105 // functions that are down in file ;-)
106 char *read_seq(char *seq_arg, char **name_out);
107 int move(unordered_map<struct_en, gw_struct, hash_fncts, hash_eq> &structs, map<struct_en, int, comps_entries> &output, set<struct_en, comps_entries> &output_shallow, SeqInfo &sqi, bool pure_output);
108 char *read_previous(char *previous, map<struct_en, int, comps_entries> &output);
109 char *read_barr(char *previous, map<struct_en, barr_info, comps_entries> &output);
110
main(int argc,char ** argv)111 int main(int argc, char **argv)
112 {
113 clock_t clck1 = clock();
114
115 // testing
116 //test();
117 //exit(EXIT_SUCCESS);
118
119 // parse arguments
120 gengetopt_args_info args_info;
121 if (cmdline_parser(argc, argv, &args_info) != 0) {
122 fprintf(stderr, "ERROR: argument parsing problem.\n");
123 exit(EXIT_FAILURE);
124 }
125
126 //check for bad arguments
127 if (Opt.Init(args_info) !=0 ) {
128 fprintf(stderr, "ERROR: one or more bad arguments, exiting...\n");
129 exit(EXIT_FAILURE);
130 }
131
132 // adjust temperature
133 if (args_info.temp_given) {
134 temperature = args_info.temp_arg;
135 }
136
137 // read parameter file
138 if (args_info.paramFile_given) {
139 read_parameter_file(args_info.paramFile_arg);
140 }
141
142 // dangle setup
143 if (args_info.dangles_given) {
144 dangles = args_info.dangles_arg;
145 model_detailsT md;
146 set_model_details(&md);
147 md.dangles = dangles;
148 }
149
150 // adjust dependencies:
151 if (args_info.barr_name_given) {args_info.bartree_flag = true;}
152 if (args_info.rates_file_given) {args_info.rates_flag = true;}
153
154 // degeneracy setup
155 if (args_info.degeneracy_off_flag) {
156 degeneracy_handling(0);
157 Neighborhood::SwitchOffDegen();
158 }
159
160 //try_pk();
161 //exit(0);
162
163 // keep track of structures & statistics
164 map<struct_en, int, comps_entries> output; // structures plus energies to output (+ how many hits has this minima)
165 map<struct_en, barr_info, comps_entries> output_barr; // structures plus energies to output (+ barr_info)
166 set<struct_en, comps_entries> output_shallow; // shallow structures (if minh specified)
167 char *seq = NULL;
168 char *name = NULL;
169
170 // read previous LM or/and sequence
171 if (args_info.fix_barriers_given) {
172 seq = read_barr(args_info.fix_barriers_arg, output_barr);
173 } else {
174 if (args_info.previous_given) {
175 seq = read_previous(args_info.previous_arg, output);
176 } else {
177 seq = read_seq(args_info.seq_arg, &name);
178 }
179 }
180
181 if (args_info.verbose_lvl_arg>1) fprintf(stderr, "%s\n", seq);
182
183 seq_len = strlen(seq);
184
185 // allegiance:
186 FILE *alleg = NULL;
187 if (args_info.allegiance_given) {
188 alleg = fopen(args_info.allegiance_arg, "w");
189 if (alleg) {
190 allegiance = true;
191 fprintf(alleg, " %s\n", seq);
192 }
193 }
194
195 // time?
196 if (args_info.verbose_lvl_arg>0) {
197 fprintf(stderr, "Time to initialize: %.2f secs.\n", (clock() - clck1)/(double)CLOCKS_PER_SEC);
198 clck1 = clock();
199 }
200
201 // ########################## begin main loop - reads structures from RNAsubopt and process them
202 int count = max(output.size(), output_barr.size()); //num of local minima
203 SeqInfo sqi;
204 sqi.Init(seq);
205
206 int not_canonical = 0;
207
208 if (!args_info.fix_barriers_given) {
209
210 // if direct output:
211 if (args_info.just_output_flag) printf("%s\n", seq);
212
213 // hash
214 unordered_map<struct_en, gw_struct, hash_fncts, hash_eq> structs (HASHSIZE); // structures to minima map
215 while ((!args_info.find_num_given || count != args_info.find_num_arg) && !args_info.just_read_flag) {
216 int res = move(structs, output, output_shallow, sqi, args_info.just_output_flag);
217
218 // print out
219 //if (Opt.verbose_lvl>0 && num_moves%10000==0) fprintf(stderr, "processed %d, minima %d, time %f secs.\n", num_moves, count, (clock()-clck1)/(double)CLOCKS_PER_SEC);
220 if (Opt.verbose_lvl>0 && num_moves%(Opt.pknots?1000:10000)==0 && num_moves!=0) fprintf(stderr, "processed %d, minima %d, time %f secs.\n", num_moves, (int)output.size(), (clock()-clck1)/(double)CLOCKS_PER_SEC);
221
222 // evaluate results
223 if (res==0) continue; // same structure has been processed already
224 if (res==-1) break; // error or end
225 if (res==-2) not_canonical++;
226 if (res==1) count=output.size();
227 }
228
229 if (args_info.just_output_flag) {
230 // end it
231
232 // release resources
233 if (seq!=NULL) free(seq);
234 if (name!=NULL) free(name);
235 cmdline_parser_free(&args_info);
236 freeP();
237 free_arrays();
238
239 // time?
240 if (args_info.verbose_lvl_arg>0) {
241 fprintf(stderr, "Printing results + freeing args: %.2f secs.\n", (clock() - clck1)/(double)CLOCKS_PER_SEC);
242 clck1 = clock();
243 }
244 return 0;
245 }
246
247
248 // ########################## end main loop - reads structures from RNAsubopt and process them
249
250 //int num_of_structures = hash_size();
251 if (args_info.verbose_lvl_arg>0) print_stats(structs);
252 add_stats(structs, output);
253
254 // time?
255 if (args_info.verbose_lvl_arg>0) {
256 fprintf(stderr, "Main loop (deepest descent from RNAsubopt): %.2f secs.\n", (clock() - clck1)/(double)CLOCKS_PER_SEC);
257 clck1 = clock();
258 }
259
260 // vectors for rates computation
261 int num = ((count > args_info.min_num_arg && args_info.min_num_arg!=0) ? args_info.min_num_arg : count);
262 vector<string> output_str;
263 output_str.resize(num);
264 vector<struct_en> output_he;
265 struct_en h;
266 h.structure = NULL;
267 output_he.resize(num, h);
268 vector<int> output_en;
269 output_en.resize(num);
270 vector<int> output_num;
271 output_num.resize(num);
272
273 // threshold for flooding
274 int threshold;
275
276 int i=0;
277 int ii=0;
278 for (map<struct_en, int, comps_entries>::iterator it=output.begin(); it!=output.end(); it++) {
279 ii++;
280 // if not enough minima
281 if (i<num) {
282 // first check if the output is not shallow
283 if (Opt.minh>0) {
284 int saddle;
285 struct_en *escape = flood(it->first, sqi, saddle, Opt.minh, args_info.pseudoknots_flag, !args_info.minh_lite_flag);
286
287 if (args_info.verbose_lvl_arg>0 && ii%100 == 0) {
288 fprintf(stderr, "non-shallow remained: %d / %d; time: %.2f secs.\n", i, ii, (clock()-clck1)/(double)CLOCKS_PER_SEC);
289 }
290
291 // shallow
292 if (escape) {
293 if (args_info.verbose_lvl_arg>1) {
294 fprintf(stderr, "shallow: %s %6.2f (saddle: %s %6.2f)\n", pt_to_str_pk(it->first.structure).c_str(), it->first.energy/100.0, pt_to_str_pk(escape->structure).c_str(), escape->energy/100.0);
295 }
296 free(escape->structure);
297 free(escape);
298 free(it->first.structure);
299 continue;
300 }
301 }
302
303 if (args_info.verbose_lvl_arg > 2) fprintf(stderr, "%4d %s %6.2f %6d\n", i+1, pt_to_str_pk(it->first.structure).c_str(), it->first.energy/100.0, it->second);
304 output_str[i]=pt_to_str_pk(it->first.structure);
305 output_num[i]=it->second;
306 output_he[i]=it->first;
307 output_en[i]=it->first.energy;
308 i++;
309
310 // allegiance:
311 if (allegiance) LM_to_LMnum[it->first] = i;
312
313 } else { // we have enough minima
314 free(it->first.structure);
315 }
316 }
317 output.clear();
318
319 // allegiance:
320 if (allegiance) {
321 for (int i=0; i<(int)structures.size(); i++) {
322 fprintf(alleg, "%6d %s %6.2f %6d\n", i+1, pt_to_str_pk(structures[i].structure).c_str(), structures[i].energy/100.0, LM_to_LMnum[str_to_LM[structures[i]]]);
323 //free(structures[i].structure);
324 }
325 structures.clear();
326 LM_to_LMnum.clear();
327 str_to_LM.clear();
328 }
329
330 // erase possible NULL elements...
331 /*if (args_info.noSort_flag) {
332 for (int j=output_he.size()-1; j>=0; j--) {
333 if (output_he[j].structure==NULL) {
334 output_he.erase(output_he.begin()+j);
335 output_str.erase(output_str.begin()+j);
336 output_en.erase(output_en.begin()+j);
337 output_num.erase(output_num.begin()+j);
338 }
339 }
340 } else {*/
341 output_he.resize(i);
342 output_str.resize(i);
343 output_en.resize(i);
344 output_num.resize(i);
345 //}
346
347 // time?
348 if (Opt.minh>0 && args_info.verbose_lvl_arg>0) {
349 fprintf(stderr, "Discarding shallow minima: %.2f secs.\n", (clock() - clck1)/(double)CLOCKS_PER_SEC);
350 clck1 = clock();
351 }
352
353 // array of energy barriers
354 float *energy_barr = NULL;
355 bool *findpath_barr = NULL;
356
357 // find saddles - fill energy barriers
358 if (args_info.rates_flag || args_info.bartree_flag || args_info.barrier_file_given) {
359 // threshold for flooding
360 vector<int> tmp = output_num;
361 sort(tmp.begin(), tmp.end());
362 int thr = num*args_info.floodPortion_arg;
363 thr--;
364 threshold = (thr<0 ? 0 : tmp[thr]);
365
366 // nodes
367 std::unique_ptr<nodeT []> nodes(new nodeT[num]);
368 energy_barr = (float*) malloc(num*num*sizeof(float));
369 for (int i=0; i<num*num; i++) energy_barr[i]=1e10;
370 findpath_barr = (bool*) malloc(num*num*sizeof(bool));
371 for (int i=0; i<num*num; i++) findpath_barr[i]=false;
372
373 // fill nodes
374 for (int i=0; i<num; i++) {
375 nodes[i].father = -1;
376 nodes[i].height = output_en[i]/100.0;
377 nodes[i].label = NULL;
378 nodes[i].color = 0.0;
379 nodes[i].saddle_height = 1e10;
380 }
381
382 int flooded = 0;
383 // init union-findset
384 init_union(num);
385 // first try to flood the highest bins
386 for (int i=num-1; i>=0; i--) {
387 // flood only if low number of walks ended there
388 if (output_num[i]<=threshold && Opt.floodMax>0) {
389 //copy_arr(Enc.pt, output_he[i].structure);
390 if (args_info.verbose_lvl_arg>2) fprintf(stderr, "flooding (%3d): %s %.2f\n", i+1, output_str[i].c_str(), output_he[i].energy/100.0);
391
392 int saddle;
393 struct_en *he = flood(output_he[i], sqi, saddle, Opt.minh, args_info.pseudoknots_flag);
394
395 // print info
396 if (args_info.verbose_lvl_arg>1) {
397 if (he) {
398 fprintf(stderr, "below (%3d): %s %.2f\n"
399 "en: %7.2f is: %s %.2f\n", i,
400 output_str[i].c_str(), output_he[i].energy/100.0, saddle/100.0,
401 pt_to_str_pk(he->structure).c_str(), he->energy/100.0);
402 } else {
403 fprintf(stderr, "unsucesful(%3d): %s %.2f\n", i,
404 output_str[i].c_str(), output_he[i].energy/100.0);
405 }
406 }
407 // if flood succesfull - walk down to find father minima
408 if (he) {
409 // walk down
410 move_set(*he, sqi);
411
412 // now check if we have the minimum already (hopefuly yes ;-) )
413 vector<struct_en>::iterator it;
414 it = lower_bound(output_he.begin(), output_he.end(), *he, compf_entries2);
415
416 if (args_info.verbose_lvl_arg>1) fprintf(stderr, "minimum: %s %.2f\n", pt_to_str_pk(he->structure).c_str(), he->energy/100.0);
417 // we dont need it again
418
419 hash_eq heq;
420 if (it!=output_he.end() && heq(&*it, he)) {
421 int pos = (int)(it-output_he.begin());
422 if (args_info.verbose_lvl_arg>1) fprintf(stderr, "found father at pos: %d\n", pos);
423
424 flooded++;
425 energy_barr[i*num+pos] = energy_barr[pos*num+i] = saddle/100.0;
426
427 // union set
428 //fprintf(stderr, "join: %d %d\n", min(i, pos), max(i, pos));
429 union_set(min(i, pos), max(i, pos));
430 }
431 free(he->structure);
432 free(he);
433 }
434 }
435 }
436
437 // time?
438 if (args_info.verbose_lvl_arg>0) {
439 fprintf(stderr, "Flood(%d(%d)/%d): %.2f secs.\n", flooded, (int)(num*args_info.floodPortion_arg), num, (clock() - clck1)/(double)CLOCKS_PER_SEC);
440 clck1 = clock();
441 }
442
443 // for others, just do findpath
444 int findpath = 0;
445 set<int> to_findpath;
446 for (int i=0; i<num; i++) to_findpath.insert(find(i));
447
448 if (args_info.verbose_lvl_arg>1) {
449 fprintf(stderr, "Minima left to findpath (their father = -1): ");
450 for (set<int>::iterator it=to_findpath.begin(); it!=to_findpath.end(); it++) {
451 fprintf(stderr, "%d ", *it);
452 }
453 fprintf(stderr, "\n");
454 }
455
456 // findpath:
457 for (set<int>::iterator it=to_findpath.begin(); it!=to_findpath.end(); it++) {
458 set<int>::iterator it2=it;
459 it2++;
460 for (; it2!=to_findpath.end(); it2++) {
461 if (args_info.pseudoknots_flag) energy_barr[(*it2)*num+(*it)] = energy_barr[(*it)*num+(*it2)] = find_saddle_pk(seq, output_str[*it].c_str(), output_str[*it2].c_str(), args_info.depth_arg)/100.0;
462 else energy_barr[(*it2)*num+(*it)] = energy_barr[(*it)*num+(*it2)] = find_saddle(seq, output_str[*it].c_str(), output_str[*it2].c_str(), args_info.depth_arg)/100.0;
463 findpath_barr[(*it2)*num+(*it)] = findpath_barr[(*it)*num+(*it2)] = true;
464 if (args_info.verbose_lvl_arg>0 && findpath %10000==0){
465 fprintf(stderr, "Findpath:%7d/%7d\n", findpath, (int)(to_findpath.size()*(to_findpath.size()-1)/2));
466 }
467 findpath++;
468 }
469 }
470
471 // debug output
472 if (args_info.verbose_lvl_arg>2) {
473 fprintf(stderr, "Energy barriers:\n");
474 //bool symmetric = true;
475 for (int i=0; i<num; i++) {
476 for (int j=0; j<num; j++) {
477 fprintf(stderr, "%8.2g%c ", energy_barr[i*num+j], (findpath_barr[i*num+j]?'~':' '));
478 //if (energy_barr[i*num+j] != energy_barr[j*num+i]) symmetric = false;
479 }
480 fprintf(stderr, "\n");
481 }
482 fprintf(stderr, "\n");
483 //fprintf(stderr, "%s", (symmetric? "":"non-symmetric energy barriers!!\n"));
484 }
485
486 // time?
487 if (args_info.verbose_lvl_arg>0) {
488 fprintf(stderr, "Findpath(%d/%d): %.2f secs.\n", findpath, num*(num-1)/2, (clock() - clck1)/(double)CLOCKS_PER_SEC);
489 clck1 = clock();
490 }
491
492 // create rates for treekin
493 if (args_info.rates_flag) {
494 print_rates(args_info.rates_file_arg, args_info.temp_arg, num, energy_barr, output_en);
495 }
496
497 // saddles for evaluation
498 if (args_info.barrier_file_given) {
499 print_rates(args_info.barrier_file_arg, args_info.temp_arg, num, energy_barr, output_en, true);
500 }
501
502 // generate barrier tree?
503 if (args_info.bartree_flag) {
504
505 //PS_tree_plot(nodes, num, "tst.ps");
506
507 // make tree (fill missing nodes)
508 make_tree(num, energy_barr, findpath_barr, nodes.get());
509
510 // plot it!
511 PS_tree_plot(nodes.get(), num, args_info.barr_name_arg);
512 }
513
514 // time?
515 if (args_info.verbose_lvl_arg>0) {
516 fprintf(stderr, "Rates + barrier tree generation: %.2f secs.\n", (clock() - clck1)/(double)CLOCKS_PER_SEC);
517 clck1 = clock();
518 }
519
520 // ############### actual output
521 // printf output with fathers! maybe
522 printf(" %s\n", seq);
523 for (unsigned int i=0; i<output_str.size(); i++) {
524 if (args_info.eRange_given) {
525 if ((output_en[i] - output_en[0]) > args_info.eRange_arg*100 ) {
526 break;
527 }
528 }
529 printf("%4d %s %6.2f", i+1, output_str[i].c_str(), output_en[i]/100.0);
530 if (args_info.bartree_flag) printf(" %4d %6.2f\n", nodes[i].father+1, nodes[i].saddle_height-nodes[i].height);
531 printf(" %6d\n", output_num[i]);
532 }
533
534 } else {
535
536 // printf output without fathers!
537 printf(" %s\n", seq);
538 for (unsigned int i=0; i<output_str.size(); i++) {
539 if (args_info.eRange_given) {
540 if ((output_en[i] - output_en[0]) > args_info.eRange_arg*100 ) {
541 break;
542 }
543 }
544 printf("%4d %s %6.2f %6d", i+1, output_str[i].c_str(), output_en[i]/100.0, output_num[i]);
545 printf("\n");
546 }
547 }
548
549 // release smth
550 for(unsigned int i=0; i<output_he.size(); i++) {
551 free(output_he[i].structure);
552 }
553 // free hash
554 free_hash(structs);
555
556 // release res:
557 if (energy_barr!=NULL) free(energy_barr);
558 if (findpath_barr!=NULL) free(findpath_barr);
559
560 } else { // fix-barrier part (just print output):
561 int mfe = output_barr.begin()->first.energy;
562 printf(" %s\n", seq);
563 int i=1;
564 for (map<struct_en, barr_info, comps_entries>::iterator it=output_barr.begin(); it!=output_barr.end(); it++) {
565 if (args_info.eRange_given) {
566 if ((it->first.energy - mfe) > args_info.eRange_arg*100 ) {
567 break;
568 }
569 }
570 const struct_en &he = it->first;
571 barr_info &bi = it->second;
572 printf("%4d %s %6.2f", i, pt_to_str_pk(he.structure).c_str(), he.energy/100.0);
573 printf(" %4d %6.2f %6d %6d %10.6f %6d %10.6f\n", bi.father+1, bi.e_diff/100.0, bi.bsize, bi.fbsize, bi.fen, bi.grad, bi.feng);
574 i++;
575 }
576 }
577
578 // release resources
579 if (seq!=NULL) free(seq);
580 if (name!=NULL) free(name);
581 cmdline_parser_free(&args_info);
582 freeP();
583 free_arrays();
584
585 // time?
586 if (args_info.verbose_lvl_arg>0) {
587 fprintf(stderr, "Printing results + freeing args: %.2f secs.\n", (clock() - clck1)/(double)CLOCKS_PER_SEC);
588 clck1 = clock();
589 }
590
591 return 0;
592 }
593
read_previous(char * previous,map<struct_en,int,comps_entries> & output)594 char *read_previous(char *previous, map<struct_en, int, comps_entries> &output)
595 {
596 char *seq = NULL;
597 FILE *fprev;
598 fprev = fopen(previous, "r");
599 if (fprev == NULL) {
600 fprintf(stderr, "Cannot open file \"%s\".\n", previous);
601 exit(EXIT_FAILURE);
602 }
603 char *line = my_getline(fprev);
604 char *p = strtok(line, " \t\n");
605 while (p) {
606 if (isSeq(p)) {
607 seq = (char*)malloc((strlen(p)+1)*sizeof(char));
608 strcpy(seq, p);
609 break;
610 }
611 p = strtok(NULL, " \t\n");
612 }
613 free(line);
614
615 if (seq == NULL) {
616 fprintf(stderr, "Couldn't find sequence on first line of file \"%s\"\n", previous);
617 fclose(fprev);
618 exit(EXIT_FAILURE);
619 }
620 // read previously found minima:
621 struct_en he;
622 while ((line = my_getline(fprev))) {
623 p = strtok(line, " \t\n");
624
625 he.structure = NULL;
626 he.energy = INT_MAX;
627
628 // read the stuff
629 int num;
630 sscanf(p, "%d", &num);
631 p = strtok(NULL, " \t\n");
632 if (p && isStruct(p)) {
633 he.structure = make_pair_table_PK(p);
634 }
635 p = strtok(NULL, " \t\n");
636 if (p && he.structure && he.energy==INT_MAX) {
637 float en;
638 if (sscanf(p, "%f", &en)==1) {
639 he.energy = en_fltoi(en);
640 }
641 }
642 // 3 aternatives: read father, e_diff and count; or read only count; or read nothing
643 int father;
644 float e_diff;
645 int count;
646 p = strtok(NULL, " \t\n");
647 if (p && he.structure && he.energy!=INT_MAX) {
648 sscanf(p, "%d", &father);
649 p = strtok(NULL, " \t\n");
650 if (p && he.structure && he.energy!=INT_MAX) {
651 sscanf(p, "%f", &e_diff);
652 p = strtok(NULL, " \t\n");
653 if (p && he.structure && he.energy!=INT_MAX) {
654 sscanf(p, "%d", &count);
655 p = strtok(NULL, " \t\n");
656 if (p && he.structure && he.energy!=INT_MAX) {
657 count = 0; // because we are reading barriers file without info about count...
658 }
659 } else count = 0;
660
661 } else {
662 count = father;
663 }
664 } else count = 0;
665 // insert
666 output[he]=count;
667 free(line);
668 }
669
670 fclose(fprev);
671
672 return seq;
673 }
674
read_barr(char * barr_arg,map<struct_en,barr_info,comps_entries> & output)675 char *read_barr(char *barr_arg, map<struct_en, barr_info, comps_entries> &output)
676 {
677 char *seq = NULL;
678 FILE *fbarr;
679 fbarr = fopen(barr_arg, "r");
680 if (fbarr == NULL) {
681 fprintf(stderr, "Cannot open file \"%s\".\n", barr_arg);
682 exit(EXIT_FAILURE);
683 }
684 char *line = my_getline(fbarr);
685 char *p = strtok(line, " \t\n");
686 while (p) {
687 if (isSeq(p)) {
688 seq = (char*)malloc((strlen(p)+1)*sizeof(char));
689 strcpy(seq, p);
690 break;
691 }
692 p = strtok(NULL, " \t\n");
693 }
694 free(line);
695
696 if (seq == NULL) {
697 fprintf(stderr, "Couldn't find sequence on first line of file \"%s\"\n", barr_arg);
698 fclose(fbarr);
699 exit(EXIT_FAILURE);
700 }
701 // read previously found minima:
702 while ((line = my_getline(fbarr))) {
703 p = strtok(line, " \t\n");
704
705 struct_en he;
706 he.structure = NULL;
707 he.energy = INT_MAX;
708
709 // read the stuff
710 int num;
711 sscanf(p, "%d", &num);
712 p = strtok(NULL, " \t\n");
713 if (p && isStruct(p)) {
714 he.structure = make_pair_table_PK(p);
715 }
716 p = strtok(NULL, " \t\n");
717 if (p && he.structure && he.energy==INT_MAX) {
718 float en;
719 if (sscanf(p, "%f", &en)==1) {
720 he.energy = en_fltoi(en);
721 }
722 }
723 barr_info bi;
724 int i =0;
725 float en;
726 while (p && he.structure && he.energy!=INT_MAX) {
727 p = strtok(NULL, " \t\n");
728 i++;
729 switch (i){
730 case 1: sscanf(p, "%d", &bi.father); break;
731 case 2: sscanf(p, "%f", &en); bi.e_diff = en_fltoi(en); break;
732 case 3: sscanf(p, "%d", &bi.bsize); break;
733 case 4: sscanf(p, "%d", &bi.fbsize); break;
734 case 5: sscanf(p, "%f", &bi.fen); break;
735 case 6: sscanf(p, "%d", &bi.grad); break;
736 case 7: sscanf(p, "%f", &bi.feng); break;
737 }
738 }
739
740 // try to move it:
741 SeqInfo sqi;
742 sqi.Init(seq);
743 he.energy = Opt.pknots ? energy_of_struct_pk(seq, he.structure, sqi.s0, sqi.s1, 0): energy_of_structure_pt(seq, he.structure, sqi.s0, sqi.s1, 0);
744 int last_en = he.energy;
745 move_set(he, sqi);
746 /*
747 //fprintf(stderr, "%f\n", he.energy);
748
749 while ((Opt.rand? move_rand(he) : move_set(he))!=0) {
750 Deg.Clear();
751 }
752 Deg.Clear();*/
753
754 // print changes:
755 if (last_en != he.energy) {
756 fprintf(stderr, "%6.2f -> %6.2f %s\n", last_en/100.0, he.energy/100.0, pt_to_str_pk(he.structure).c_str());
757 }
758
759 // if we have it already:
760 map<struct_en, barr_info, comps_entries>::iterator it;
761 if ((it = output.find(he))!=output.end()) {
762 it->second = it->second + bi;
763 it->second.e_diff += last_en - he.energy;
764 free(he.structure);
765 } else {
766 output[he] = bi;
767 }
768
769 free(line);
770 }
771
772 fclose(fbarr);
773 return seq;
774 }
775
read_seq(char * seq_arg,char ** name_out)776 char *read_seq(char *seq_arg, char **name_out)
777 {
778 char *name = NULL;
779 char *seq = NULL;
780 bool have_seq = false;
781 // read sequence
782 FILE *fseq;
783 fseq = fopen(seq_arg, "r");
784 if (fseq == NULL) {
785 fprintf(stderr, "WARNING: Cannot open file \"%s\".\n", seq_arg);
786 // try to read it from 1st line of input:
787 seq = my_getline(stdin);
788 int j = 0;
789 for (unsigned int i=0; i<strlen(seq); i++) {
790 if (seq[i]=='A' || seq[i]=='C' || seq[i]=='T' || seq[i]=='G' || seq[i]=='a' || seq[i]=='c' || seq[i]=='t' || seq[i]=='g' || seq[i]=='U' || seq[i]=='u') seq[j++] = seq[i];
791 }
792 seq[j]='\0';
793 seq = (char*)realloc(seq, sizeof(char)*(j+1));
794 if (j<5) {
795 fprintf(stderr, "ERROR: Sequence not found in input nor in \"%s\" file.\n", seq_arg);
796 exit(EXIT_FAILURE);
797 } else {
798 have_seq = true;
799 }
800 }
801 if (!have_seq) {
802 name = my_getline(fseq);
803 if (name == NULL) {
804 fprintf(stderr, "ERROR: File \"%s\" empty.\n", seq_arg);
805 fclose(fseq);
806 exit(EXIT_FAILURE);
807 }
808 seq = my_getline(fseq);
809 if (seq == NULL || name[0]!='>') {
810 //fprintf(stderr, "WARNING: File \"%s\" not in FASTA format. Using it as a sequence.\n", seq_arg);
811 if (seq!=NULL) free(seq);
812 seq = name;
813 name = NULL;
814 }
815 // seq on more lines??
816 char *seq2;
817 while ((seq2=my_getline(fseq))!=NULL && isSeq(seq2)) {
818 seq = (char*) realloc(seq, sizeof(char)*(strlen(seq)+strlen(seq2)+1));
819 strcpy(seq+strlen(seq), seq2);
820 free(seq2);
821 //fprintf(stderr, "%s %d\n", seq, (int)strlen(seq));
822 }
823
824 fclose(fseq);
825 if (name) (*name_out) = name;
826 }
827
828 // covert seq's T's to U's
829 for (int i=0; i<(int)strlen(seq); i++) {
830 if (seq[i]=='T') seq[i]='U';
831 if (seq[i]==' ') seq[i]='\0';
832 }
833 return seq;
834 }
835
836
move(unordered_map<struct_en,gw_struct,hash_fncts,hash_eq> & structs,map<struct_en,int,comps_entries> & output,set<struct_en,comps_entries> & output_shallow,SeqInfo & sqi,bool pure_output)837 int move(unordered_map<struct_en, gw_struct, hash_fncts, hash_eq> &structs, map<struct_en, int, comps_entries> &output, set<struct_en, comps_entries> &output_shallow, SeqInfo &sqi, bool pure_output)
838 {
839 // read a line
840 char *line = my_getline(stdin);
841 if (line == NULL) return -1;
842 if (line[0]=='>') {
843 free(line);
844 return 0;
845 }
846
847 float energy=1e10;
848
849 // process lines
850 char *p = line;
851 char *sep = " \t\n";
852 char *temp = NULL;
853
854 bool struct_found = false;
855 bool energy_found = false;
856
857 // read the structure
858 p = strtok(line, sep);
859 while(p!=NULL && !(struct_found && energy_found)) {
860 //fprintf(stderr, "%s\n", p);
861 if (isStruct(p)) {
862 if (struct_found) fprintf(stderr, "WARNING: On line \"%s\" two structure-like strings found!\n", line);
863 else {
864 temp = p;
865 }
866 struct_found = true;
867 } else {
868 if (isEnergy(p, energy)) {
869 energy_found = true;
870 }
871 }
872
873 p = strtok(NULL, sep);
874 }
875 p = temp;
876 if (!struct_found) {
877 fprintf(stderr, "WARNING: On line \"%s\" no structure-like string found!\n", line);
878 free(line);
879 return 0;
880 }
881
882 // count moves
883 num_moves++;
884
885 // find length of structure
886 int len=0;
887 while (p[len]!='\0' && p[len]!=' ') len++;
888
889 if (len!=seq_len) {
890 fprintf(stderr, "WARNING: Unequal lengths:\n(structure) %s\n (sequence) %s\n", p, sqi.seq);
891 free(line);
892 return -0;
893 }
894
895 // make make_pair
896 struct_en str;
897 str.structure = Opt.pknots? make_pair_table_PK(p):make_pair_table(p);
898
899 // only H,K,L,M types allowed:
900 if (!str.structure) {
901 free(line);
902 return 0;
903 } else {
904 str.energy = Opt.pknots? energy_of_struct_pk(sqi.seq, str.structure, sqi.s0, sqi.s1, Opt.verbose_lvl>3):energy_of_structure_pt(sqi.seq, str.structure, sqi.s0, sqi.s1, 0);
905 free(line);
906 }
907
908 // if pure, just do descend and print it:
909 if (pure_output) {
910 //is it canonical (noLP)
911 if (Opt.noLP && find_lone_pair(str.structure)!=-1) {
912 if (Opt.verbose_lvl>0) fprintf(stderr, "WARNING: structure \"%s\" has lone pairs, skipping...\n", pt_to_str_pk(str.structure).c_str());
913 free(str.structure);
914 return -2;
915 }
916
917 //debugging
918 if (Opt.verbose_lvl>1) fprintf(stderr, "proc(pure): %d %s\n", num_moves, pt_to_str_pk(str.structure).c_str());
919
920 // descend
921 int gw_length = move_set(str, sqi);
922 // only some types of PK allowed!!!
923 if (Opt.pknots && str.energy == INT_MAX) {
924 free(str.structure);
925 return 0;
926 }
927
928 if (Opt.verbose_lvl>2) fprintf(stderr, "\n %s %d %d\n", pt_to_str_pk(str.structure).c_str(), str.energy, gw_length);
929 printf("%s %6.2f %4d\n", pt_to_str_pk(str.structure).c_str(), str.energy/100.0, gw_length);
930 free(str.structure);
931 return 1;
932 }
933
934 // check if it was before
935 unordered_map<struct_en, gw_struct, hash_fncts, hash_eq>::iterator it_s = structs.find(str);
936
937 // if it was - release memory + get another
938 if (it_s != structs.end()) {
939 it_s->second.count++;
940 free(str.structure);
941 return 0;
942 } else {
943 // find energy only if not in input (not working - does energy_of_move require energy_of_struct run first???)
944 //str.energy = Enc.Energy(str);
945 //printf("\n%s\n%s %7.2f\n\n", Enc.seq, pt_to_str_pk(str.structure).c_str(), str.energy/100.0);
946 /*if (1 || !energy_found) str.energy = Enc.Energy(str);
947 else str.energy = (int)(energy*100.0+(energy<0.0 ? -0.5 : 0.5));*/
948
949 // allegiance hack:
950 struct_en he_str = str;
951 if (allegiance) {
952 structures.push_back(he_str);
953 }
954
955 //is it canonical (noLP)
956 if (Opt.noLP && find_lone_pair(str.structure)!=-1) {
957 if (Opt.verbose_lvl>0) fprintf(stderr, "WARNING: structure \"%s\" has lone pairs, skipping...\n", pt_to_str_pk(str.structure).c_str());
958 free(str.structure);
959 return -2;
960 }
961
962 // copy it anew
963 struct_en old = str;
964 str.structure = allocopy(str.structure);
965
966 //debugging
967 if (Opt.verbose_lvl>1) fprintf(stderr, "processing: %d %s\n", num_moves, pt_to_str_pk(str.structure).c_str());
968
969 // descend
970 move_set(str, sqi);
971 // only some types of PK allowed!!!
972 if (Opt.pknots && str.energy == INT_MAX) {
973 free(str.structure);
974 free(old.structure);
975 return 0;
976 }
977
978 // insert into hash (memory is here only on left side)
979 gw_struct &lm = structs[old];
980 lm.count = 1;
981 /*
982 int i;
983 while ((i = (Opt.rand? move_rand(str) : move_set(str)))!=0) {
984 Deg.Clear();
985 }
986 Deg.Clear();*/
987
988 if (Opt.verbose_lvl>2) fprintf(stderr, "\n %s %d\n", pt_to_str_pk(str.structure).c_str(), str.energy);
989
990 // save for output
991 map<struct_en, int, comps_entries>::iterator it;
992 if ((it = output.find(str)) != output.end()) {
993 it->second++;
994 lm.he = it->first;
995 free(str.structure);
996 // allegiance hack:
997 if (allegiance) str_to_LM[he_str] = it->first;
998 } else {
999 //str.num = output.size();
1000 lm.he = str;
1001 output.insert(make_pair(str, 1));
1002 // allegiance hack:
1003 if (allegiance) str_to_LM[he_str] = str;
1004 }
1005 }
1006
1007 return 1;
1008 }
1009