1 #include <stdio.h>
2 #include <stdlib.h>
3 #include <string.h>
4 #include <assert.h>
5 
6 #include <unordered_map>
7 #include <stack>
8 #include <algorithm>
9 #include <vector>
10 #include <ctime>
11 
12 extern "C" {
13   #include "pair_mat.h"
14   #include "fold.h"
15   #include "loop_energies.h"
16 
17   #include "move_set_inside.h"
18 }
19 
20 #include "pknots.h"
21 
22 static float time_eos = 0.0;
23 static paramT *P = NULL;
24 
freeP()25 void freeP()
26 {
27   if (P) free(P);
28   P = NULL;
29 }
30 
get_eos_time()31 float get_eos_time()
32 {
33   return time_eos;
34 }
35 
36 // ################ issue: TODO GU pair on end adds 0.5 penalty....
37 
Ext_Loop(int p,int q,short * s0,paramT * P)38 inline int Ext_Loop(int p, int q, short *s0, paramT *P)
39 {
40   int energy = E_ExtLoop(pair[s0[p]][s0[q]], -1, -1, P);
41   return energy;
42 }
43 
M_Loop(int p,int q,short * s0,paramT * P)44 inline int M_Loop(int p, int q, short *s0, paramT *P)
45 {
46   int energy = E_MLstem(pair[s0[p]][s0[q]], -1, -1, P);
47   return energy;
48 }
49 
50 using namespace std;
51 /*
52 void copy_arr(short *dest, const short *src)
53 {
54   if (!src || !dest) {
55     fprintf(stderr, "Empty pointer in copying\n");
56     return;
57   }
58   memcpy(dest, src, sizeof(short)*(src[0]+1));
59 }
60 
61 short *allocopy(const short *src)
62 {
63   short *res = (short*) malloc(sizeof(short)*(src[0]+1));
64   copy_arr(res, src);
65   return res;
66 }
67 
68 char *allocopy(const char *src)
69 {
70   char *res = (char*) malloc(sizeof(char)*(strlen(src)+1));
71   strcpy(res, src);
72   return res;
73 }*/
74 
make_pair_table_PK(const char * str)75 short *make_pair_table_PK(const char *str)
76 {
77   const int maxp = 4;
78  /* const char ptl[] = {"([{<"};
79   const char ptr[] = {")]}>"};*/
80 
81   // stack of end points of each parenthesis
82   vector<vector<int> > pars(maxp);
83 
84   int length = strlen(str);
85   short *structure = (short*) malloc(sizeof(short)*(length+1));
86   for (int i=1; i<=length; i++) {
87     switch (str[i-1]) {
88     case '.': structure[i] = 0; break;
89     case '(': pars[0].push_back(i); break;
90     case ')': structure[i] = pars[0].back(); structure[pars[0].back()] = i; pars[0].pop_back(); break;
91     case '[': pars[1].push_back(i); break;
92     case ']': structure[i] = pars[1].back(); structure[pars[1].back()] = i; pars[1].pop_back(); break;
93     case '{': pars[2].push_back(i); break;
94     case '}': structure[i] = pars[2].back(); structure[pars[2].back()] = i; pars[2].pop_back(); break;
95     case '<': pars[3].push_back(i); break;
96     case '>': structure[i] = pars[3].back(); structure[pars[3].back()] = i; pars[3].pop_back(); break;
97     }
98   }
99 
100   pars.clear();
101 
102   structure[0] = length;
103 
104   return structure;
105 }
106 
pt_to_str_pk(const short * str)107 string pt_to_str_pk(const short *str)
108 {
109   // we can do with 4 types now:
110   const int maxp = 4;
111   const char ptl[] = {"([{<"};
112   const char ptr[] = {")]}>"};
113 
114   // result:
115   string res;
116   res.reserve(str[0]);
117 
118   // stack of end points of each parenthesis
119   vector<stack<int> > pars(maxp);
120 
121   // temprary structure
122   vector<int> tmp(str[0]+1, 0);
123 
124   // precomputation
125   for (int i=1; i<=str[0]; i++) {
126     if (str[i]>i) {
127       int j=0;
128       //fprintf(stderr, "%d %d \n", (int)pars[j].size(), pars[j].empty());
129       while (j<maxp && !pars[j].empty() && pars[j].top()<str[i]) {
130         j++;
131       }
132       if (j==maxp) {
133         fprintf(stderr, "Cannot print it with %d types of parentheses!!!\n", maxp);
134         throw;
135         return res;
136       } else {
137         // jth parenthesis:
138         pars[j].push(str[i]);
139         tmp[i] = j;
140         tmp[str[i]] = j;
141       }
142     } else if (str[i]>0 && str[i]<i) {
143       pars[tmp[i]].pop();
144     }
145   }
146 
147   // filling the result:
148   for (int i=1; i<=str[0]; i++) {
149     if (str[i]==0) res += '.';
150     else if (str[i]>i) res += ptl[tmp[i]];
151     else res += ptr[tmp[i]];
152   }
153   res+='\0';
154 
155   return res;
156 }
157 
pt_to_chars_pk(const short * str)158 char* pt_to_chars_pk(const short *str)
159 {
160   // we can do with 4 types now:
161   const int maxp = 4;
162   const char ptl[] = {"([{<"};
163   const char ptr[] = {")]}>"};
164 
165   // result:
166   char *res = (char*) malloc((str[0]+1)*sizeof(char));
167 
168   // stack of end points of each parenthesis
169   vector<stack<int> > pars(maxp);
170 
171   // temprary structure
172   vector<int> tmp(str[0]+1, 0);
173 
174   // precomputation
175   for (int i=1; i<=str[0]; i++) {
176     if (str[i]>i) {
177       int j=0;
178       //fprintf(stderr, "%d %d \n", (int)pars[j].size(), pars[j].empty());
179       while (j<maxp && !pars[j].empty() && pars[j].top()<str[i]) {
180         j++;
181       }
182       if (j==maxp) {
183         fprintf(stderr, "Cannot print it with %d types of parentheses!!!\n", maxp);
184         free(res);
185         return NULL;
186       } else {
187         // jth parenthesis:
188         pars[j].push(str[i]);
189         tmp[i] = j;
190         tmp[str[i]] = j;
191       }
192     } else if (str[i]>0 && str[i]<i) {
193       pars[tmp[i]].pop();
194     }
195   }
196 
197   // filling the result:
198   for (int i=1; i<=str[0]; i++) {
199     if (str[i]==0) res[i-1] = '.';
200     else if (str[i]>i) res[i-1] = ptl[tmp[i]];
201     else res[i-1] = ptr[tmp[i]];
202   }
203   res[str[0]] ='\0';
204 
205   return res;
206 }
pt_to_chars_pk(const short * str,char * dest)207 void pt_to_chars_pk(const short *str, char *dest)
208 {
209   // we can do with 4 types now:
210   const int maxp = 4;
211   const char ptl[] = {"([{<"};
212   const char ptr[] = {")]}>"};
213 
214   // stack of end points of each parenthesis
215   vector<stack<int> > pars(maxp);
216 
217   // temprary structure
218   vector<int> tmp(str[0]+1, 0);
219 
220   // precomputation
221   for (int i=1; i<=str[0]; i++) {
222     if (str[i]>i) {
223       int j=0;
224       //fprintf(stderr, "%d %d \n", (int)pars[j].size(), pars[j].empty());
225       while (j<maxp && !pars[j].empty() && pars[j].top()<str[i]) {
226         j++;
227       }
228       if (j==maxp) {
229         fprintf(stderr, "Cannot print it with %d types of parentheses!!!\n", maxp);
230         return ;
231       } else {
232         // jth parenthesis:
233         pars[j].push(str[i]);
234         tmp[i] = j;
235         tmp[str[i]] = j;
236       }
237     } else if (str[i]>0 && str[i]<i) {
238       pars[tmp[i]].pop();
239     }
240   }
241 
242   // filling the result:
243   for (int i=1; i<=str[0]; i++) {
244     if (str[i]==0) dest[i-1] = '.';
245     else if (str[i]>i) dest[i-1] = ptl[tmp[i]];
246     else dest[i-1] = ptr[tmp[i]];
247   }
248   dest[str[0]] ='\0';
249 }
250 
loop_energy_pk(int begin,short * str,short * s0,short * s1,paramT * P,bool & multiloop)251 int loop_energy_pk(int begin, short *str, short *s0, short *s1, paramT *P, bool &multiloop) {
252 
253   int end = str[begin];
254   if (begin==0) end++;
255   int alone = end - begin - 1;
256   int energy = 0;
257   int alone_upto = begin;
258   int loops = 0;
259 
260   //fprintf(stderr, "le_pk %d %d\n", begin, multiloop);
261 
262   for (int i=begin+1; i<end; i++) {
263     ///TODO dangles == 0
264     if (str[i]>i && str[i]>alone_upto && str[i]<end) { // '(' ,but not nested '('
265       if (multiloop) {
266         energy += M_Loop(i, str[i], s0, P);
267         loops ++;
268         // and subtract alone positions
269         if (alone_upto<i) {
270           alone -= str[i] - i +1;
271         } else {
272           alone -= str[i] - alone_upto;
273         }
274         alone_upto = str[i];
275       } else {
276         energy += Ext_Loop(i, str[i], s0, P);
277         //fprintf(stderr, "le_pk EXT %d %d %d\n", i, str[i], energy);
278         alone_upto = str[i];
279       }
280 
281       //if (tree->debug) fprintf(stderr, "eval EN: %3d %3d =>  %7.2f  (%7.2f -> %7.2f)\n", it->second->begin, it->second->end, (energy - old_energy)/100.0, old_energy/100.0, energy/100.0);
282     }
283   }
284 
285   // case that we don't have a multiloop
286   if (loops == 1 && multiloop) {
287     multiloop = false;
288     return loop_energy(str, s0, s1, begin);
289   }
290 
291   if (multiloop) {
292     energy += M_Loop(begin, end, s0, P);
293     energy += P->MLclosing;
294     /* logarithmic ML loop energy if logML */
295     if (logML && (alone>6)) {
296       energy += 6*P->MLbase+(int)(P->lxc*log((double)alone/6.));
297     } else {
298       energy += (alone*P->MLbase);
299     }
300   }
301 
302   return energy;
303 }
304 
305 struct LE_ret {
306   int energy;
307   int ending;  // my ending - only the end of the simple bp
308   int pk_ending; // ending of whole pknot
309   int pk_loops; // number of pknot loops
310 };
311 
try_pk()312 void try_pk() {
313   energy_of_struct_pk("CCCCCCCCCCGGCCCCGGGGGGGGG",
314                       "(((..((...)).[[[)))...]]]", 4);
315 }
316 
317 // checks if we are inside of pknot or inside something else (return true if inside pknot)
OKStack(vector<int> & stack,int pk[4])318 bool OKStack(vector<int> &stack, int pk[4]) {
319   if (stack.size() == 0) return true;
320   int data = stack.back();
321   return data==pk[0] || data==pk[1] || data==pk[2] || data==pk[3];
322 }
323 
OKStacks(vector<int> stacks[3],int pk[4])324 bool OKStacks(vector<int> stacks[3], int pk[4]) {
325   return OKStack(stacks[0], pk) && OKStack(stacks[1], pk) && OKStack(stacks[2], pk);
326 }
327 
loop_energy_rec(int i,short * str,short * s0,short * s1,paramT * P,Helpers & hlps,int encircling=0)328 LE_ret loop_energy_rec(int i, short *str, short *s0, short *s1, paramT *P, Helpers &hlps, int encircling = 0)
329 {
330   // first find the type and count alone points:
331   int begin = i;
332   int true_begin = begin;
333   int bpairs = 0;
334   int last_in_begin = hlps.last_open;
335   hlps.last_open = i;
336 
337   hlps.str_toleft[i] = i;
338   hlps.str_torght[i] = i;
339 
340   // for pk counting
341   int opening = 1;
342   int max_op = 1;
343 
344   int last_pk = 0;
345   int pk[4] = {i,0,0,0};
346   int pk_outer[4] = {i,0,0,0};
347 
348   int reg_inloops = 0;
349   int next_i;
350 
351   //int ending = str[begin];
352   i++;
353 
354   #define END() str[pk_outer[last_pk]]
355 
356   LE_ret ret;
357   ret.energy = 0;
358   ret.ending = str[begin];
359   ret.pk_loops = 1;
360   ret.pk_ending = ret.ending;
361 
362   // debug:
363   //fprintf(stderr, "%4d called loop_energy_rec %s\n", begin, pt_to_str_pk(str).c_str());
364 
365   // if we have done it before:
366   /*if (0 && str_type[i]!=ROOT) {
367     ret.ending = ending;
368     ret.pknot = str_type[i]>=P_H?true:false;
369     ret.energy = str_energy[i];
370     return ret;
371   }*/
372 
373   // and start crawling to the right:
374   while (i < END()) {
375     next_i = i;
376     if (str[i] == 0) {
377     } else
378     if (str[i] > i) {
379       if (str[i] < END()) { // regular inloop - skip it
380         reg_inloops++;
381         if (last_pk>0 && (i < str[pk[last_pk-1]] && str[i] > str[pk[last_pk-1]])) { // we had wrong pk
382           bool multiloop = true;
383           int energy_tmp = loop_energy_pk(pk[last_pk], str, s0, s1, P, multiloop);
384           hlps.str_type[pk[last_pk]] = multiloop?N_M:N_S;
385           hlps.str_energy[pk[last_pk]] = energy_tmp;
386 
387           ret.pk_ending = max(ret.pk_ending, (int)str[pk[last_pk]]);
388           //ending = str[pk[last_pk]];
389           ret.energy += energy_tmp;
390           pk[last_pk] = i;
391         } else { // do everything inbetween i, str[i] (+ maybe more)
392           LE_ret ret_tmp = loop_energy_rec(i, str, s0, s1, P, hlps, max(encircling, ret.ending));
393           next_i = ret_tmp.pk_ending;
394           bpairs++;
395           // if we are encircling a pseudoknot, we have to add 2+ loops:
396           if (ret_tmp.pk_loops > 1 && ret_tmp.pk_ending<ret.ending) {
397             reg_inloops += ret_tmp.pk_loops-1;
398           } else { // or we are extending a pknot, then we have to keep the pknot info for a multiloop above.
399             ret.pk_ending = max(ret.pk_ending, ret_tmp.pk_ending);
400             ret.pk_loops = max(ret.pk_loops, ret_tmp.pk_loops);
401           }
402           ret.energy += ret_tmp.energy;
403         }
404 
405       } else if (str[i] > END()) { // now we know we are in pknot / we found another part of pknot
406         hlps.str_torght[hlps.last_open] = hlps.last_open;
407         hlps.last_open = 0;
408         // pknot identifying
409         last_pk++;
410         opening++;
411         if (max_op < opening) max_op = opening;
412         pk[last_pk] = i;
413         pk_outer[last_pk] = i;
414         ret.pk_ending = max(ret.pk_ending, (int)str[i]);
415       }
416       // now we can do the left & right stacking:
417       if (hlps.last_open != 0) {
418         hlps.str_toleft[i] = hlps.last_open;
419         hlps.str_torght[hlps.last_open] = i;
420       } else {
421         hlps.str_toleft[i] = i;
422         hlps.str_torght[i] = i;
423       }
424 
425       hlps.last_open = i;
426     } else
427     if (str[i] < i) { // should happen only in pknot:
428       hlps.str_torght[hlps.last_open] = hlps.last_open;
429       hlps.last_open = 0;
430       for (int j=0; j<last_pk; j++) if (str[i]==pk[j]) {
431         opening--;
432         break;
433       }
434       if (str[i]<true_begin) true_begin = str[i];
435     }
436     i = next_i +1;
437   }
438 
439   hlps.last_open = last_in_begin;
440   int energy;
441 
442   // return value 3 options:
443   BPAIR_TYPE type = P_H;
444   if (last_pk == 0) {
445     if (reg_inloops <= 1) {
446       energy = loop_energy(str, s0, s1, begin);
447       ret.energy += energy;
448       type = N_S;
449     } else {
450       bool multiloop = true;
451       energy = loop_energy_pk(begin, str, s0, s1, P, multiloop);
452       ret.energy += energy;
453       type = N_M;
454     }
455 
456   } else {
457 
458     if (last_pk==3) type = P_M;
459     if (last_pk==2 && max_op==2) type = P_K;
460     if (last_pk==2 && max_op==3) type = P_L;
461     if (encircling > ret.pk_ending) {
462       energy = beta1mp_pen[type];
463     } else energy = beta1_pen[type];
464 
465     // compute alone:
466     int alone = 0;
467     vector<int> stacks[3];
468     for (int j=true_begin; j<=ret.pk_ending; j++) {
469       if (str[j] == 0) {
470         if (OKStacks(stacks, pk)) {
471           alone++;
472         }
473       } else if (str[j] > j) {
474         int k=0;
475         while (stacks[k].size() > 0 && str[j]>str[stacks[k].back()]) k++;
476         stacks[k].push_back(j);
477       } else if (str[j] < j) {
478         int k=0;
479         while (k<3 && (stacks[k].size() == 0 || str[j] != stacks[k].back())) k++;
480         if (k<3) stacks[k].pop_back();
481       }
482     }
483     hlps.beta1 = energy;
484     energy += beta2_pen[type]*(bpairs+last_pk+1) + beta3_pen[type]*(alone);
485     hlps.beta2 = bpairs+last_pk+1;
486     hlps.beta3 = alone;
487 
488     // weird 0.1:
489     if (type == P_L || type == P_K || type == P_M) energy -= 10;
490 
491     ret.energy += energy;
492     ret.pk_loops = max(last_pk+1, ret.pk_loops);
493   }
494 
495   // assign a type:
496   for (int j=0; j<=last_pk; j++) {
497     hlps.str_type[pk[j]] = type;
498   }
499   hlps.str_energy[pk[0]] = energy;
500 
501   //fprintf(stderr, "%4d  ended loop_energy_rec\n", begin);
502 
503   return ret;
504 }
505 
Helpers(int length)506 Helpers::Helpers(int length)
507 {
508   Create(length);
509 }
510 
Create(int length)511 void Helpers::Create(int length)
512 {
513   str_energy.resize(length + 1, 0);
514   str_type.resize(length+1, ROOT);
515   str_toleft.resize(length+1, 0);
516   str_torght.resize(length+1, 0);
517   beta1 = beta2 = beta3 = last_open = 0;
518 }
519 
energy_of_struct_pk(const char * seq,char * structure,int verbose)520 int energy_of_struct_pk(const char *seq, char *structure, int verbose)
521 {
522   if (P == NULL) {
523     make_pair_matrix();
524     update_fold_params();
525     P = scale_parameters();
526   }
527   short *str = make_pair_table_PK(structure);
528   int res = energy_of_struct_pk(seq, str, verbose);
529   free(str);
530   return res;
531 }
532 
energy_of_struct_pk(const char * seq,short * structure,int verbose)533 int energy_of_struct_pk(const char *seq, short *structure, int verbose)
534 {
535   if (P == NULL) {
536     make_pair_matrix();
537     update_fold_params();
538     P = scale_parameters();
539   }
540   short *s0 = encode_sequence(seq, 0);
541   short *s1 = encode_sequence(seq, 1);
542 
543   int res = energy_of_struct_pk(seq, structure, s0, s1, verbose);
544 
545   free(s1);
546   free(s0);
547 
548   return res;
549 }
550 
energy_of_struct_pk(const char * seq,short * structure,short * s0,short * s1,int verbose)551 int energy_of_struct_pk(const char *seq, short *structure, short *s0, short *s1, int verbose)
552 {
553   clock_t time = clock();
554 
555   if (P == NULL) {
556     make_pair_matrix();
557     update_fold_params();
558     P = scale_parameters();
559   }
560   short *str = structure;
561 
562   // some debug/helper arrays:
563   Helpers hlps(str[0]);
564 
565   int energy = 0;
566 
567   // go through the structure:
568   for (int i=1; i<=str[0]; i++) {
569     if (str[i]>i && hlps.str_type[i]==ROOT) {  //'('
570       // found new loop, evaluate the energy
571       LE_ret ret = loop_energy_rec(i, str, s0, s1, P, hlps);
572       i = ret.ending;
573       energy += ret.energy;
574       // just verbose
575       if (verbose && hlps.beta1 != 0) {
576         fprintf(stderr, "found pknot: %4d %4d %4d\n", hlps.beta1, hlps.beta2, hlps.beta3);
577       }
578     }
579   }
580 
581   //add the energy of external loop
582   bool multiloop = false;
583   int ext = loop_energy_pk(0, str, s0, s1, P, multiloop);
584   energy += ext;
585 
586   string type;
587   for (int i=1; i<=str[0]; i++) {
588     type += bpair_type_sname[hlps.str_type[i]];
589   }
590 
591   if (verbose) fprintf(stderr, "%s\n%s", seq, pt_to_str_pk(structure).c_str());
592   if (verbose) fprintf(stderr, " %7.2f\n%s\n", energy/100.0, type.c_str());
593 
594   if (verbose) {
595     int summ = ext;
596     fprintf(stderr, "%7.2f EXT\n", ext/100.0);
597     for (int i=1; i<=str[0]; i++) {
598       fprintf(stderr, "%7.2f %5d %5d\n", hlps.str_energy[i]/100.0, hlps.str_toleft[i], hlps.str_torght[i]);
599       summ += hlps.str_energy[i];
600     }
601 
602     fprintf(stderr, "%7.2f summed\n", summ/100.0);
603   }
604 
605   float time_tmp = (clock()-time)/(double)CLOCKS_PER_SEC;
606   //fprintf(stderr, "time_tmp = %10g\n", time_tmp);
607   time_eos += time_tmp;
608 
609   return energy;
610 }
611 
612 
Pseudoknot()613 Pseudoknot::Pseudoknot()
614 {
615   for (int i=0; i<4; i++)
616     for (int j=0; j<4; j++) imat[i][j] = false;
617 
618   size = 0;
619 }
620 /*
621 // constructor
622 Pseudoknot::Pseudoknot(int left, int right, int k)
623 {
624   assert(left>0);
625   if (left > right) swap(left, right);
626   if (k && left > k) swap(left, k);
627   if (k && k > right) swap(k, right);
628 
629   for (int i=0; i<4; i++)
630     for (int j=0; j<4; j++) imat[i][j] = false;
631 
632   int pos_right = k?2:1;
633 
634   parts[0].insert(left);
635   parts[pos_right].insert(right);
636 
637   imat[0][pos_right] = 1;
638 
639   if (k) {
640     parts[1].insert(k);
641     imat[0][1] = 1;
642     imat[1][pos_right] = 1;
643   }
644 }*/
645 
646 // contains some bpair?
Contains(int left)647 bool Pseudoknot::Contains(int left)
648 {
649   return parts[0].count(left) || parts[1].count(left) || parts[2].count(left)|| parts[3].count(left);
650 }
651 
652 /*bool cross(int left, int right, int left2, int right2) {
653   return (left<left2 && right<right2 && left2<right) ||
654          (left>left2 && right>right2 && right2>left);
655 }
656 
657 bool Pseudoknot::Cross(short *str, int left, int right)
658 {
659   int left2 = *parts[0].begin();
660   if (cross(left, right, left2, str[left2])) return true;
661   left2 = *parts[1].begin();
662   if (cross(left, right, left2, str[left2])) return true;
663   if (parts[2].empty()) return false;
664   left2 = *parts[2].begin();
665   if (cross(left, right, left2, str[left2])) return true;
666   if (parts[3].empty()) return false;
667   left2 = *parts[3].begin();
668   if (cross(left, right, left2, str[left2])) return true;
669   return false;
670 }*/
671 
672 
673   // can we insert it inside?
Inside(short * str,int left,int right)674 int Pseudoknot::Inside(short *str, int left, int right)
675 {
676   for (int i=0; i<4; i++) {
677     if (parts[i].empty()) return -1;
678 
679     set<int>::iterator it = parts[i].upper_bound(left);
680 
681     // check if we are inside :   it--(..left[..it( ... it)..right]..it--)
682     if ((it == parts[i].end() || str[*it]<right) &&
683         (it == parts[i].begin() || str[*(--it)]>right)) {
684           return i;
685     }
686   }
687 
688   return -1;
689 }
690 
IsViable(int size,bool imat[4][4])691 bool IsViable(int size, bool imat[4][4]) {
692 /*PH    PK    PL    PM
693   0100  0100  0110  0110
694   0000  0010  0010  0011
695   0000  0000  0000  0001
696   0000  0000  0000  0000*/
697 
698   if (size == 2) return true;
699   if (size == 3) return true;
700   if (size == 4) {
701     return imat[0][1] && imat[0][2] && !imat[0][3] &&
702            imat[1][2] && imat[1][3] &&
703            imat[2][3];
704   }
705   return false;
706 }
707 
Pseudoknot(const Pseudoknot & pknot)708 Pseudoknot::Pseudoknot(const Pseudoknot &pknot)
709 {
710   for (int i=0; i<4; i++) {
711     for (int j=0; j<4; j++) imat[i][j] = pknot.imat[i][j];
712     parts[i] = pknot.parts[i];
713   }
714   size = pknot.size;
715 }
716 
CanInsert(int left,vector<int> & numbers,bool insert)717 bool Pseudoknot::CanInsert(int left, vector<int> &numbers, bool insert)
718 {
719   if (size==4) return false;
720 
721   bool imat2[4][4];
722   int new_pos = 0;
723   while (!parts[new_pos].empty() && *parts[new_pos].begin()<left) new_pos++;
724   for (int i=0; i<size; i++) {
725     for (int j=i+1; j<size; j++) {
726       int ii = i >= new_pos ? i+1:i;
727       int jj = j >= new_pos ? j+1:j;
728       imat2[ii][jj] = imat[i][j];
729     }
730   }
731 
732   for (int i=0; i<size+1; i++) imat2[min(new_pos, i)][max(new_pos, i)] = false;
733   for (unsigned int i=0; i<numbers.size(); i++) {
734     int number = numbers[i]>=new_pos ? numbers[i]+1:numbers[i];
735     imat2[min(new_pos, number)][max(new_pos,number)] = true;
736   }
737 
738   bool viable = IsViable(size+1, imat2);
739 
740   if (viable && insert) {
741     for (int i=3; i>new_pos; i--) {
742       swap(parts[i], parts[i-1]);
743     }
744     parts[new_pos].insert(left);
745     size++;
746 
747     for (int i=0; i<size; i++) {
748       for (int j=i+1; j<size; j++) {
749         imat[i][j] = imat2[i][j];
750       }
751     }
752   }
753 
754   return viable;
755 }
756 
Delete(int left)757 bool Pseudoknot::Delete(int left)
758 {
759   for (int i=0; i<4; i++) {
760   // try to find it
761     set<int>::iterator it = parts[i].find(left);
762     if (it != parts[i].end()) {
763       // delete it
764       parts[i].erase(it);
765       // change type?
766       if (parts[i].empty()) {
767         size--;
768         if (size==1) return true; // cancelled H type
769         if (size==2 && i==1 && !imat[0][2]) { // cancelled K type
770           size--;
771           return true;
772         }
773         for (int j=0; j<size; j++) {
774           int jj = j>=i?j+1:j;
775           for (int k=j+1; k<size; k++) {
776             int kk = k>=i?k+1:k;
777             imat[j][k] = imat[jj][kk];
778           }
779           parts[j] = parts[jj];
780         }
781         parts[size].clear();
782         imat[0][size] = imat[1][size] = imat[2][size] = false;
783         return true;
784       }
785       return true;
786     }
787   }
788 
789   return false;
790 }
791 
operator =(const Pseudoknot & pknot)792 Pseudoknot &Pseudoknot::operator=(const Pseudoknot &pknot)
793 {
794   for (int i=0; i<4; i++) {
795     for (int j=0; j<4; j++) this->imat[i][j] = pknot.imat[i][j];
796     this->parts[i] = pknot.parts[i];
797   }
798   this->size = pknot.size;
799 
800   return *this;
801 }
802 
Structure(int length)803 Structure::Structure(int length)
804 {
805   this->str = (short*) malloc(sizeof(short)*(length+1));
806   for (int i=1; i<=length; i++) this->str[i] = 0;
807   this->str[0] = length;
808 
809   this->energy = 0;
810 }
811 
Structure(short * structure,int energy)812 Structure::Structure(short *structure, int energy)
813 {
814   int length = structure[0];
815   this->str = (short*) malloc(sizeof(short)*(length+1));
816   for (int i=1; i<=length; i++) this->str[i] = 0;
817   this->str[0] = length;
818 
819   // assign all bpairs:
820   for (int i=1; i<=structure[0]; i++) {
821     if (structure[i]>i) ViableInsert(i, structure[i], true);
822   }
823 
824   this->energy = energy;
825 }
826 
Structure(char * structure,int energy)827 Structure::Structure(char *structure, int energy)
828 {
829   int length = strlen(structure);
830   this->str = (short*) malloc(sizeof(short)*(length+1));
831   for (int i=1; i<=length; i++) this->str[i] = 0;
832   this->str[0] = length;
833 
834   // assign all bpairs:
835   short *str_tmp = make_pair_table_PK(structure);
836   for (int i=1; i<=str_tmp[0]; i++) {
837     if (str_tmp[i]>i) ViableInsert(i, str_tmp[i], true);
838   }
839   free(str_tmp);
840 
841   this->energy = energy;
842 }
843 
Structure(const char * seq,short * structure,short * s0,short * s1)844 Structure::Structure(const char *seq, short *structure, short *s0, short *s1)
845 {
846   int length = structure[0];
847   this->str = (short*) malloc(sizeof(short)*(length+1));
848   for (int i=1; i<=length; i++) this->str[i] = 0;
849   this->str[0] = length;
850 
851   // assign all bpairs:
852   for (int i=1; i<=structure[0]; i++) {
853     if (structure[i]>i) ViableInsert(i, structure[i], true);
854   }
855 
856   energy = energy_of_struct_pk(seq, str, s0, s1, 0);
857 }
858 
Structure(const char * seq,char * structure,short * s0,short * s1)859 Structure::Structure(const char *seq, char *structure, short *s0, short *s1)
860 {
861   int length = strlen(structure);
862   this->str = (short*) malloc(sizeof(short)*(length+1));
863   for (int i=1; i<=length; i++) this->str[i] = 0;
864   this->str[0] = length;
865 
866   // assign all bpairs:
867   short *str_tmp = make_pair_table_PK(structure);
868   for (int i=1; i<=str_tmp[0]; i++) {
869     if (str_tmp[i]>i) ViableInsert(i, str_tmp[i], true);
870   }
871   free(str_tmp);
872 
873   energy = energy_of_struct_pk(seq, str, s0, s1, 0);
874 }
875 
Structure(const Structure & second)876 Structure::Structure(const Structure &second)
877 {
878   energy = second.energy;
879   str = allocopy(second.str);
880 
881   pknots = second.pknots;
882 
883   bpair_pknot = second.bpair_pknot;
884 }
885 
~Structure()886 Structure::~Structure()
887 {
888   free(str);
889 }
890 
operator <(const Structure & second) const891 bool const Structure::operator<(const Structure &second) const
892 {
893   if (energy != second.energy) return energy<second.energy;
894   int i=1;
895   char l=0,r=0;
896   while (i<=str[0]) {
897     l = (str[i]==0?'.':(str[i]<str[str[i]]?')':'('));
898     r = (second.str[i]==0?'.':(second.str[i]<second.str[second.str[i]]?')':'('));
899     if (l != r) return l<r;
900     //if (str[i] != second.str[i]) return str[i] > second.str[i];
901     i++;
902   }
903 
904   return false;
905 }
906 
operator ==(const Structure & second) const907 bool const Structure::operator==(const Structure &second) const
908 {
909   if (energy != second.energy) return false;
910   int i=1;
911   while (i<=str[0]) {
912     if (str[i] != second.str[i]) return false;
913     i++;
914   }
915   return true;
916 }
917 
operator =(const Structure & second)918 Structure &Structure::operator=(const Structure &second)
919 {
920   energy = second.energy;
921   copy_arr(str, second.str);
922 
923   pknots = second.pknots;
924 
925   bpair_pknot = second.bpair_pknot;
926 
927   return *this;
928 }
929 
930 // checks if we can add it to the 'cross' vector (if it is compatible with )
CrossOuter(vector<int> & cross,int point,short * str)931 bool CrossOuter(vector<int> &cross, int point, short *str) {
932   if (cross.empty()) return true;
933   int left = cross.back();
934   int right = str[left];
935   if (point<left && str[point]>right) return true;
936   return false;
937 }
938 
CrossInner(vector<int> & cross,int point,short * str)939 bool CrossInner(vector<int> &cross, int point, short *str) {
940   if (cross.empty()) return true;
941   int left = cross.back();
942   int right = str[left];
943   if (point>left && str[point]<right) return true;
944   return false;
945 }
946 
947 // is the bpair viable?
ViableInsert(int left,int right,bool insert)948 INS_FLAG Structure::ViableInsert(int left, int right, bool insert)
949 {
950   if (str[left] || str[right]) return NO_INS;
951 
952   INS_FLAG res = NO_INS;
953 
954   // get crossings:
955   vector<set<int> > crossings;
956   vector<vector<int> > cross_tmp;
957   for (int i=left+1; i<right; i++) { // can be faster:
958     if (str[i] == 0) continue;
959     if (str[i] < left) { // ')'
960       int k=0;
961       if ((int)cross_tmp.size()<=k) cross_tmp.resize(k+1);
962       if ((int)crossings.size()<=k) crossings.resize(k+1);
963       while (!CrossOuter(cross_tmp[k], str[i], str)) {
964         k++;
965         if ((int)cross_tmp.size()<=k) cross_tmp.resize(k+1);
966         if ((int)crossings.size()<=k) crossings.resize(k+1);
967       }
968       cross_tmp[k].push_back(str[i]);
969       crossings[k].insert(str[i]);
970     } else if (str[i] > right) { // '('
971       int k=0;
972       if ((int)cross_tmp.size()<=k) cross_tmp.resize(k+1);
973       if ((int)crossings.size()<=k) crossings.resize(k+1);
974       while (!CrossInner(cross_tmp[k], i, str)) {
975         k++;
976         if ((int)cross_tmp.size()<=k) cross_tmp.resize(k+1);
977         if ((int)crossings.size()<=k) crossings.resize(k+1);
978       }
979       cross_tmp[k].push_back(i);
980       crossings[k].insert(i);
981     }
982   }
983   // sort the sets:
984   sort(crossings.begin(), crossings.end());
985 
986   // find if it crosses a pknot:
987   Pseudoknot *cross_pk = NULL;
988   int cross_index = -1;
989   if (pknots.size()>0) {
990     for (unsigned int i=0; i<crossings.size(); i++) {
991       set<int>::iterator it = crossings[i].begin();
992 
993 
994 
995       int index = bpair_pknot[*it];
996       Pseudoknot *pk_tmp = Pknot_index(index);
997       for (it++; it!=crossings[i].end(); it++) {
998         Pseudoknot *pk_now = Pknot_bpair(*it);
999         if (pk_tmp != pk_now) return NO_INS; // split the stack!
1000       }
1001       if (pk_tmp != NULL) {
1002         if (cross_pk == NULL) {
1003           cross_pk = pk_tmp;
1004           cross_index = index;
1005         } else if (cross_pk != pk_tmp) return NO_INS; // we have found 2 colliding pknots
1006       }
1007     }
1008   }
1009 
1010   /*for (unsigned int i=0; i<pknots.size(); i++) {
1011     if (pknots[i].Cross(str, left, right)) {
1012       if (cross_pk == NULL) cross_pk = &pknots[i];
1013       else return false;
1014     }
1015   }*/
1016 
1017   if (cross_pk) {
1018     // check if we don't split the parts
1019     vector<int> numbers;
1020     for (unsigned int i=0; i<crossings.size(); i++) {
1021       for (int j=0; j<cross_pk->size; j++) {
1022         if (*cross_pk->parts[j].begin() == *crossings[i].begin()) {
1023           if (cross_pk->parts[j] == crossings[i]) {
1024             numbers.push_back(j);
1025             // stack is correct
1026             break;
1027           }
1028           // stack is split
1029           return NO_INS;
1030         }
1031       }
1032       // each crossing we must find otherwise wrong.
1033       if (numbers.size()==i) return NO_INS;
1034     }
1035 
1036     // now we have the numbers which we border (0-3), the rest should be checked if we can insert it in it
1037     int inside = cross_pk->Inside(str, left, right);
1038     if (inside>=0) {
1039       // check if neighbourhood fits  // essentially numbers[x] == Imat(x, inside)
1040       int k=0;
1041       for (int i=0; i<cross_pk->size; i++) {
1042         if (cross_pk->Imat(inside, i)) {
1043           if (k>(int)numbers.size()-1) return NO_INS;
1044           if (numbers[k]==i) k++;
1045           else return NO_INS;
1046         }
1047       }
1048       // lastly check if we do not cross more than we should
1049       if (k != (int)numbers.size()) return NO_INS;
1050 
1051       // neighbourhood fits -> we can insert it in the parts[inside]
1052       if (insert) cross_pk->parts[inside].insert(left);
1053       res = INSIDE_PK;
1054     } else {
1055       // we cannot insert it, but maybe we can change the type:
1056       bool ci = cross_pk->CanInsert(left, numbers, insert);
1057       if (ci) res = CHNG_PK;
1058       else res = NO_INS;
1059     }
1060 
1061   } else {
1062     // now we can insert it since we do not cross any pk.
1063     if (insert) {
1064       switch (crossings.size()) {
1065       case 0:
1066         break;
1067       case 1: {
1068         Pseudoknot pknot;
1069         int cross_first = *crossings[0].begin();
1070         int bpair = (left > cross_first)? 1:0;
1071         pknot.parts[bpair].insert(left);
1072         pknot.parts[bpair==0?1:0] = crossings[0];
1073         pknot.imat[0][1] = true;
1074         pknot.size = 2;
1075         pknots.push_back(pknot);
1076         cross_index = pknots.size()-1;
1077         for (set<int>::iterator it=crossings[0].begin(); it!=crossings[0].end(); it++) bpair_pknot[*it] = pknots.size()-1;
1078         break;
1079         }
1080       case 2: {
1081         Pseudoknot pknot;
1082         int bpair = (left > *crossings[0].begin())? ((left > *crossings[1].begin())? 2:1):0;
1083         pknot.parts[bpair].insert(left);
1084         pknot.parts[bpair<=0?1:0] = crossings[0];
1085         pknot.parts[bpair<=1?2:1] = crossings[1];
1086         pknot.imat[0][1] = true;
1087         pknot.imat[1][2] = true;
1088         pknot.size = 3;
1089         pknots.push_back(pknot);
1090         cross_index = pknots.size()-1;
1091         for (set<int>::iterator it=crossings[0].begin(); it!=crossings[0].end(); it++) bpair_pknot[*it] = pknots.size()-1;
1092         for (set<int>::iterator it=crossings[1].begin(); it!=crossings[1].end(); it++) bpair_pknot[*it] = pknots.size()-1;
1093         break;
1094         }
1095       default: assert(false);
1096       }
1097     }
1098     switch (crossings.size()) {
1099       case 0: res = REG_INS; break;
1100       case 1:
1101       case 2: res = CREATE_PK; break;
1102     }
1103   }
1104 
1105   if (res != NO_INS && insert) {
1106     bpair_pknot[left] = cross_index;
1107     str[left] = right;
1108     str[right] = left;
1109   }
1110   return res;
1111 }
1112 
Delete(int left)1113 bool Structure::Delete(int left)
1114 {
1115   if (left<0) left = -left;
1116   if (!str[left]) return false;
1117 
1118   int right = str[left];
1119 
1120   for (unsigned int i=0; i<pknots.size(); i++) {
1121     if (pknots[i].Delete(left)) {
1122       if (pknots[i].size == 1) {
1123         for (int j=0; j<4; j++) {
1124           for (set<int>::iterator it = pknots[i].parts[j].begin(); it!=pknots[i].parts[j].end(); it++) {
1125             bpair_pknot[*it] = -1;
1126           }
1127         }
1128         // erase the pknot and fix bpair_pknot
1129         pknots.erase(pknots.begin()+i);
1130         for (map<int, int>::iterator it=bpair_pknot.begin(); it!=bpair_pknot.end(); it++) {
1131           if (it->second >= (int)i) it->second--;
1132         }
1133       }
1134       break;
1135     }
1136   }
1137 
1138   bpair_pknot.erase(left);
1139   str[left] = 0;
1140   str[right] = 0;
1141   return true;
1142 }
1143 
UndoMove()1144 int Structure::UndoMove()
1145 {
1146   if (undo_l == 0) {
1147     fprintf(stderr, "ERROR: Undoing non-existent move!\n");
1148     return 0;
1149   }
1150 
1151   int left = undo_l;
1152   int right = undo_r;
1153 
1154   int dif_en = - energy + undo_en;
1155   energy = undo_en;
1156 
1157   // switch?
1158   if (left>0 && right>0 && (str[left]>0 || str[right]>0)) {
1159     Shift(left, right);
1160   } else {
1161     if (left<0) {
1162       Delete(left);
1163     } else {
1164       Insert(left, right);
1165     }
1166   }
1167 
1168   undo_l = 0;
1169   undo_r = 0;
1170   undo_en = 0;
1171 
1172   return dif_en;
1173 }
1174 
MakeMove(const char * seq,short * s0,short * s1,int left,int right)1175 int Structure::MakeMove(const char *seq, short *s0, short *s1, int left, int right)
1176 {
1177   undo_en = energy;
1178   bool change = false;
1179 
1180   // switch?
1181   if (left>0 && right>0 && (str[left]>0 || str[right]>0)) {
1182     if (str[left]>0) {
1183       undo_l = left;
1184       undo_r = str[left];
1185     } else {
1186       undo_r = right;
1187       undo_l = str[right];
1188     }
1189     change = (Shift(left, right) != NO_INS);
1190   } else {
1191     if (left<0) {
1192       undo_l = -left;
1193       undo_r = -right;
1194       change = Delete(left);
1195     } else {
1196       undo_l = -left;
1197       undo_r = -right;
1198       change = (Insert(left, right) != NO_INS);
1199     }
1200   }
1201 
1202   if (change) energy = energy_of_struct_pk(seq, str, s0, s1, 0);
1203   return energy - undo_en;
1204 }
1205 
CanShift(int left,int right)1206 INS_FLAG Structure::CanShift(int left, int right)
1207 {
1208   if ((str[left]>0 && str[right]>0)  ||
1209       (str[left]==0 && str[right]==0)) return NO_INS;
1210 
1211   int left_orig = str[left]>0? left : str[right];
1212   int right_orig = str[right]>0? right : str[left];
1213 
1214   Delete(left_orig);
1215 
1216   INS_FLAG flag = CanInsert(left, right);
1217 
1218   Insert(left_orig, right_orig);
1219 
1220   return flag;
1221 }
1222 
Shift(int left,int right)1223 INS_FLAG Structure::Shift(int left, int right)
1224 {
1225   if ((str[left]>0 && str[right]>0)  ||
1226       (str[left]==0 && str[right]==0)) return NO_INS;
1227 
1228   int left_orig = str[left]>0? left : str[right];
1229   int right_orig = str[right]>0? right : str[left];
1230 
1231   Delete(left_orig);
1232 
1233   INS_FLAG flag = Insert(left, right);
1234 
1235   if (flag==NO_INS) Insert(left_orig, right_orig);
1236 
1237   return flag;
1238 }
1239 
CanInsert(int left,int right)1240 INS_FLAG Structure::CanInsert(int left, int right)
1241 {
1242   return ViableInsert(left, right, false);
1243 }
1244 
Insert(int left,int right)1245 INS_FLAG Structure::Insert(int left, int right)
1246 {
1247   return ViableInsert(left, right, true);
1248 }
1249 
Contains_PK(short * str)1250 int Contains_PK(short *str)
1251 {
1252   stack<int> pairing;
1253   pairing.push(str[0]+1);
1254   for (int i=1; i<=str[0]; i++) {
1255     if (str[i]==0) continue;
1256     if (str[i]>i) { //'('
1257       if (str[i]>pairing.top()) return i;
1258       pairing.push(str[i]);
1259     }
1260     else { //')'
1261       pairing.pop();
1262     }
1263   }
1264   return 0;
1265 }
1266