1 #include <stdio.h>
2 #include <stdlib.h>
3 #include <string.h>
4 #include <assert.h>
5
6 #include <unordered_map>
7 #include <stack>
8 #include <algorithm>
9 #include <vector>
10 #include <ctime>
11
12 extern "C" {
13 #include "pair_mat.h"
14 #include "fold.h"
15 #include "loop_energies.h"
16
17 #include "move_set_inside.h"
18 }
19
20 #include "pknots.h"
21
22 static float time_eos = 0.0;
23 static paramT *P = NULL;
24
freeP()25 void freeP()
26 {
27 if (P) free(P);
28 P = NULL;
29 }
30
get_eos_time()31 float get_eos_time()
32 {
33 return time_eos;
34 }
35
36 // ################ issue: TODO GU pair on end adds 0.5 penalty....
37
Ext_Loop(int p,int q,short * s0,paramT * P)38 inline int Ext_Loop(int p, int q, short *s0, paramT *P)
39 {
40 int energy = E_ExtLoop(pair[s0[p]][s0[q]], -1, -1, P);
41 return energy;
42 }
43
M_Loop(int p,int q,short * s0,paramT * P)44 inline int M_Loop(int p, int q, short *s0, paramT *P)
45 {
46 int energy = E_MLstem(pair[s0[p]][s0[q]], -1, -1, P);
47 return energy;
48 }
49
50 using namespace std;
51 /*
52 void copy_arr(short *dest, const short *src)
53 {
54 if (!src || !dest) {
55 fprintf(stderr, "Empty pointer in copying\n");
56 return;
57 }
58 memcpy(dest, src, sizeof(short)*(src[0]+1));
59 }
60
61 short *allocopy(const short *src)
62 {
63 short *res = (short*) malloc(sizeof(short)*(src[0]+1));
64 copy_arr(res, src);
65 return res;
66 }
67
68 char *allocopy(const char *src)
69 {
70 char *res = (char*) malloc(sizeof(char)*(strlen(src)+1));
71 strcpy(res, src);
72 return res;
73 }*/
74
make_pair_table_PK(const char * str)75 short *make_pair_table_PK(const char *str)
76 {
77 const int maxp = 4;
78 /* const char ptl[] = {"([{<"};
79 const char ptr[] = {")]}>"};*/
80
81 // stack of end points of each parenthesis
82 vector<vector<int> > pars(maxp);
83
84 int length = strlen(str);
85 short *structure = (short*) malloc(sizeof(short)*(length+1));
86 for (int i=1; i<=length; i++) {
87 switch (str[i-1]) {
88 case '.': structure[i] = 0; break;
89 case '(': pars[0].push_back(i); break;
90 case ')': structure[i] = pars[0].back(); structure[pars[0].back()] = i; pars[0].pop_back(); break;
91 case '[': pars[1].push_back(i); break;
92 case ']': structure[i] = pars[1].back(); structure[pars[1].back()] = i; pars[1].pop_back(); break;
93 case '{': pars[2].push_back(i); break;
94 case '}': structure[i] = pars[2].back(); structure[pars[2].back()] = i; pars[2].pop_back(); break;
95 case '<': pars[3].push_back(i); break;
96 case '>': structure[i] = pars[3].back(); structure[pars[3].back()] = i; pars[3].pop_back(); break;
97 }
98 }
99
100 pars.clear();
101
102 structure[0] = length;
103
104 return structure;
105 }
106
pt_to_str_pk(const short * str)107 string pt_to_str_pk(const short *str)
108 {
109 // we can do with 4 types now:
110 const int maxp = 4;
111 const char ptl[] = {"([{<"};
112 const char ptr[] = {")]}>"};
113
114 // result:
115 string res;
116 res.reserve(str[0]);
117
118 // stack of end points of each parenthesis
119 vector<stack<int> > pars(maxp);
120
121 // temprary structure
122 vector<int> tmp(str[0]+1, 0);
123
124 // precomputation
125 for (int i=1; i<=str[0]; i++) {
126 if (str[i]>i) {
127 int j=0;
128 //fprintf(stderr, "%d %d \n", (int)pars[j].size(), pars[j].empty());
129 while (j<maxp && !pars[j].empty() && pars[j].top()<str[i]) {
130 j++;
131 }
132 if (j==maxp) {
133 fprintf(stderr, "Cannot print it with %d types of parentheses!!!\n", maxp);
134 throw;
135 return res;
136 } else {
137 // jth parenthesis:
138 pars[j].push(str[i]);
139 tmp[i] = j;
140 tmp[str[i]] = j;
141 }
142 } else if (str[i]>0 && str[i]<i) {
143 pars[tmp[i]].pop();
144 }
145 }
146
147 // filling the result:
148 for (int i=1; i<=str[0]; i++) {
149 if (str[i]==0) res += '.';
150 else if (str[i]>i) res += ptl[tmp[i]];
151 else res += ptr[tmp[i]];
152 }
153 res+='\0';
154
155 return res;
156 }
157
pt_to_chars_pk(const short * str)158 char* pt_to_chars_pk(const short *str)
159 {
160 // we can do with 4 types now:
161 const int maxp = 4;
162 const char ptl[] = {"([{<"};
163 const char ptr[] = {")]}>"};
164
165 // result:
166 char *res = (char*) malloc((str[0]+1)*sizeof(char));
167
168 // stack of end points of each parenthesis
169 vector<stack<int> > pars(maxp);
170
171 // temprary structure
172 vector<int> tmp(str[0]+1, 0);
173
174 // precomputation
175 for (int i=1; i<=str[0]; i++) {
176 if (str[i]>i) {
177 int j=0;
178 //fprintf(stderr, "%d %d \n", (int)pars[j].size(), pars[j].empty());
179 while (j<maxp && !pars[j].empty() && pars[j].top()<str[i]) {
180 j++;
181 }
182 if (j==maxp) {
183 fprintf(stderr, "Cannot print it with %d types of parentheses!!!\n", maxp);
184 free(res);
185 return NULL;
186 } else {
187 // jth parenthesis:
188 pars[j].push(str[i]);
189 tmp[i] = j;
190 tmp[str[i]] = j;
191 }
192 } else if (str[i]>0 && str[i]<i) {
193 pars[tmp[i]].pop();
194 }
195 }
196
197 // filling the result:
198 for (int i=1; i<=str[0]; i++) {
199 if (str[i]==0) res[i-1] = '.';
200 else if (str[i]>i) res[i-1] = ptl[tmp[i]];
201 else res[i-1] = ptr[tmp[i]];
202 }
203 res[str[0]] ='\0';
204
205 return res;
206 }
pt_to_chars_pk(const short * str,char * dest)207 void pt_to_chars_pk(const short *str, char *dest)
208 {
209 // we can do with 4 types now:
210 const int maxp = 4;
211 const char ptl[] = {"([{<"};
212 const char ptr[] = {")]}>"};
213
214 // stack of end points of each parenthesis
215 vector<stack<int> > pars(maxp);
216
217 // temprary structure
218 vector<int> tmp(str[0]+1, 0);
219
220 // precomputation
221 for (int i=1; i<=str[0]; i++) {
222 if (str[i]>i) {
223 int j=0;
224 //fprintf(stderr, "%d %d \n", (int)pars[j].size(), pars[j].empty());
225 while (j<maxp && !pars[j].empty() && pars[j].top()<str[i]) {
226 j++;
227 }
228 if (j==maxp) {
229 fprintf(stderr, "Cannot print it with %d types of parentheses!!!\n", maxp);
230 return ;
231 } else {
232 // jth parenthesis:
233 pars[j].push(str[i]);
234 tmp[i] = j;
235 tmp[str[i]] = j;
236 }
237 } else if (str[i]>0 && str[i]<i) {
238 pars[tmp[i]].pop();
239 }
240 }
241
242 // filling the result:
243 for (int i=1; i<=str[0]; i++) {
244 if (str[i]==0) dest[i-1] = '.';
245 else if (str[i]>i) dest[i-1] = ptl[tmp[i]];
246 else dest[i-1] = ptr[tmp[i]];
247 }
248 dest[str[0]] ='\0';
249 }
250
loop_energy_pk(int begin,short * str,short * s0,short * s1,paramT * P,bool & multiloop)251 int loop_energy_pk(int begin, short *str, short *s0, short *s1, paramT *P, bool &multiloop) {
252
253 int end = str[begin];
254 if (begin==0) end++;
255 int alone = end - begin - 1;
256 int energy = 0;
257 int alone_upto = begin;
258 int loops = 0;
259
260 //fprintf(stderr, "le_pk %d %d\n", begin, multiloop);
261
262 for (int i=begin+1; i<end; i++) {
263 ///TODO dangles == 0
264 if (str[i]>i && str[i]>alone_upto && str[i]<end) { // '(' ,but not nested '('
265 if (multiloop) {
266 energy += M_Loop(i, str[i], s0, P);
267 loops ++;
268 // and subtract alone positions
269 if (alone_upto<i) {
270 alone -= str[i] - i +1;
271 } else {
272 alone -= str[i] - alone_upto;
273 }
274 alone_upto = str[i];
275 } else {
276 energy += Ext_Loop(i, str[i], s0, P);
277 //fprintf(stderr, "le_pk EXT %d %d %d\n", i, str[i], energy);
278 alone_upto = str[i];
279 }
280
281 //if (tree->debug) fprintf(stderr, "eval EN: %3d %3d => %7.2f (%7.2f -> %7.2f)\n", it->second->begin, it->second->end, (energy - old_energy)/100.0, old_energy/100.0, energy/100.0);
282 }
283 }
284
285 // case that we don't have a multiloop
286 if (loops == 1 && multiloop) {
287 multiloop = false;
288 return loop_energy(str, s0, s1, begin);
289 }
290
291 if (multiloop) {
292 energy += M_Loop(begin, end, s0, P);
293 energy += P->MLclosing;
294 /* logarithmic ML loop energy if logML */
295 if (logML && (alone>6)) {
296 energy += 6*P->MLbase+(int)(P->lxc*log((double)alone/6.));
297 } else {
298 energy += (alone*P->MLbase);
299 }
300 }
301
302 return energy;
303 }
304
305 struct LE_ret {
306 int energy;
307 int ending; // my ending - only the end of the simple bp
308 int pk_ending; // ending of whole pknot
309 int pk_loops; // number of pknot loops
310 };
311
try_pk()312 void try_pk() {
313 energy_of_struct_pk("CCCCCCCCCCGGCCCCGGGGGGGGG",
314 "(((..((...)).[[[)))...]]]", 4);
315 }
316
317 // checks if we are inside of pknot or inside something else (return true if inside pknot)
OKStack(vector<int> & stack,int pk[4])318 bool OKStack(vector<int> &stack, int pk[4]) {
319 if (stack.size() == 0) return true;
320 int data = stack.back();
321 return data==pk[0] || data==pk[1] || data==pk[2] || data==pk[3];
322 }
323
OKStacks(vector<int> stacks[3],int pk[4])324 bool OKStacks(vector<int> stacks[3], int pk[4]) {
325 return OKStack(stacks[0], pk) && OKStack(stacks[1], pk) && OKStack(stacks[2], pk);
326 }
327
loop_energy_rec(int i,short * str,short * s0,short * s1,paramT * P,Helpers & hlps,int encircling=0)328 LE_ret loop_energy_rec(int i, short *str, short *s0, short *s1, paramT *P, Helpers &hlps, int encircling = 0)
329 {
330 // first find the type and count alone points:
331 int begin = i;
332 int true_begin = begin;
333 int bpairs = 0;
334 int last_in_begin = hlps.last_open;
335 hlps.last_open = i;
336
337 hlps.str_toleft[i] = i;
338 hlps.str_torght[i] = i;
339
340 // for pk counting
341 int opening = 1;
342 int max_op = 1;
343
344 int last_pk = 0;
345 int pk[4] = {i,0,0,0};
346 int pk_outer[4] = {i,0,0,0};
347
348 int reg_inloops = 0;
349 int next_i;
350
351 //int ending = str[begin];
352 i++;
353
354 #define END() str[pk_outer[last_pk]]
355
356 LE_ret ret;
357 ret.energy = 0;
358 ret.ending = str[begin];
359 ret.pk_loops = 1;
360 ret.pk_ending = ret.ending;
361
362 // debug:
363 //fprintf(stderr, "%4d called loop_energy_rec %s\n", begin, pt_to_str_pk(str).c_str());
364
365 // if we have done it before:
366 /*if (0 && str_type[i]!=ROOT) {
367 ret.ending = ending;
368 ret.pknot = str_type[i]>=P_H?true:false;
369 ret.energy = str_energy[i];
370 return ret;
371 }*/
372
373 // and start crawling to the right:
374 while (i < END()) {
375 next_i = i;
376 if (str[i] == 0) {
377 } else
378 if (str[i] > i) {
379 if (str[i] < END()) { // regular inloop - skip it
380 reg_inloops++;
381 if (last_pk>0 && (i < str[pk[last_pk-1]] && str[i] > str[pk[last_pk-1]])) { // we had wrong pk
382 bool multiloop = true;
383 int energy_tmp = loop_energy_pk(pk[last_pk], str, s0, s1, P, multiloop);
384 hlps.str_type[pk[last_pk]] = multiloop?N_M:N_S;
385 hlps.str_energy[pk[last_pk]] = energy_tmp;
386
387 ret.pk_ending = max(ret.pk_ending, (int)str[pk[last_pk]]);
388 //ending = str[pk[last_pk]];
389 ret.energy += energy_tmp;
390 pk[last_pk] = i;
391 } else { // do everything inbetween i, str[i] (+ maybe more)
392 LE_ret ret_tmp = loop_energy_rec(i, str, s0, s1, P, hlps, max(encircling, ret.ending));
393 next_i = ret_tmp.pk_ending;
394 bpairs++;
395 // if we are encircling a pseudoknot, we have to add 2+ loops:
396 if (ret_tmp.pk_loops > 1 && ret_tmp.pk_ending<ret.ending) {
397 reg_inloops += ret_tmp.pk_loops-1;
398 } else { // or we are extending a pknot, then we have to keep the pknot info for a multiloop above.
399 ret.pk_ending = max(ret.pk_ending, ret_tmp.pk_ending);
400 ret.pk_loops = max(ret.pk_loops, ret_tmp.pk_loops);
401 }
402 ret.energy += ret_tmp.energy;
403 }
404
405 } else if (str[i] > END()) { // now we know we are in pknot / we found another part of pknot
406 hlps.str_torght[hlps.last_open] = hlps.last_open;
407 hlps.last_open = 0;
408 // pknot identifying
409 last_pk++;
410 opening++;
411 if (max_op < opening) max_op = opening;
412 pk[last_pk] = i;
413 pk_outer[last_pk] = i;
414 ret.pk_ending = max(ret.pk_ending, (int)str[i]);
415 }
416 // now we can do the left & right stacking:
417 if (hlps.last_open != 0) {
418 hlps.str_toleft[i] = hlps.last_open;
419 hlps.str_torght[hlps.last_open] = i;
420 } else {
421 hlps.str_toleft[i] = i;
422 hlps.str_torght[i] = i;
423 }
424
425 hlps.last_open = i;
426 } else
427 if (str[i] < i) { // should happen only in pknot:
428 hlps.str_torght[hlps.last_open] = hlps.last_open;
429 hlps.last_open = 0;
430 for (int j=0; j<last_pk; j++) if (str[i]==pk[j]) {
431 opening--;
432 break;
433 }
434 if (str[i]<true_begin) true_begin = str[i];
435 }
436 i = next_i +1;
437 }
438
439 hlps.last_open = last_in_begin;
440 int energy;
441
442 // return value 3 options:
443 BPAIR_TYPE type = P_H;
444 if (last_pk == 0) {
445 if (reg_inloops <= 1) {
446 energy = loop_energy(str, s0, s1, begin);
447 ret.energy += energy;
448 type = N_S;
449 } else {
450 bool multiloop = true;
451 energy = loop_energy_pk(begin, str, s0, s1, P, multiloop);
452 ret.energy += energy;
453 type = N_M;
454 }
455
456 } else {
457
458 if (last_pk==3) type = P_M;
459 if (last_pk==2 && max_op==2) type = P_K;
460 if (last_pk==2 && max_op==3) type = P_L;
461 if (encircling > ret.pk_ending) {
462 energy = beta1mp_pen[type];
463 } else energy = beta1_pen[type];
464
465 // compute alone:
466 int alone = 0;
467 vector<int> stacks[3];
468 for (int j=true_begin; j<=ret.pk_ending; j++) {
469 if (str[j] == 0) {
470 if (OKStacks(stacks, pk)) {
471 alone++;
472 }
473 } else if (str[j] > j) {
474 int k=0;
475 while (stacks[k].size() > 0 && str[j]>str[stacks[k].back()]) k++;
476 stacks[k].push_back(j);
477 } else if (str[j] < j) {
478 int k=0;
479 while (k<3 && (stacks[k].size() == 0 || str[j] != stacks[k].back())) k++;
480 if (k<3) stacks[k].pop_back();
481 }
482 }
483 hlps.beta1 = energy;
484 energy += beta2_pen[type]*(bpairs+last_pk+1) + beta3_pen[type]*(alone);
485 hlps.beta2 = bpairs+last_pk+1;
486 hlps.beta3 = alone;
487
488 // weird 0.1:
489 if (type == P_L || type == P_K || type == P_M) energy -= 10;
490
491 ret.energy += energy;
492 ret.pk_loops = max(last_pk+1, ret.pk_loops);
493 }
494
495 // assign a type:
496 for (int j=0; j<=last_pk; j++) {
497 hlps.str_type[pk[j]] = type;
498 }
499 hlps.str_energy[pk[0]] = energy;
500
501 //fprintf(stderr, "%4d ended loop_energy_rec\n", begin);
502
503 return ret;
504 }
505
Helpers(int length)506 Helpers::Helpers(int length)
507 {
508 Create(length);
509 }
510
Create(int length)511 void Helpers::Create(int length)
512 {
513 str_energy.resize(length + 1, 0);
514 str_type.resize(length+1, ROOT);
515 str_toleft.resize(length+1, 0);
516 str_torght.resize(length+1, 0);
517 beta1 = beta2 = beta3 = last_open = 0;
518 }
519
energy_of_struct_pk(const char * seq,char * structure,int verbose)520 int energy_of_struct_pk(const char *seq, char *structure, int verbose)
521 {
522 if (P == NULL) {
523 make_pair_matrix();
524 update_fold_params();
525 P = scale_parameters();
526 }
527 short *str = make_pair_table_PK(structure);
528 int res = energy_of_struct_pk(seq, str, verbose);
529 free(str);
530 return res;
531 }
532
energy_of_struct_pk(const char * seq,short * structure,int verbose)533 int energy_of_struct_pk(const char *seq, short *structure, int verbose)
534 {
535 if (P == NULL) {
536 make_pair_matrix();
537 update_fold_params();
538 P = scale_parameters();
539 }
540 short *s0 = encode_sequence(seq, 0);
541 short *s1 = encode_sequence(seq, 1);
542
543 int res = energy_of_struct_pk(seq, structure, s0, s1, verbose);
544
545 free(s1);
546 free(s0);
547
548 return res;
549 }
550
energy_of_struct_pk(const char * seq,short * structure,short * s0,short * s1,int verbose)551 int energy_of_struct_pk(const char *seq, short *structure, short *s0, short *s1, int verbose)
552 {
553 clock_t time = clock();
554
555 if (P == NULL) {
556 make_pair_matrix();
557 update_fold_params();
558 P = scale_parameters();
559 }
560 short *str = structure;
561
562 // some debug/helper arrays:
563 Helpers hlps(str[0]);
564
565 int energy = 0;
566
567 // go through the structure:
568 for (int i=1; i<=str[0]; i++) {
569 if (str[i]>i && hlps.str_type[i]==ROOT) { //'('
570 // found new loop, evaluate the energy
571 LE_ret ret = loop_energy_rec(i, str, s0, s1, P, hlps);
572 i = ret.ending;
573 energy += ret.energy;
574 // just verbose
575 if (verbose && hlps.beta1 != 0) {
576 fprintf(stderr, "found pknot: %4d %4d %4d\n", hlps.beta1, hlps.beta2, hlps.beta3);
577 }
578 }
579 }
580
581 //add the energy of external loop
582 bool multiloop = false;
583 int ext = loop_energy_pk(0, str, s0, s1, P, multiloop);
584 energy += ext;
585
586 string type;
587 for (int i=1; i<=str[0]; i++) {
588 type += bpair_type_sname[hlps.str_type[i]];
589 }
590
591 if (verbose) fprintf(stderr, "%s\n%s", seq, pt_to_str_pk(structure).c_str());
592 if (verbose) fprintf(stderr, " %7.2f\n%s\n", energy/100.0, type.c_str());
593
594 if (verbose) {
595 int summ = ext;
596 fprintf(stderr, "%7.2f EXT\n", ext/100.0);
597 for (int i=1; i<=str[0]; i++) {
598 fprintf(stderr, "%7.2f %5d %5d\n", hlps.str_energy[i]/100.0, hlps.str_toleft[i], hlps.str_torght[i]);
599 summ += hlps.str_energy[i];
600 }
601
602 fprintf(stderr, "%7.2f summed\n", summ/100.0);
603 }
604
605 float time_tmp = (clock()-time)/(double)CLOCKS_PER_SEC;
606 //fprintf(stderr, "time_tmp = %10g\n", time_tmp);
607 time_eos += time_tmp;
608
609 return energy;
610 }
611
612
Pseudoknot()613 Pseudoknot::Pseudoknot()
614 {
615 for (int i=0; i<4; i++)
616 for (int j=0; j<4; j++) imat[i][j] = false;
617
618 size = 0;
619 }
620 /*
621 // constructor
622 Pseudoknot::Pseudoknot(int left, int right, int k)
623 {
624 assert(left>0);
625 if (left > right) swap(left, right);
626 if (k && left > k) swap(left, k);
627 if (k && k > right) swap(k, right);
628
629 for (int i=0; i<4; i++)
630 for (int j=0; j<4; j++) imat[i][j] = false;
631
632 int pos_right = k?2:1;
633
634 parts[0].insert(left);
635 parts[pos_right].insert(right);
636
637 imat[0][pos_right] = 1;
638
639 if (k) {
640 parts[1].insert(k);
641 imat[0][1] = 1;
642 imat[1][pos_right] = 1;
643 }
644 }*/
645
646 // contains some bpair?
Contains(int left)647 bool Pseudoknot::Contains(int left)
648 {
649 return parts[0].count(left) || parts[1].count(left) || parts[2].count(left)|| parts[3].count(left);
650 }
651
652 /*bool cross(int left, int right, int left2, int right2) {
653 return (left<left2 && right<right2 && left2<right) ||
654 (left>left2 && right>right2 && right2>left);
655 }
656
657 bool Pseudoknot::Cross(short *str, int left, int right)
658 {
659 int left2 = *parts[0].begin();
660 if (cross(left, right, left2, str[left2])) return true;
661 left2 = *parts[1].begin();
662 if (cross(left, right, left2, str[left2])) return true;
663 if (parts[2].empty()) return false;
664 left2 = *parts[2].begin();
665 if (cross(left, right, left2, str[left2])) return true;
666 if (parts[3].empty()) return false;
667 left2 = *parts[3].begin();
668 if (cross(left, right, left2, str[left2])) return true;
669 return false;
670 }*/
671
672
673 // can we insert it inside?
Inside(short * str,int left,int right)674 int Pseudoknot::Inside(short *str, int left, int right)
675 {
676 for (int i=0; i<4; i++) {
677 if (parts[i].empty()) return -1;
678
679 set<int>::iterator it = parts[i].upper_bound(left);
680
681 // check if we are inside : it--(..left[..it( ... it)..right]..it--)
682 if ((it == parts[i].end() || str[*it]<right) &&
683 (it == parts[i].begin() || str[*(--it)]>right)) {
684 return i;
685 }
686 }
687
688 return -1;
689 }
690
IsViable(int size,bool imat[4][4])691 bool IsViable(int size, bool imat[4][4]) {
692 /*PH PK PL PM
693 0100 0100 0110 0110
694 0000 0010 0010 0011
695 0000 0000 0000 0001
696 0000 0000 0000 0000*/
697
698 if (size == 2) return true;
699 if (size == 3) return true;
700 if (size == 4) {
701 return imat[0][1] && imat[0][2] && !imat[0][3] &&
702 imat[1][2] && imat[1][3] &&
703 imat[2][3];
704 }
705 return false;
706 }
707
Pseudoknot(const Pseudoknot & pknot)708 Pseudoknot::Pseudoknot(const Pseudoknot &pknot)
709 {
710 for (int i=0; i<4; i++) {
711 for (int j=0; j<4; j++) imat[i][j] = pknot.imat[i][j];
712 parts[i] = pknot.parts[i];
713 }
714 size = pknot.size;
715 }
716
CanInsert(int left,vector<int> & numbers,bool insert)717 bool Pseudoknot::CanInsert(int left, vector<int> &numbers, bool insert)
718 {
719 if (size==4) return false;
720
721 bool imat2[4][4];
722 int new_pos = 0;
723 while (!parts[new_pos].empty() && *parts[new_pos].begin()<left) new_pos++;
724 for (int i=0; i<size; i++) {
725 for (int j=i+1; j<size; j++) {
726 int ii = i >= new_pos ? i+1:i;
727 int jj = j >= new_pos ? j+1:j;
728 imat2[ii][jj] = imat[i][j];
729 }
730 }
731
732 for (int i=0; i<size+1; i++) imat2[min(new_pos, i)][max(new_pos, i)] = false;
733 for (unsigned int i=0; i<numbers.size(); i++) {
734 int number = numbers[i]>=new_pos ? numbers[i]+1:numbers[i];
735 imat2[min(new_pos, number)][max(new_pos,number)] = true;
736 }
737
738 bool viable = IsViable(size+1, imat2);
739
740 if (viable && insert) {
741 for (int i=3; i>new_pos; i--) {
742 swap(parts[i], parts[i-1]);
743 }
744 parts[new_pos].insert(left);
745 size++;
746
747 for (int i=0; i<size; i++) {
748 for (int j=i+1; j<size; j++) {
749 imat[i][j] = imat2[i][j];
750 }
751 }
752 }
753
754 return viable;
755 }
756
Delete(int left)757 bool Pseudoknot::Delete(int left)
758 {
759 for (int i=0; i<4; i++) {
760 // try to find it
761 set<int>::iterator it = parts[i].find(left);
762 if (it != parts[i].end()) {
763 // delete it
764 parts[i].erase(it);
765 // change type?
766 if (parts[i].empty()) {
767 size--;
768 if (size==1) return true; // cancelled H type
769 if (size==2 && i==1 && !imat[0][2]) { // cancelled K type
770 size--;
771 return true;
772 }
773 for (int j=0; j<size; j++) {
774 int jj = j>=i?j+1:j;
775 for (int k=j+1; k<size; k++) {
776 int kk = k>=i?k+1:k;
777 imat[j][k] = imat[jj][kk];
778 }
779 parts[j] = parts[jj];
780 }
781 parts[size].clear();
782 imat[0][size] = imat[1][size] = imat[2][size] = false;
783 return true;
784 }
785 return true;
786 }
787 }
788
789 return false;
790 }
791
operator =(const Pseudoknot & pknot)792 Pseudoknot &Pseudoknot::operator=(const Pseudoknot &pknot)
793 {
794 for (int i=0; i<4; i++) {
795 for (int j=0; j<4; j++) this->imat[i][j] = pknot.imat[i][j];
796 this->parts[i] = pknot.parts[i];
797 }
798 this->size = pknot.size;
799
800 return *this;
801 }
802
Structure(int length)803 Structure::Structure(int length)
804 {
805 this->str = (short*) malloc(sizeof(short)*(length+1));
806 for (int i=1; i<=length; i++) this->str[i] = 0;
807 this->str[0] = length;
808
809 this->energy = 0;
810 }
811
Structure(short * structure,int energy)812 Structure::Structure(short *structure, int energy)
813 {
814 int length = structure[0];
815 this->str = (short*) malloc(sizeof(short)*(length+1));
816 for (int i=1; i<=length; i++) this->str[i] = 0;
817 this->str[0] = length;
818
819 // assign all bpairs:
820 for (int i=1; i<=structure[0]; i++) {
821 if (structure[i]>i) ViableInsert(i, structure[i], true);
822 }
823
824 this->energy = energy;
825 }
826
Structure(char * structure,int energy)827 Structure::Structure(char *structure, int energy)
828 {
829 int length = strlen(structure);
830 this->str = (short*) malloc(sizeof(short)*(length+1));
831 for (int i=1; i<=length; i++) this->str[i] = 0;
832 this->str[0] = length;
833
834 // assign all bpairs:
835 short *str_tmp = make_pair_table_PK(structure);
836 for (int i=1; i<=str_tmp[0]; i++) {
837 if (str_tmp[i]>i) ViableInsert(i, str_tmp[i], true);
838 }
839 free(str_tmp);
840
841 this->energy = energy;
842 }
843
Structure(const char * seq,short * structure,short * s0,short * s1)844 Structure::Structure(const char *seq, short *structure, short *s0, short *s1)
845 {
846 int length = structure[0];
847 this->str = (short*) malloc(sizeof(short)*(length+1));
848 for (int i=1; i<=length; i++) this->str[i] = 0;
849 this->str[0] = length;
850
851 // assign all bpairs:
852 for (int i=1; i<=structure[0]; i++) {
853 if (structure[i]>i) ViableInsert(i, structure[i], true);
854 }
855
856 energy = energy_of_struct_pk(seq, str, s0, s1, 0);
857 }
858
Structure(const char * seq,char * structure,short * s0,short * s1)859 Structure::Structure(const char *seq, char *structure, short *s0, short *s1)
860 {
861 int length = strlen(structure);
862 this->str = (short*) malloc(sizeof(short)*(length+1));
863 for (int i=1; i<=length; i++) this->str[i] = 0;
864 this->str[0] = length;
865
866 // assign all bpairs:
867 short *str_tmp = make_pair_table_PK(structure);
868 for (int i=1; i<=str_tmp[0]; i++) {
869 if (str_tmp[i]>i) ViableInsert(i, str_tmp[i], true);
870 }
871 free(str_tmp);
872
873 energy = energy_of_struct_pk(seq, str, s0, s1, 0);
874 }
875
Structure(const Structure & second)876 Structure::Structure(const Structure &second)
877 {
878 energy = second.energy;
879 str = allocopy(second.str);
880
881 pknots = second.pknots;
882
883 bpair_pknot = second.bpair_pknot;
884 }
885
~Structure()886 Structure::~Structure()
887 {
888 free(str);
889 }
890
operator <(const Structure & second) const891 bool const Structure::operator<(const Structure &second) const
892 {
893 if (energy != second.energy) return energy<second.energy;
894 int i=1;
895 char l=0,r=0;
896 while (i<=str[0]) {
897 l = (str[i]==0?'.':(str[i]<str[str[i]]?')':'('));
898 r = (second.str[i]==0?'.':(second.str[i]<second.str[second.str[i]]?')':'('));
899 if (l != r) return l<r;
900 //if (str[i] != second.str[i]) return str[i] > second.str[i];
901 i++;
902 }
903
904 return false;
905 }
906
operator ==(const Structure & second) const907 bool const Structure::operator==(const Structure &second) const
908 {
909 if (energy != second.energy) return false;
910 int i=1;
911 while (i<=str[0]) {
912 if (str[i] != second.str[i]) return false;
913 i++;
914 }
915 return true;
916 }
917
operator =(const Structure & second)918 Structure &Structure::operator=(const Structure &second)
919 {
920 energy = second.energy;
921 copy_arr(str, second.str);
922
923 pknots = second.pknots;
924
925 bpair_pknot = second.bpair_pknot;
926
927 return *this;
928 }
929
930 // checks if we can add it to the 'cross' vector (if it is compatible with )
CrossOuter(vector<int> & cross,int point,short * str)931 bool CrossOuter(vector<int> &cross, int point, short *str) {
932 if (cross.empty()) return true;
933 int left = cross.back();
934 int right = str[left];
935 if (point<left && str[point]>right) return true;
936 return false;
937 }
938
CrossInner(vector<int> & cross,int point,short * str)939 bool CrossInner(vector<int> &cross, int point, short *str) {
940 if (cross.empty()) return true;
941 int left = cross.back();
942 int right = str[left];
943 if (point>left && str[point]<right) return true;
944 return false;
945 }
946
947 // is the bpair viable?
ViableInsert(int left,int right,bool insert)948 INS_FLAG Structure::ViableInsert(int left, int right, bool insert)
949 {
950 if (str[left] || str[right]) return NO_INS;
951
952 INS_FLAG res = NO_INS;
953
954 // get crossings:
955 vector<set<int> > crossings;
956 vector<vector<int> > cross_tmp;
957 for (int i=left+1; i<right; i++) { // can be faster:
958 if (str[i] == 0) continue;
959 if (str[i] < left) { // ')'
960 int k=0;
961 if ((int)cross_tmp.size()<=k) cross_tmp.resize(k+1);
962 if ((int)crossings.size()<=k) crossings.resize(k+1);
963 while (!CrossOuter(cross_tmp[k], str[i], str)) {
964 k++;
965 if ((int)cross_tmp.size()<=k) cross_tmp.resize(k+1);
966 if ((int)crossings.size()<=k) crossings.resize(k+1);
967 }
968 cross_tmp[k].push_back(str[i]);
969 crossings[k].insert(str[i]);
970 } else if (str[i] > right) { // '('
971 int k=0;
972 if ((int)cross_tmp.size()<=k) cross_tmp.resize(k+1);
973 if ((int)crossings.size()<=k) crossings.resize(k+1);
974 while (!CrossInner(cross_tmp[k], i, str)) {
975 k++;
976 if ((int)cross_tmp.size()<=k) cross_tmp.resize(k+1);
977 if ((int)crossings.size()<=k) crossings.resize(k+1);
978 }
979 cross_tmp[k].push_back(i);
980 crossings[k].insert(i);
981 }
982 }
983 // sort the sets:
984 sort(crossings.begin(), crossings.end());
985
986 // find if it crosses a pknot:
987 Pseudoknot *cross_pk = NULL;
988 int cross_index = -1;
989 if (pknots.size()>0) {
990 for (unsigned int i=0; i<crossings.size(); i++) {
991 set<int>::iterator it = crossings[i].begin();
992
993
994
995 int index = bpair_pknot[*it];
996 Pseudoknot *pk_tmp = Pknot_index(index);
997 for (it++; it!=crossings[i].end(); it++) {
998 Pseudoknot *pk_now = Pknot_bpair(*it);
999 if (pk_tmp != pk_now) return NO_INS; // split the stack!
1000 }
1001 if (pk_tmp != NULL) {
1002 if (cross_pk == NULL) {
1003 cross_pk = pk_tmp;
1004 cross_index = index;
1005 } else if (cross_pk != pk_tmp) return NO_INS; // we have found 2 colliding pknots
1006 }
1007 }
1008 }
1009
1010 /*for (unsigned int i=0; i<pknots.size(); i++) {
1011 if (pknots[i].Cross(str, left, right)) {
1012 if (cross_pk == NULL) cross_pk = &pknots[i];
1013 else return false;
1014 }
1015 }*/
1016
1017 if (cross_pk) {
1018 // check if we don't split the parts
1019 vector<int> numbers;
1020 for (unsigned int i=0; i<crossings.size(); i++) {
1021 for (int j=0; j<cross_pk->size; j++) {
1022 if (*cross_pk->parts[j].begin() == *crossings[i].begin()) {
1023 if (cross_pk->parts[j] == crossings[i]) {
1024 numbers.push_back(j);
1025 // stack is correct
1026 break;
1027 }
1028 // stack is split
1029 return NO_INS;
1030 }
1031 }
1032 // each crossing we must find otherwise wrong.
1033 if (numbers.size()==i) return NO_INS;
1034 }
1035
1036 // now we have the numbers which we border (0-3), the rest should be checked if we can insert it in it
1037 int inside = cross_pk->Inside(str, left, right);
1038 if (inside>=0) {
1039 // check if neighbourhood fits // essentially numbers[x] == Imat(x, inside)
1040 int k=0;
1041 for (int i=0; i<cross_pk->size; i++) {
1042 if (cross_pk->Imat(inside, i)) {
1043 if (k>(int)numbers.size()-1) return NO_INS;
1044 if (numbers[k]==i) k++;
1045 else return NO_INS;
1046 }
1047 }
1048 // lastly check if we do not cross more than we should
1049 if (k != (int)numbers.size()) return NO_INS;
1050
1051 // neighbourhood fits -> we can insert it in the parts[inside]
1052 if (insert) cross_pk->parts[inside].insert(left);
1053 res = INSIDE_PK;
1054 } else {
1055 // we cannot insert it, but maybe we can change the type:
1056 bool ci = cross_pk->CanInsert(left, numbers, insert);
1057 if (ci) res = CHNG_PK;
1058 else res = NO_INS;
1059 }
1060
1061 } else {
1062 // now we can insert it since we do not cross any pk.
1063 if (insert) {
1064 switch (crossings.size()) {
1065 case 0:
1066 break;
1067 case 1: {
1068 Pseudoknot pknot;
1069 int cross_first = *crossings[0].begin();
1070 int bpair = (left > cross_first)? 1:0;
1071 pknot.parts[bpair].insert(left);
1072 pknot.parts[bpair==0?1:0] = crossings[0];
1073 pknot.imat[0][1] = true;
1074 pknot.size = 2;
1075 pknots.push_back(pknot);
1076 cross_index = pknots.size()-1;
1077 for (set<int>::iterator it=crossings[0].begin(); it!=crossings[0].end(); it++) bpair_pknot[*it] = pknots.size()-1;
1078 break;
1079 }
1080 case 2: {
1081 Pseudoknot pknot;
1082 int bpair = (left > *crossings[0].begin())? ((left > *crossings[1].begin())? 2:1):0;
1083 pknot.parts[bpair].insert(left);
1084 pknot.parts[bpair<=0?1:0] = crossings[0];
1085 pknot.parts[bpair<=1?2:1] = crossings[1];
1086 pknot.imat[0][1] = true;
1087 pknot.imat[1][2] = true;
1088 pknot.size = 3;
1089 pknots.push_back(pknot);
1090 cross_index = pknots.size()-1;
1091 for (set<int>::iterator it=crossings[0].begin(); it!=crossings[0].end(); it++) bpair_pknot[*it] = pknots.size()-1;
1092 for (set<int>::iterator it=crossings[1].begin(); it!=crossings[1].end(); it++) bpair_pknot[*it] = pknots.size()-1;
1093 break;
1094 }
1095 default: assert(false);
1096 }
1097 }
1098 switch (crossings.size()) {
1099 case 0: res = REG_INS; break;
1100 case 1:
1101 case 2: res = CREATE_PK; break;
1102 }
1103 }
1104
1105 if (res != NO_INS && insert) {
1106 bpair_pknot[left] = cross_index;
1107 str[left] = right;
1108 str[right] = left;
1109 }
1110 return res;
1111 }
1112
Delete(int left)1113 bool Structure::Delete(int left)
1114 {
1115 if (left<0) left = -left;
1116 if (!str[left]) return false;
1117
1118 int right = str[left];
1119
1120 for (unsigned int i=0; i<pknots.size(); i++) {
1121 if (pknots[i].Delete(left)) {
1122 if (pknots[i].size == 1) {
1123 for (int j=0; j<4; j++) {
1124 for (set<int>::iterator it = pknots[i].parts[j].begin(); it!=pknots[i].parts[j].end(); it++) {
1125 bpair_pknot[*it] = -1;
1126 }
1127 }
1128 // erase the pknot and fix bpair_pknot
1129 pknots.erase(pknots.begin()+i);
1130 for (map<int, int>::iterator it=bpair_pknot.begin(); it!=bpair_pknot.end(); it++) {
1131 if (it->second >= (int)i) it->second--;
1132 }
1133 }
1134 break;
1135 }
1136 }
1137
1138 bpair_pknot.erase(left);
1139 str[left] = 0;
1140 str[right] = 0;
1141 return true;
1142 }
1143
UndoMove()1144 int Structure::UndoMove()
1145 {
1146 if (undo_l == 0) {
1147 fprintf(stderr, "ERROR: Undoing non-existent move!\n");
1148 return 0;
1149 }
1150
1151 int left = undo_l;
1152 int right = undo_r;
1153
1154 int dif_en = - energy + undo_en;
1155 energy = undo_en;
1156
1157 // switch?
1158 if (left>0 && right>0 && (str[left]>0 || str[right]>0)) {
1159 Shift(left, right);
1160 } else {
1161 if (left<0) {
1162 Delete(left);
1163 } else {
1164 Insert(left, right);
1165 }
1166 }
1167
1168 undo_l = 0;
1169 undo_r = 0;
1170 undo_en = 0;
1171
1172 return dif_en;
1173 }
1174
MakeMove(const char * seq,short * s0,short * s1,int left,int right)1175 int Structure::MakeMove(const char *seq, short *s0, short *s1, int left, int right)
1176 {
1177 undo_en = energy;
1178 bool change = false;
1179
1180 // switch?
1181 if (left>0 && right>0 && (str[left]>0 || str[right]>0)) {
1182 if (str[left]>0) {
1183 undo_l = left;
1184 undo_r = str[left];
1185 } else {
1186 undo_r = right;
1187 undo_l = str[right];
1188 }
1189 change = (Shift(left, right) != NO_INS);
1190 } else {
1191 if (left<0) {
1192 undo_l = -left;
1193 undo_r = -right;
1194 change = Delete(left);
1195 } else {
1196 undo_l = -left;
1197 undo_r = -right;
1198 change = (Insert(left, right) != NO_INS);
1199 }
1200 }
1201
1202 if (change) energy = energy_of_struct_pk(seq, str, s0, s1, 0);
1203 return energy - undo_en;
1204 }
1205
CanShift(int left,int right)1206 INS_FLAG Structure::CanShift(int left, int right)
1207 {
1208 if ((str[left]>0 && str[right]>0) ||
1209 (str[left]==0 && str[right]==0)) return NO_INS;
1210
1211 int left_orig = str[left]>0? left : str[right];
1212 int right_orig = str[right]>0? right : str[left];
1213
1214 Delete(left_orig);
1215
1216 INS_FLAG flag = CanInsert(left, right);
1217
1218 Insert(left_orig, right_orig);
1219
1220 return flag;
1221 }
1222
Shift(int left,int right)1223 INS_FLAG Structure::Shift(int left, int right)
1224 {
1225 if ((str[left]>0 && str[right]>0) ||
1226 (str[left]==0 && str[right]==0)) return NO_INS;
1227
1228 int left_orig = str[left]>0? left : str[right];
1229 int right_orig = str[right]>0? right : str[left];
1230
1231 Delete(left_orig);
1232
1233 INS_FLAG flag = Insert(left, right);
1234
1235 if (flag==NO_INS) Insert(left_orig, right_orig);
1236
1237 return flag;
1238 }
1239
CanInsert(int left,int right)1240 INS_FLAG Structure::CanInsert(int left, int right)
1241 {
1242 return ViableInsert(left, right, false);
1243 }
1244
Insert(int left,int right)1245 INS_FLAG Structure::Insert(int left, int right)
1246 {
1247 return ViableInsert(left, right, true);
1248 }
1249
Contains_PK(short * str)1250 int Contains_PK(short *str)
1251 {
1252 stack<int> pairing;
1253 pairing.push(str[0]+1);
1254 for (int i=1; i<=str[0]; i++) {
1255 if (str[i]==0) continue;
1256 if (str[i]>i) { //'('
1257 if (str[i]>pairing.top()) return i;
1258 pairing.push(str[i]);
1259 }
1260 else { //')'
1261 pairing.pop();
1262 }
1263 }
1264 return 0;
1265 }
1266