1 /*
2 * combinatorics.c
3 *
4 * Various implementations that deal with combinatorial aspects
5 * of RNA/DNA folding
6 *
7 * (c) 2016, Ronny Lorenz
8 *
9 * Vienna RNA package
10 */
11
12 #ifdef HAVE_CONFIG_H
13 #include "config.h"
14 #endif
15
16 #include <stdio.h>
17 #include <stdlib.h>
18 #include <string.h>
19
20 #include "ViennaRNA/utils/basic.h"
21 #include "ViennaRNA/search/BoyerMoore.h"
22 #include "ViennaRNA/combinatorics.h"
23
24 /*
25 #################################
26 # GLOBAL VARIABLES #
27 #################################
28 */
29
30 /*
31 #################################
32 # PRIVATE VARIABLES and STRUCTS #
33 #################################
34 */
35 struct perm_list {
36 int value;
37 struct perm_list *next, *prev;
38 };
39
40 struct necklace_content {
41 int value;
42 int count;
43 };
44
45
46 /*
47 #################################
48 # PRIVATE FUNCTION DECLARATIONS #
49 #################################
50 */
51 PRIVATE struct perm_list *perm_list_insert(struct perm_list *before,
52 int value);
53
54
55 PRIVATE struct perm_list *perm_list_remove_val(struct perm_list *head,
56 int value);
57
58
59 PRIVATE struct perm_list *perm_list_head(struct perm_list *entry);
60
61
62 PRIVATE void perm_list_destroy(struct perm_list *entry);
63
64
65 PRIVATE int cmpfunc(const void *a,
66 const void *b);
67
68
69 PRIVATE void sawada_fast_finish_perm(struct necklace_content *content,
70 unsigned int ***results,
71 unsigned int *result_count,
72 unsigned int *result_size,
73 unsigned int n);
74
75
76 PRIVATE void sawada_fast(unsigned int t,
77 unsigned int p,
78 unsigned int s,
79 struct necklace_content *content,
80 unsigned int k,
81 unsigned int *r,
82 struct perm_list *a,
83 unsigned int n,
84 unsigned int ***results,
85 unsigned int *result_count,
86 unsigned int *result_size);
87
88
89 /*
90 #################################
91 # BEGIN OF FUNCTION DEFINITIONS #
92 #################################
93 */
94
95 /*
96 * This is an implementation of Joe Sawada's
97 * "Fast algorithm to generate necklaces with fixed content"
98 * Theoretical Computer Science 301 (2003) 477 - 489
99 */
100 PUBLIC unsigned int **
vrna_enumerate_necklaces(const unsigned int * entity_counts)101 vrna_enumerate_necklaces(const unsigned int *entity_counts)
102 {
103 unsigned int n, i, *r, **result, result_count, result_size, num_entities;
104 struct necklace_content *content;
105 struct perm_list *a;
106
107 num_entities = 0;
108
109 if (entity_counts)
110 for (i = 0; entity_counts[i] > 0; i++)
111 num_entities++;
112
113 /* count total number of strands */
114 for (n = i = 0; i < num_entities; i++)
115 n += entity_counts[i];
116
117 /* first re-order entity_counts such that k - 1 has most occurences */
118 content = (struct necklace_content *)vrna_alloc(sizeof(struct necklace_content) * num_entities);
119 for (i = 0; i < num_entities; i++) {
120 content[i].value = i;
121 content[i].count = entity_counts[i];
122 }
123 qsort(content, num_entities, sizeof(struct necklace_content), cmpfunc);
124
125 /* create a-list (available characters) */
126 a = NULL;
127 for (i = 0; i < num_entities; i++)
128 a = perm_list_insert(a, i); /* inserts new element before a */
129
130 /* create r-array */
131 r = (unsigned int *)vrna_alloc(sizeof(unsigned int) * (n + 1));
132
133 /* prepare result list */
134 result = NULL;
135 result_count = 0;
136 result_size = 20;
137
138 result = (unsigned int **)vrna_alloc(sizeof(unsigned int *) * (result_size));
139 for (i = 0; i < result_size; i++)
140 result[i] = (unsigned int *)vrna_alloc(sizeof(unsigned int) * (n + 1));
141
142 /* do initial step */
143 for (i = 1; i <= n; i++)
144 result[result_count][i] = num_entities - 1;
145
146 result[result_count][1] = 0;
147 content[0].count = content[0].count - 1;
148 if (content[0].count == 0)
149 a = perm_list_remove_val(a, 0);
150
151 /* now we iterate to retrieve full permutation(s) */
152 sawada_fast(2, 1, 2, content, num_entities, r, a, n, &result, &result_count, &result_size);
153
154 /* resize results list to actual requirements */
155 for (i = result_count; i < result_size; i++)
156 free(result[i]);
157 result =
158 (unsigned int **)vrna_realloc(result, sizeof(unsigned int *) * (result_count + 1));
159 result[result_count] = NULL;
160
161 /* cleanup memory */
162 free(r);
163 free(content);
164 perm_list_destroy(a);
165
166 return result;
167 }
168
169
170 PUBLIC unsigned int
vrna_rotational_symmetry_num(const unsigned int * string,size_t string_length)171 vrna_rotational_symmetry_num(const unsigned int *string,
172 size_t string_length)
173 {
174 return vrna_rotational_symmetry_pos_num(string,
175 string_length,
176 NULL);
177 }
178
179
180 PUBLIC unsigned int
vrna_rotational_symmetry_pos_num(const unsigned int * string,size_t string_length,unsigned int ** positions)181 vrna_rotational_symmetry_pos_num(const unsigned int *string,
182 size_t string_length,
183 unsigned int **positions)
184 {
185 const unsigned int *ptr;
186 unsigned int matches, max, shifts_size;
187 size_t *badchars, shift, i;
188
189 if ((!string) || (string_length == 0)) {
190 if (positions)
191 *positions = NULL;
192
193 return 0;
194 }
195
196 /* any string is at least order 1 */
197 matches = 1;
198
199 if (positions) {
200 shifts_size = 10; /* initial guess for the order of rotational symmetry */
201 *positions = vrna_alloc(sizeof(unsigned int) * shifts_size);
202
203 /* store trivial symmetry */
204 (*positions)[matches - 1] = 0;
205 }
206
207 /* strings of length 1 are order 1 */
208 if (string_length == 1) {
209 /* resize positions array to actual length */
210 if (positions)
211 *positions = vrna_realloc(*positions, sizeof(unsigned int) * matches);
212
213 return matches;
214 }
215
216 /* determine largest number/character in string */
217 max = string[0];
218 for (i = 1; i < string_length; i++)
219 max = MAX2(max, string[i]);
220
221 badchars = vrna_search_BM_BCT_num(string, string_length, max);
222 shift = 1; /* skip trivial symmetry */
223
224 /* detect order of rotational symmetry */
225
226 /*
227 * Note, that finding the smallest shift s of the string that
228 * results in an identity mapping of the string to itself
229 * already determines the order of rotational symmetry R, i.e.
230 * R = n / r where n is the length of the string
231 */
232 ptr = vrna_search_BMH_num(string,
233 string_length,
234 string,
235 string_length,
236 shift,
237 badchars,
238 1);
239 if (ptr) {
240 shift = ptr - string;
241 matches = string_length / shift;
242 if (positions) {
243 *positions = vrna_realloc(*positions, sizeof(unsigned int) * matches);
244 for (i = 0; i < matches; i++)
245 (*positions)[i] = i * shift;
246 }
247 }
248
249 free(badchars);
250
251 return matches;
252 }
253
254
255 PUBLIC unsigned int
vrna_rotational_symmetry(const char * string)256 vrna_rotational_symmetry(const char *string)
257 {
258 return vrna_rotational_symmetry_pos(string,
259 NULL);
260 }
261
262
263 PUBLIC unsigned int
vrna_rotational_symmetry_pos(const char * string,unsigned int ** positions)264 vrna_rotational_symmetry_pos(const char *string,
265 unsigned int **positions)
266 {
267 const char *ptr;
268 unsigned int matches, shifts_size;
269 size_t *badchars, shift, i, string_length;
270
271 if (!string) {
272 if (positions)
273 *positions = NULL;
274
275 return 0;
276 }
277
278 string_length = strlen(string);
279
280 if (string_length == 0) {
281 if (positions)
282 *positions = NULL;
283
284 return 0;
285 }
286
287 /* any string is at least order 1 */
288 matches = 1;
289
290 if (positions) {
291 shifts_size = 10; /* initial guess for the order of rotational symmetry */
292 *positions = vrna_alloc(sizeof(unsigned int) * shifts_size);
293
294 /* store trivial symmetry */
295 (*positions)[matches - 1] = 0;
296 }
297
298 /* strings of length 1 are order 1 */
299 if (string_length == 1) {
300 /* resize positions array to actual length */
301 if (positions)
302 *positions = vrna_realloc(*positions, sizeof(unsigned int) * matches);
303
304 return matches;
305 }
306
307 /* determine largest number/character in string */
308 badchars = vrna_search_BM_BCT(string);
309
310 shift = 1; /* skip trivial symmetry */
311
312 /* detect order of rotational symmetry */
313
314 /*
315 * Note, that finding the smallest shift s of the string that
316 * results in an identity mapping of the string to itself
317 * already determines the order of rotational symmetry R, i.e.
318 * R = n / r where n is the length of the string
319 */
320 ptr = vrna_search_BMH(string,
321 string_length,
322 string,
323 string_length,
324 shift,
325 badchars,
326 1);
327
328 if (ptr) {
329 shift = ptr - string;
330 matches = string_length / shift;
331 if (positions) {
332 *positions = vrna_realloc(*positions, sizeof(unsigned int) * matches);
333 for (i = 0; i < matches; i++)
334 (*positions)[i] = i * shift;
335 }
336 }
337
338 free(badchars);
339
340 return matches;
341 }
342
343
344 /**
345 * @brief Compute the order of rotational symmetry for a secondary structure @p s
346 *
347 * This is the size of the stabilizer of @p s, i.e. the set of cyclic permutations
348 * of the strand identifiers of the base pairs in @p s that map @p s onto
349 * itself.
350 */
351 PUBLIC unsigned int
vrna_rotational_symmetry_db(vrna_fold_compound_t * fc,const char * structure)352 vrna_rotational_symmetry_db(vrna_fold_compound_t *fc,
353 const char *structure)
354 {
355 return vrna_rotational_symmetry_db_pos(fc,
356 structure,
357 NULL);
358 }
359
360
361 PUBLIC unsigned int
vrna_rotational_symmetry_db_pos(vrna_fold_compound_t * fc,const char * structure,unsigned int ** positions)362 vrna_rotational_symmetry_db_pos(vrna_fold_compound_t *fc,
363 const char *structure,
364 unsigned int **positions)
365 {
366 unsigned int n, permutations;
367
368 permutations = 0;
369
370 if (positions)
371 *positions = NULL;
372
373 if ((fc) && (structure)) {
374 n = strlen(structure);
375 if (fc->length != n) {
376 vrna_message_warning("vrna_rotational_symmetry_db*: "
377 "Sequence and structure have unequal lengths (%d vs. %d)",
378 fc->length,
379 n);
380 } else {
381 unsigned int *shifts, s, r, i, j, ii, jj, string_permutations;
382 short *pt;
383
384 /* any structure has rotational symmetry of at least order 1, i.e. identity */
385 string_permutations = permutations = 1;
386
387 if (positions) {
388 *positions = vrna_alloc(sizeof(unsigned int));
389 /* store trivial symmetry, i.e. identity */
390 (*positions)[0] = 0;
391 }
392
393 /* single strands only exhibit rotational symmetry if the string is circular */
394 if ((fc->strands == 1) && (fc->params->model_details.circ)) {
395 /* compute rotational symmetry for the circular sequence */
396 string_permutations = vrna_rotational_symmetry_pos(fc->sequence,
397 &shifts);
398 } else if (fc->strands > 1) {
399 /* determine rotational symmetry of the current strand permutation */
400 string_permutations = vrna_rotational_symmetry_pos_num(fc->strand_order,
401 fc->strands,
402 &shifts);
403 }
404
405 /*
406 * There are no rotationally symmetric structures if the strand ordering is
407 * not rotationally symmetric, i.e. string_permutations == 1
408 */
409 if (string_permutations > 1) {
410 /*
411 * For each cyclic permutation of the strand(s), we check if the structure
412 * is also an identify mapping. For that purpose, we simply convert the
413 * structure into a pair table, perform the permutation and check for
414 * identity with the initial structure
415 */
416 pt = vrna_ptable(structure);
417 s = 0;
418
419 for (r = 1; r < string_permutations; r++) {
420 /*
421 * initial shift is given by smallest shift on sequence level
422 * Here, we distinguish circular RNAs from multi stranded ones
423 * as we've stored the shifts on sequence level and strand level,
424 * respectively.
425 */
426 if (fc->strands == 1) {
427 /*
428 * 1. Circular string, i.e. shifts[r] already contains the shifts in
429 * nucleotide positions
430 */
431 s += shifts[r] - shifts[r - 1];
432 } else {
433 /*
434 * 2. For multiple strands, we have to compute the actual nucleotide shift, since
435 * shifts[r] represents a shift by the first shifts[r] strands, i.e. their total
436 * length in nucleotides
437 */
438 for (i = shifts[r - 1]; i < shifts[r]; i++)
439 s += fc->nucleotides[fc->strand_order[i]].length;
440 }
441
442 /*
443 * Finally, go through the structure and decrease rot_s if the rotationally
444 * symmetric arrangement of the strand(s) still leads to an asymetry of the
445 * structure.
446 */
447 for (i = 1; i <= n; i++) {
448 j = pt[i]; /* pairing partner of i (1-based), or 0 if unpaired */
449 ii = (i + s);
450 if (ii > n)
451 ii = (ii % (n + 1)) + 1; /* position i' in the image of rotation by s */
452
453 jj = pt[ii]; /* pairing partner of i' (1-based), or 0 if unpaired */
454
455 /* i is paired? */
456 if (j != 0) {
457 j += s;
458 if (j > n)
459 j = (j % (n + 1)) + 1; /* pairing partner of i (1-based) in the image of rotation by s */
460 }
461
462 /* finally check if j is identical to jj, and stop loop in case of mismatch */
463 if (j != jj)
464 break;
465 }
466
467 /* check whether we had a match of the structure under current permutation */
468 if (i == n + 1) {
469 /*
470 * now, we know the minimal shift to retrieve an identical structure, so
471 * the order of rotational symmetry is given by multiples of that shift
472 */
473 permutations = fc->length / s;
474
475 /* store permutation if structure was matches successfully */
476 if (positions) {
477 *positions = vrna_realloc(*positions, sizeof(unsigned int) * permutations);
478 for (i = 0; i < permutations; i++)
479 (*positions)[i] = i * s;
480 }
481
482 /* we are finished */
483 break;
484 }
485 }
486 free(pt);
487 }
488
489 free(shifts);
490 }
491 }
492
493 return permutations;
494 }
495
496
497 /*
498 #################################
499 # STATIC helper functions below #
500 #################################
501 */
502 PRIVATE struct perm_list *
perm_list_insert(struct perm_list * before,int value)503 perm_list_insert(struct perm_list *before,
504 int value)
505 {
506 struct perm_list *new;
507
508 new = (struct perm_list *)vrna_alloc(sizeof(struct perm_list));
509
510 new->value = value;
511 new->next = NULL;
512 new->prev = NULL;
513
514 if (before) {
515 new->prev = before->prev;
516 new->next = before;
517 before->prev = new;
518 }
519
520 return new;
521 }
522
523
524 PRIVATE struct perm_list *
perm_list_remove_val(struct perm_list * head,int value)525 perm_list_remove_val(struct perm_list *head,
526 int value)
527 {
528 struct perm_list *ptr;
529
530 ptr = head;
531
532 while (ptr) {
533 if (ptr->value == value) {
534 if (ptr->prev)
535 ptr->prev->next = ptr->next;
536 else
537 head = ptr->next;
538
539 if (ptr->next)
540 ptr->next->prev = ptr->prev;
541
542 free(ptr);
543 break;
544 }
545
546 ptr = ptr->next;
547 }
548
549 return head;
550 }
551
552
553 PRIVATE struct perm_list *
perm_list_head(struct perm_list * entry)554 perm_list_head(struct perm_list *entry)
555 {
556 struct perm_list *head;
557
558 head = entry;
559 if (head)
560 while (head->prev)
561 head = head->prev;
562
563 return head;
564 }
565
566
567 PRIVATE void
perm_list_destroy(struct perm_list * entry)568 perm_list_destroy(struct perm_list *entry)
569 {
570 struct perm_list *head, *ptr;
571
572 head = perm_list_head(entry);
573 while (head) {
574 ptr = head;
575 head = ptr->next;
576 free(ptr);
577 }
578 }
579
580
581 PRIVATE int
cmpfunc(const void * a,const void * b)582 cmpfunc(const void *a,
583 const void *b)
584 {
585 return ((struct necklace_content *)a)->count - ((struct necklace_content *)b)->count;
586 }
587
588
589 PRIVATE void
sawada_fast_finish_perm(struct necklace_content * content,unsigned int *** results,unsigned int * result_count,unsigned int * result_size,unsigned int n)590 sawada_fast_finish_perm(struct necklace_content *content,
591 unsigned int ***results,
592 unsigned int *result_count,
593 unsigned int *result_size,
594 unsigned int n)
595 {
596 unsigned int i;
597
598 /* adjust results list size if necessary */
599 if ((*result_count + 1) == (*result_size)) {
600 *result_size *= 1.2;
601 (*results) =
602 (unsigned int **)vrna_realloc(*results, sizeof(unsigned int *) * (*result_size));
603 for (i = *result_count + 1; i < *result_size; i++)
604 (*results)[i] = (unsigned int *)vrna_alloc(sizeof(unsigned int) * (n + 1));
605 }
606
607 /* create new (next) entry, copy-over curent result, and transliterate current one */
608 for (i = 1; i <= n; i++) {
609 unsigned int v = (*results)[*result_count][i];
610 (*results)[(*result_count) + 1][i] = v;
611 (*results)[(*result_count)][i] = content[v].value;
612 }
613 (*result_count)++;
614 }
615
616
617 PRIVATE void
sawada_fast(unsigned int t,unsigned int p,unsigned int s,struct necklace_content * content,unsigned int k,unsigned int * r,struct perm_list * a,unsigned int n,unsigned int *** results,unsigned int * result_count,unsigned int * result_size)618 sawada_fast(unsigned int t,
619 unsigned int p,
620 unsigned int s,
621 struct necklace_content *content,
622 unsigned int k,
623 unsigned int *r,
624 struct perm_list *a,
625 unsigned int n,
626 unsigned int ***results,
627 unsigned int *result_count,
628 unsigned int *result_size)
629 {
630 if (content[k - 1].count == n - t + 1) {
631 if ((content[k - 1].count == r[t - p]) && (n % p == 0))
632 sawada_fast_finish_perm(content, results, result_count, result_size, n);
633 else if (content[k - 1].count > r[t - p])
634 sawada_fast_finish_perm(content, results, result_count, result_size, n);
635 } else if (content[0].count != (n - t + 1)) {
636 unsigned int *result = (*results)[*result_count];
637 struct perm_list *ptr, *before, *after;
638 ptr = perm_list_head(a);
639
640 after = before = NULL;
641
642 unsigned int j = ptr->value;
643 unsigned int sp = s;
644 while (j >= result[t - p]) {
645 r[s] = t - s;
646 result[t] = j;
647 content[j].count = content[j].count - 1;
648
649 if (content[j].count == 0) {
650 /* detach current element */
651 if (ptr->prev) {
652 before = ptr->prev;
653 before->next = ptr->next;
654 } else {
655 before = NULL;
656 }
657
658 if (ptr->next) {
659 after = ptr->next;
660 after->prev = ptr->prev;
661 } else {
662 after = NULL;
663 }
664
665 if (!before)
666 a = ptr->next;
667 }
668
669 if (j != k - 1)
670 sp = t + 1;
671
672 if (j == result[t - p])
673 sawada_fast(t + 1, p, sp, content, k, r, a, n, results, result_count, result_size);
674 else
675 sawada_fast(t + 1, t, sp, content, k, r, a, n, results, result_count, result_size);
676
677 if (content[j].count == 0) {
678 /* re-attach current element */
679 if (before)
680 before->next = ptr;
681 else
682 a = ptr;
683
684 if (after)
685 after->prev = ptr;
686 }
687
688 content[j].count = content[j].count + 1;
689
690 result = (*results)[*result_count];
691
692 if (ptr->next) {
693 ptr = ptr->next;
694 j = ptr->value;
695 } else {
696 break;
697 }
698 }
699 result[t] = k - 1;
700 }
701 }
702