1 /*
2  *  combinatorics.c
3  *
4  *  Various implementations that deal with combinatorial aspects
5  *  of RNA/DNA folding
6  *
7  *  (c) 2016, Ronny Lorenz
8  *
9  *  Vienna RNA package
10  */
11 
12 #ifdef HAVE_CONFIG_H
13 #include "config.h"
14 #endif
15 
16 #include <stdio.h>
17 #include <stdlib.h>
18 #include <string.h>
19 
20 #include "ViennaRNA/utils/basic.h"
21 #include "ViennaRNA/search/BoyerMoore.h"
22 #include "ViennaRNA/combinatorics.h"
23 
24 /*
25  #################################
26  # GLOBAL VARIABLES              #
27  #################################
28  */
29 
30 /*
31  #################################
32  # PRIVATE VARIABLES and STRUCTS #
33  #################################
34  */
35 struct perm_list {
36   int               value;
37   struct perm_list  *next, *prev;
38 };
39 
40 struct necklace_content {
41   int value;
42   int count;
43 };
44 
45 
46 /*
47  #################################
48  # PRIVATE FUNCTION DECLARATIONS #
49  #################################
50  */
51 PRIVATE struct  perm_list *perm_list_insert(struct perm_list  *before,
52                                             int               value);
53 
54 
55 PRIVATE struct  perm_list *perm_list_remove_val(struct perm_list  *head,
56                                                 int               value);
57 
58 
59 PRIVATE struct  perm_list *perm_list_head(struct perm_list *entry);
60 
61 
62 PRIVATE void    perm_list_destroy(struct perm_list *entry);
63 
64 
65 PRIVATE int     cmpfunc(const void  *a,
66                         const void  *b);
67 
68 
69 PRIVATE void    sawada_fast_finish_perm(struct necklace_content *content,
70                                         unsigned int            ***results,
71                                         unsigned int            *result_count,
72                                         unsigned int            *result_size,
73                                         unsigned int            n);
74 
75 
76 PRIVATE void    sawada_fast(unsigned int            t,
77                             unsigned int            p,
78                             unsigned int            s,
79                             struct necklace_content *content,
80                             unsigned int            k,
81                             unsigned int            *r,
82                             struct perm_list        *a,
83                             unsigned int            n,
84                             unsigned int            ***results,
85                             unsigned int            *result_count,
86                             unsigned int            *result_size);
87 
88 
89 /*
90  #################################
91  # BEGIN OF FUNCTION DEFINITIONS #
92  #################################
93  */
94 
95 /*
96  * This is an implementation of Joe Sawada's
97  * "Fast algorithm to generate necklaces with fixed content"
98  * Theoretical Computer Science 301 (2003) 477 - 489
99  */
100 PUBLIC unsigned int **
vrna_enumerate_necklaces(const unsigned int * entity_counts)101 vrna_enumerate_necklaces(const unsigned int *entity_counts)
102 {
103   unsigned int            n, i, *r, **result, result_count, result_size, num_entities;
104   struct necklace_content *content;
105   struct  perm_list       *a;
106 
107   num_entities = 0;
108 
109   if (entity_counts)
110     for (i = 0; entity_counts[i] > 0; i++)
111       num_entities++;
112 
113   /* count total number of strands */
114   for (n = i = 0; i < num_entities; i++)
115     n += entity_counts[i];
116 
117   /* first re-order entity_counts such that k - 1 has most occurences */
118   content = (struct necklace_content *)vrna_alloc(sizeof(struct necklace_content) * num_entities);
119   for (i = 0; i < num_entities; i++) {
120     content[i].value  = i;
121     content[i].count  = entity_counts[i];
122   }
123   qsort(content, num_entities, sizeof(struct necklace_content), cmpfunc);
124 
125   /* create a-list (available characters) */
126   a = NULL;
127   for (i = 0; i < num_entities; i++)
128     a = perm_list_insert(a, i); /* inserts new element before a */
129 
130   /* create r-array */
131   r = (unsigned int *)vrna_alloc(sizeof(unsigned int) * (n + 1));
132 
133   /* prepare result list */
134   result        = NULL;
135   result_count  = 0;
136   result_size   = 20;
137 
138   result = (unsigned int **)vrna_alloc(sizeof(unsigned int *) * (result_size));
139   for (i = 0; i < result_size; i++)
140     result[i] = (unsigned int *)vrna_alloc(sizeof(unsigned int) * (n + 1));
141 
142   /* do initial step */
143   for (i = 1; i <= n; i++)
144     result[result_count][i] = num_entities - 1;
145 
146   result[result_count][1] = 0;
147   content[0].count        = content[0].count - 1;
148   if (content[0].count == 0)
149     a = perm_list_remove_val(a, 0);
150 
151   /* now we iterate to retrieve full permutation(s) */
152   sawada_fast(2, 1, 2, content, num_entities, r, a, n, &result, &result_count, &result_size);
153 
154   /* resize results list to actual requirements */
155   for (i = result_count; i < result_size; i++)
156     free(result[i]);
157   result =
158     (unsigned int **)vrna_realloc(result, sizeof(unsigned int *) * (result_count + 1));
159   result[result_count] = NULL;
160 
161   /* cleanup memory */
162   free(r);
163   free(content);
164   perm_list_destroy(a);
165 
166   return result;
167 }
168 
169 
170 PUBLIC unsigned int
vrna_rotational_symmetry_num(const unsigned int * string,size_t string_length)171 vrna_rotational_symmetry_num(const unsigned int *string,
172                              size_t             string_length)
173 {
174   return vrna_rotational_symmetry_pos_num(string,
175                                           string_length,
176                                           NULL);
177 }
178 
179 
180 PUBLIC unsigned int
vrna_rotational_symmetry_pos_num(const unsigned int * string,size_t string_length,unsigned int ** positions)181 vrna_rotational_symmetry_pos_num(const unsigned int *string,
182                                  size_t             string_length,
183                                  unsigned int       **positions)
184 {
185   const unsigned int  *ptr;
186   unsigned int        matches, max, shifts_size;
187   size_t              *badchars, shift, i;
188 
189   if ((!string) || (string_length == 0)) {
190     if (positions)
191       *positions = NULL;
192 
193     return 0;
194   }
195 
196   /* any string is at least order 1 */
197   matches = 1;
198 
199   if (positions) {
200     shifts_size = 10; /* initial guess for the order of rotational symmetry */
201     *positions  = vrna_alloc(sizeof(unsigned int) * shifts_size);
202 
203     /* store trivial symmetry */
204     (*positions)[matches - 1] = 0;
205   }
206 
207   /* strings of length 1 are order 1 */
208   if (string_length == 1) {
209     /* resize positions array to actual length */
210     if (positions)
211       *positions = vrna_realloc(*positions, sizeof(unsigned int) * matches);
212 
213     return matches;
214   }
215 
216   /* determine largest number/character in string */
217   max = string[0];
218   for (i = 1; i < string_length; i++)
219     max = MAX2(max, string[i]);
220 
221   badchars  = vrna_search_BM_BCT_num(string, string_length, max);
222   shift     = 1; /* skip trivial symmetry */
223 
224   /* detect order of rotational symmetry */
225 
226   /*
227    *  Note, that finding the smallest shift s of the string that
228    *  results in an identity mapping of the string to itself
229    *  already determines the order of rotational symmetry R, i.e.
230    *  R = n / r where n is the length of the string
231    */
232   ptr = vrna_search_BMH_num(string,
233                             string_length,
234                             string,
235                             string_length,
236                             shift,
237                             badchars,
238                             1);
239   if (ptr) {
240     shift   = ptr - string;
241     matches = string_length / shift;
242     if (positions) {
243       *positions = vrna_realloc(*positions, sizeof(unsigned int) * matches);
244       for (i = 0; i < matches; i++)
245         (*positions)[i] = i * shift;
246     }
247   }
248 
249   free(badchars);
250 
251   return matches;
252 }
253 
254 
255 PUBLIC unsigned int
vrna_rotational_symmetry(const char * string)256 vrna_rotational_symmetry(const char *string)
257 {
258   return vrna_rotational_symmetry_pos(string,
259                                       NULL);
260 }
261 
262 
263 PUBLIC unsigned int
vrna_rotational_symmetry_pos(const char * string,unsigned int ** positions)264 vrna_rotational_symmetry_pos(const char   *string,
265                              unsigned int **positions)
266 {
267   const char    *ptr;
268   unsigned int  matches, shifts_size;
269   size_t        *badchars, shift, i, string_length;
270 
271   if (!string) {
272     if (positions)
273       *positions = NULL;
274 
275     return 0;
276   }
277 
278   string_length = strlen(string);
279 
280   if (string_length == 0) {
281     if (positions)
282       *positions = NULL;
283 
284     return 0;
285   }
286 
287   /* any string is at least order 1 */
288   matches = 1;
289 
290   if (positions) {
291     shifts_size = 10; /* initial guess for the order of rotational symmetry */
292     *positions  = vrna_alloc(sizeof(unsigned int) * shifts_size);
293 
294     /* store trivial symmetry */
295     (*positions)[matches - 1] = 0;
296   }
297 
298   /* strings of length 1 are order 1 */
299   if (string_length == 1) {
300     /* resize positions array to actual length */
301     if (positions)
302       *positions = vrna_realloc(*positions, sizeof(unsigned int) * matches);
303 
304     return matches;
305   }
306 
307   /* determine largest number/character in string */
308   badchars = vrna_search_BM_BCT(string);
309 
310   shift = 1; /* skip trivial symmetry */
311 
312   /* detect order of rotational symmetry */
313 
314   /*
315    *  Note, that finding the smallest shift s of the string that
316    *  results in an identity mapping of the string to itself
317    *  already determines the order of rotational symmetry R, i.e.
318    *  R = n / r where n is the length of the string
319    */
320   ptr = vrna_search_BMH(string,
321                         string_length,
322                         string,
323                         string_length,
324                         shift,
325                         badchars,
326                         1);
327 
328   if (ptr) {
329     shift   = ptr - string;
330     matches = string_length / shift;
331     if (positions) {
332       *positions = vrna_realloc(*positions, sizeof(unsigned int) * matches);
333       for (i = 0; i < matches; i++)
334         (*positions)[i] = i * shift;
335     }
336   }
337 
338   free(badchars);
339 
340   return matches;
341 }
342 
343 
344 /**
345  * @brief Compute the order of rotational symmetry for a secondary structure @p s
346  *
347  * This is the size of the stabilizer of @p s, i.e. the set of cyclic permutations
348  * of the strand identifiers of the base pairs in @p s that map @p s onto
349  * itself.
350  */
351 PUBLIC unsigned int
vrna_rotational_symmetry_db(vrna_fold_compound_t * fc,const char * structure)352 vrna_rotational_symmetry_db(vrna_fold_compound_t  *fc,
353                             const char            *structure)
354 {
355   return vrna_rotational_symmetry_db_pos(fc,
356                                          structure,
357                                          NULL);
358 }
359 
360 
361 PUBLIC unsigned int
vrna_rotational_symmetry_db_pos(vrna_fold_compound_t * fc,const char * structure,unsigned int ** positions)362 vrna_rotational_symmetry_db_pos(vrna_fold_compound_t  *fc,
363                                 const char            *structure,
364                                 unsigned int          **positions)
365 {
366   unsigned int n, permutations;
367 
368   permutations = 0;
369 
370   if (positions)
371     *positions = NULL;
372 
373   if ((fc) && (structure)) {
374     n = strlen(structure);
375     if (fc->length != n) {
376       vrna_message_warning("vrna_rotational_symmetry_db*: "
377                            "Sequence and structure have unequal lengths (%d vs. %d)",
378                            fc->length,
379                            n);
380     } else {
381       unsigned int  *shifts, s, r, i, j, ii, jj, string_permutations;
382       short         *pt;
383 
384       /* any structure has rotational symmetry of at least order 1, i.e. identity */
385       string_permutations = permutations = 1;
386 
387       if (positions) {
388         *positions = vrna_alloc(sizeof(unsigned int));
389         /* store trivial symmetry, i.e. identity */
390         (*positions)[0] = 0;
391       }
392 
393       /* single strands only exhibit rotational symmetry if the string is circular */
394       if ((fc->strands == 1) && (fc->params->model_details.circ)) {
395         /* compute rotational symmetry for the circular sequence */
396         string_permutations = vrna_rotational_symmetry_pos(fc->sequence,
397                                                            &shifts);
398       } else if (fc->strands > 1) {
399         /* determine rotational symmetry of the current strand permutation */
400         string_permutations = vrna_rotational_symmetry_pos_num(fc->strand_order,
401                                                                fc->strands,
402                                                                &shifts);
403       }
404 
405       /*
406        *  There are no rotationally symmetric structures if the strand ordering is
407        *  not rotationally symmetric, i.e. string_permutations == 1
408        */
409       if (string_permutations > 1) {
410         /*
411          *  For each cyclic permutation of the strand(s), we check if the structure
412          *  is also an identify mapping. For that purpose, we simply convert the
413          *  structure into a pair table, perform the permutation and check for
414          *  identity with the initial structure
415          */
416         pt  = vrna_ptable(structure);
417         s   = 0;
418 
419         for (r = 1; r < string_permutations; r++) {
420           /*
421            *  initial shift is given by smallest shift on sequence level
422            *  Here, we distinguish circular RNAs from multi stranded ones
423            *  as we've stored the shifts on sequence level and strand level,
424            *  respectively.
425            */
426           if (fc->strands == 1) {
427             /*
428              *  1. Circular string, i.e. shifts[r] already contains the shifts in
429              *  nucleotide positions
430              */
431             s += shifts[r] - shifts[r - 1];
432           } else {
433             /*
434              * 2. For multiple strands, we have to compute the actual nucleotide shift, since
435              * shifts[r] represents a shift by the first shifts[r] strands, i.e. their total
436              * length in nucleotides
437              */
438             for (i = shifts[r - 1]; i < shifts[r]; i++)
439               s += fc->nucleotides[fc->strand_order[i]].length;
440           }
441 
442           /*
443            *  Finally, go through the structure and decrease rot_s if the rotationally
444            *  symmetric arrangement of the strand(s) still leads to an asymetry of the
445            *  structure.
446            */
447           for (i = 1; i <= n; i++) {
448             j   = pt[i]; /* pairing partner of i (1-based), or 0 if unpaired */
449             ii  = (i + s);
450             if (ii > n)
451               ii = (ii % (n + 1)) + 1;  /* position i' in the image of rotation by s */
452 
453             jj = pt[ii];                /* pairing partner of i' (1-based), or 0 if unpaired */
454 
455             /* i is paired? */
456             if (j != 0) {
457               j += s;
458               if (j > n)
459                 j = (j % (n + 1)) + 1;  /* pairing partner of i (1-based) in the image of rotation by s */
460             }
461 
462             /* finally check if j is identical to jj, and stop loop in case of mismatch */
463             if (j != jj)
464               break;
465           }
466 
467           /* check whether we had a match of the structure under current permutation */
468           if (i == n + 1) {
469             /*
470              *  now, we know the minimal shift to retrieve an identical structure, so
471              *  the order of rotational symmetry is given by multiples of that shift
472              */
473             permutations = fc->length / s;
474 
475             /* store permutation if structure was matches successfully */
476             if (positions) {
477               *positions = vrna_realloc(*positions, sizeof(unsigned int) * permutations);
478               for (i = 0; i < permutations; i++)
479                 (*positions)[i] = i * s;
480             }
481 
482             /* we are finished */
483             break;
484           }
485         }
486         free(pt);
487       }
488 
489       free(shifts);
490     }
491   }
492 
493   return permutations;
494 }
495 
496 
497 /*
498  #################################
499  # STATIC helper functions below #
500  #################################
501  */
502 PRIVATE struct perm_list *
perm_list_insert(struct perm_list * before,int value)503 perm_list_insert(struct perm_list *before,
504                  int              value)
505 {
506   struct perm_list *new;
507 
508   new = (struct perm_list *)vrna_alloc(sizeof(struct perm_list));
509 
510   new->value  = value;
511   new->next   = NULL;
512   new->prev   = NULL;
513 
514   if (before) {
515     new->prev     = before->prev;
516     new->next     = before;
517     before->prev  = new;
518   }
519 
520   return new;
521 }
522 
523 
524 PRIVATE struct perm_list *
perm_list_remove_val(struct perm_list * head,int value)525 perm_list_remove_val(struct perm_list *head,
526                      int              value)
527 {
528   struct perm_list *ptr;
529 
530   ptr = head;
531 
532   while (ptr) {
533     if (ptr->value == value) {
534       if (ptr->prev)
535         ptr->prev->next = ptr->next;
536       else
537         head = ptr->next;
538 
539       if (ptr->next)
540         ptr->next->prev = ptr->prev;
541 
542       free(ptr);
543       break;
544     }
545 
546     ptr = ptr->next;
547   }
548 
549   return head;
550 }
551 
552 
553 PRIVATE struct perm_list *
perm_list_head(struct perm_list * entry)554 perm_list_head(struct perm_list *entry)
555 {
556   struct perm_list *head;
557 
558   head = entry;
559   if (head)
560     while (head->prev)
561       head = head->prev;
562 
563   return head;
564 }
565 
566 
567 PRIVATE void
perm_list_destroy(struct perm_list * entry)568 perm_list_destroy(struct perm_list *entry)
569 {
570   struct perm_list *head, *ptr;
571 
572   head = perm_list_head(entry);
573   while (head) {
574     ptr   = head;
575     head  = ptr->next;
576     free(ptr);
577   }
578 }
579 
580 
581 PRIVATE int
cmpfunc(const void * a,const void * b)582 cmpfunc(const void  *a,
583         const void  *b)
584 {
585   return ((struct necklace_content *)a)->count - ((struct necklace_content *)b)->count;
586 }
587 
588 
589 PRIVATE void
sawada_fast_finish_perm(struct necklace_content * content,unsigned int *** results,unsigned int * result_count,unsigned int * result_size,unsigned int n)590 sawada_fast_finish_perm(struct necklace_content *content,
591                         unsigned int            ***results,
592                         unsigned int            *result_count,
593                         unsigned int            *result_size,
594                         unsigned int            n)
595 {
596   unsigned int i;
597 
598   /* adjust results list size if necessary */
599   if ((*result_count + 1) == (*result_size)) {
600     *result_size  *= 1.2;
601     (*results)    =
602       (unsigned int **)vrna_realloc(*results, sizeof(unsigned int *) * (*result_size));
603     for (i = *result_count + 1; i < *result_size; i++)
604       (*results)[i] = (unsigned int *)vrna_alloc(sizeof(unsigned int) * (n + 1));
605   }
606 
607   /* create new (next) entry, copy-over curent result, and transliterate current one */
608   for (i = 1; i <= n; i++) {
609     unsigned int v = (*results)[*result_count][i];
610     (*results)[(*result_count) + 1][i]  = v;
611     (*results)[(*result_count)][i]      = content[v].value;
612   }
613   (*result_count)++;
614 }
615 
616 
617 PRIVATE void
sawada_fast(unsigned int t,unsigned int p,unsigned int s,struct necklace_content * content,unsigned int k,unsigned int * r,struct perm_list * a,unsigned int n,unsigned int *** results,unsigned int * result_count,unsigned int * result_size)618 sawada_fast(unsigned int            t,
619             unsigned int            p,
620             unsigned int            s,
621             struct necklace_content *content,
622             unsigned int            k,
623             unsigned int            *r,
624             struct perm_list        *a,
625             unsigned int            n,
626             unsigned int            ***results,
627             unsigned int            *result_count,
628             unsigned int            *result_size)
629 {
630   if (content[k - 1].count == n - t + 1) {
631     if ((content[k - 1].count == r[t - p]) && (n % p == 0))
632       sawada_fast_finish_perm(content, results, result_count, result_size, n);
633     else if (content[k - 1].count > r[t - p])
634       sawada_fast_finish_perm(content, results, result_count, result_size, n);
635   } else if (content[0].count != (n - t + 1)) {
636     unsigned int      *result = (*results)[*result_count];
637     struct perm_list  *ptr, *before, *after;
638     ptr = perm_list_head(a);
639 
640     after = before = NULL;
641 
642     unsigned int      j   = ptr->value;
643     unsigned int      sp  = s;
644     while (j >= result[t - p]) {
645       r[s]              = t - s;
646       result[t]         = j;
647       content[j].count  = content[j].count - 1;
648 
649       if (content[j].count == 0) {
650         /* detach current element */
651         if (ptr->prev) {
652           before        = ptr->prev;
653           before->next  = ptr->next;
654         } else {
655           before = NULL;
656         }
657 
658         if (ptr->next) {
659           after       = ptr->next;
660           after->prev = ptr->prev;
661         } else {
662           after = NULL;
663         }
664 
665         if (!before)
666           a = ptr->next;
667       }
668 
669       if (j != k - 1)
670         sp = t + 1;
671 
672       if (j == result[t - p])
673         sawada_fast(t + 1, p, sp, content, k, r, a, n, results, result_count, result_size);
674       else
675         sawada_fast(t + 1, t, sp, content, k, r, a, n, results, result_count, result_size);
676 
677       if (content[j].count == 0) {
678         /* re-attach current element */
679         if (before)
680           before->next = ptr;
681         else
682           a = ptr;
683 
684         if (after)
685           after->prev = ptr;
686       }
687 
688       content[j].count = content[j].count + 1;
689 
690       result = (*results)[*result_count];
691 
692       if (ptr->next) {
693         ptr = ptr->next;
694         j   = ptr->value;
695       } else {
696         break;
697       }
698     }
699     result[t] = k - 1;
700   }
701 }
702