1package RNA::Design;
2use strict;
3use warnings;
4use RNA;
5use RNA::Utils;
6use Carp;
7use Data::Dumper;
8
9use Exporter;
10
11our $VERSION    = 1.10;
12our @ISA        = qw(Exporter);
13our @EXPORT     = ();
14our @EXPORT_OK  = ();
15
16=head1 NAME
17
18B<RNA::Design> -- plug-and-play design of nucleic acid sequences
19
20=head1 DESCRIPTION
21
22This package provides various functions to design RNA or DNA molecules.
23Sequences may be optimized to fold into single or multiple conformations. The
24properties of the target molecules have to be specified in the Main Object:
25C<$Design = RNA::Design-E<gt>new()>.
26
27B<RNA::Design> supports structure constraints from multiple secondary
28structures in dot-bracket notation and sequence constraints in I<IUPAC> code
29(i.e. A, C, G, U/T, N, R, Y, S, M, W, K, V, H, D, B). Both sequence and
30structure constraints are strictly enforced during the design process. However,
31B<RNA::Design> avoids difficulties of multi-stable designs where a single
32nucleotide has more than two dependencies.  In that case, base-pair constraints
33are not strictly enforced, but still evaluated in the objective function. A
34warning will be printed to *STDERR*.
35
36Sequence optimization functions can be composed of the sub-functions:
37C<eos(i,t)> (energy of structure), C<efe(i,t)> (ensemble free energy),
38C<prob(i,j,t)> (probability of structure), C<acc(i,j,t)> (accessibility of
39region), C<barr(i,j,t)> (direct path barrier). C<i> and C<j> stand for the
40number of the input-structure and C<t> is optional to specify a temperature
41other than 37 Celsius. These functions exist for circular sequences by
42attatching a C<_circ> flag (for example C<eos_circ(i,t)>. More detailed
43explanation follows below.
44
45By default, two independent penalties are added to the objective function:
46set_base_probs() corrects for a specified distribution of nucleotides,
47set_score_motifs() penalizes particular subsequences.
48
49=head1 METHODS
50
51=cut
52
53=head2 new()
54
55Initialize the Global Object for Nucleic Acid Design. It contains various
56public elements, such as a list of structures specified in the cost- function,
57the cost-function itself, a target distribution of nucleotide counts, and a set
58of penalized nucleic acid sequences. See get/set routines for description of
59public functions.
60
61=cut
62
63sub new {
64  my $class = shift;
65  my $self = {
66    # Internal Stuff
67    fibo  => [0,1],
68    border=> 0,
69    nos   => undef,
70    plist => [],
71    rlist => [],
72
73    # Default options
74    score_motifs => ({
75        AAAAA => 5,
76        CCCCC => 5,
77        GGGGG => 5,
78        UUUUU => 5
79      }),
80    base_probs => ({
81        A => 0.25,
82        C => 0.25,
83        G => 0.25,
84        U => 0.25
85      }),
86    paramfile  => '',
87    verb  => 0,
88
89    structures  => [],
90    cut_point   => -1,
91    findpath    => 10,
92    base_pen    => 500,
93    constraint  => '',
94    optfunc     => 'eos(1)+eos(2) - 2*efe() + 0.3*(eos(1)-eos(2)+0.00)**2',
95
96    base => ({
97        'A' => 0,
98        'C' => 1,
99        'G' => 2,
100        'U' => 3,
101      }),
102
103    pair => [
104      #A C G U
105      [0,0,0,1], # A
106      [0,0,1,0], # C
107      [0,1,0,1], # G
108      [1,0,1,0]  # U
109    ],
110
111    max_const_plen => 20,
112    solution_space => ({
113
114      }),
115
116    iupac => ({
117      'A' => 'A',
118      'C' => 'C',
119      'G' => 'G',
120      'T' => 'U',
121      'U' => 'U',
122      'R' => 'AG',  # purine
123      'Y' => 'CU',  # pyrimidine
124      'S' => 'CG',
125      'M' => 'AC',
126      'W' => 'AU',
127      'K' => 'GU',
128      'V' => 'ACG', # not T
129      'H' => 'ACU', # not G
130      'D' => 'AGU', # not C
131      'B' => 'CGU', # not A
132      'N' => 'ACGU'
133    }),
134    neighbor => ({  # ACGU => ACGU
135      'A' => 'U',   # 1000 => 0001
136      'C' => 'G',   # 0100 => 0010
137      'G' => 'Y',   # 0010 => 0101
138      'U' => 'R',   # 0001 => 1010
139      'R' => 'Y',   # 1010 => 0101
140      'Y' => 'R',   # 0101 => 1010
141      'S' => 'B',   # 0110 => 0111
142      'M' => 'K',   # 1100 => 0011
143      'W' => 'D',   # 1001 => 1011
144      'K' => 'N',   # 0011 => 1111
145      'V' => 'B',   # 1110 => 0111
146      'H' => 'D',   # 1101 => 1011
147      'D' => 'N',   # 1011 => 1111
148      'B' => 'N',   # 0111 => 1111
149      'N' => 'N',   # 1111 => 1111
150    }),
151    iupac_bin => ({ # ACGU
152      'A' => 8,     # 1000,
153      'C' => 4,     # 0100,
154      'G' => 2,     # 0010,
155      'T' => 1,     # 0001,
156      'U' => 1,     # 0001,
157      'R' => 10,    # 1010,  # purine
158      'Y' => 5,     # 0101,  # pyrimidine
159      'S' => 6,     # 0110,
160      'M' => 12,    # 1100,
161      'W' => 9,     # 1001,
162      'K' => 3,     # 0011,
163      'V' => 14,    # 1110,  # not T
164      'H' => 13,    # 1101,  # not G
165      'D' => 11,    # 1011,  # not C
166      'B' => 7,     # 0111,  # not A
167      'N' => 15,    # 1111,
168    }),
169    bin_iupac => [  # ACGU
170      '',           # 0000  0
171      'U',          # 0001  1
172      'G',          # 0010  2
173      'K',          # 0011  3
174      'C',          # 0100  4
175      'Y',          # 0101  5
176      'S',          # 0110  6
177      'B',          # 0111  7
178      'A',          # 1000  8
179      'W',          # 1001  9
180      'R',          # 1010 10
181      'D',          # 1011 11
182      'M',          # 1100 12
183      'H',          # 1101 13
184      'V',          # 1110 14
185      'N'           # 1111 15
186    ],
187  };
188
189
190  bless $self, $class;
191  return $self;
192}
193
194
195=head2 get/set parameters
196
197Get and Set parameters for sequence design. For every C<set> routine, there
198exists the equivalent C<get> routine.
199
200=cut
201
202=head3 set_verbosity(<INT>)
203
204Set the verboisty of warnings to STDERR.
205
206=cut 
207
208sub set_verbosity {
209  my ($self, $var) = @_;
210  $self->{verb}=$var;
211  return $self->{verb};
212}
213
214sub get_verbosity {
215  my $self = shift;
216  return $self->{verb};
217}
218
219=head3 set_optfunc(<STRING>)
220
221Set the cost-function for sequence optimization.
222
223=cut
224
225sub set_optfunc {
226  my ($self, $var) = @_;
227  $self->{optfunc} = $var;
228  return $self->{optfunc};
229}
230
231sub get_optfunc {
232  my $self = shift;
233  return $self->{optfunc};
234}
235
236=head3 set_constraint(<STRING>)
237
238Set the sequence constraints for sequence optimization.
239
240=cut
241
242sub set_constraint {
243  my ($self, $var) = @_;
244  $self->{constraint} = $var;
245  return $self->{constraint};
246}
247
248sub get_constraint {
249  my $self = shift;
250  return $self->{constraint};
251}
252
253=head3 set_score_motifs(<HASH>)
254
255Set a list of sequence motifs that will recieve a penalty (postive number) or
256bonus (negative number) during optimization.
257
258=cut
259
260sub set_score_motifs {
261  my ($self, $var) = @_;
262  $self->{score_motifs} = $var;
263  return $self->{score_motifs};
264}
265
266sub get_score_motifs {
267  my $self = shift;
268  return $self->{score_motifs};
269}
270
271=head3 set_base_probs(<HASH>)
272
273Specify the distribution of nucleotides in the target sequence.
274
275=cut
276
277sub set_base_probs {
278  my ($self, $var) = @_;
279  $self->{base_probs} = $var;
280  return $self->{base_probs};
281}
282
283sub get_base_probs {
284  my $self = shift;
285  return $self->{base_probs};
286}
287
288=head3 set_parameter_file(<STRING>)
289
290Set a parameter file other than rna_turner2004.par.
291
292=cut
293
294sub set_parameter_file {
295  my ($self, $var) = @_;
296  $self->{paramfile} = $var;
297  return $self->{paramfile};
298}
299
300sub get_parameter_file {
301  my $self = shift;
302  return $self->{paramfile};
303}
304
305=head3 set_structures(<ARRAY>)
306
307Set the list of structures for sequence optimization.
308
309=cut
310
311sub set_structures {
312  my $self = shift;
313  my @structs = @_;
314  if ($self->{verb}) {
315    foreach (@structs) {
316      croak "only dot-bracket strings allowed in add_structures()" if m/[^\(\)\.]/g;
317    }
318  }
319  $self->{structures} = [@structs];
320  return $self->{structures};
321}
322
323sub add_structures {
324  my $self = shift;
325  foreach (@_) {
326    if ($self->{verb} && m/[^\(\)\.]/g) {
327      croak "only dot-bracket strings allowed in add_structures()";
328    }
329    push @{$self->{structures}}, $_;
330  }
331  return $self->{structures};
332}
333
334sub get_structures {
335  my $self = shift;
336  return $self->{structures};
337}
338
339=head3 set_cut_point(<INT>)
340
341Set a cut_point when designing two interacting molecules. The cut point
342indicates the first nucleotide of the second sequence.
343
344=cut
345
346sub set_cut_point {
347  my ($self, $var) = @_;
348  if ($self->{verb} && $self->{cut_point} != -1 && $self->{cut_point} != $var) {
349    carp "overwriting old cut_point";
350  }
351  $self->{cut_point} = $var;
352  return $self->{cut_point};
353}
354
355sub get_cut_point {
356  my $self = shift;
357  return $self->{cut_point};
358}
359
360=head3 set_findpath_bound(<INT>)
361
362Set the upper bound for findpath when using B<barr(i,j,t)> in the objective function.
363
364=cut
365
366sub set_findpath_bound {
367  my ($self, $var) = @_;
368  $self->{findpath} = $var;
369  return $self->{findpath};
370}
371
372sub get_findpath_bound {
373  my $self = shift;
374  return $self->{findpath};
375}
376
377=head3 set_base_penalty(<INT>)
378
379Set a weighting factor to correct for desired base percentage.
380
381=cut
382
383sub set_base_penalty {
384  my ($self, $var) = @_;
385  $self->{base_pen} = $var;
386  return $self->{base_pen};
387}
388
389sub get_base_penalty {
390  my $self = shift;
391  return $self->{base_pen};
392}
393
394
395=head3 get_num_of_seqs()
396
397Set the number of sequences after calling explore_sequence_space().
398
399=cut
400
401sub get_num_of_seqs {
402  my $self = shift;
403  return $self->{nos};
404}
405
406=head3 get_num_of_nbors()
407
408Get the number of neighbors of a sequence after calling explore_sequence_space().
409
410=cut
411
412sub get_num_of_nbors {
413  my $self = shift;
414  return $self->{border};
415}
416
417
418=head3 get_fibo(<INT>)
419
420Get the fibronaccy number at position <INT>. The list starts with [0,1] at
421positions 0,1. Note that there is no C<set> routine for fibonacci.
422
423=cut
424
425sub get_fibo {
426  my ($self, $var) = @_;
427  $self->fill_fibo($var) unless defined $self->{fibo}[$var];
428  return $self->{fibo}[$var];
429}
430
431sub fill_fibo {
432  my ($self, $var) = @_;
433  while ($#{$self->{fibo}} < $var) {
434    push @{$self->{fibo}}, $self->{fibo}->[-1] + $self->{fibo}->[-2];
435  }
436  return $self->{fibo};
437}
438
439=head2 find_dependency_paths(@structures)
440
441If @structures is empty, the mutation dependency graph is constructed from all
442structures added with the set_structures() routine. Specify @structures only if
443you do not want all of your structures for the cost-function to be part of the
444depencency graph.
445
446=cut
447
448sub find_dependency_paths {
449  my $self = shift;
450  my @structures = @_;
451  my (@pts, @seen);
452
453  if ($self->{plist}) {
454    #carp "overwriting old dependency-path!\n";
455    $self->{plist}=[];
456  }
457
458  @structures = @{$self->{structures}} unless @structures;
459  croak "no structure input found" unless @structures;
460
461  my @check;
462  for (my $s=0; $s<@structures; ++$s) {
463    my $struct = $structures[$s];
464
465    # TODO: discard if too short!
466    next if $struct !~ m/[\(\)\[\]\.]/;
467
468    # change '[,]' to '(,)'
469    if ($struct =~ m/[\[]/) {
470      if ($struct =~ m/[\)]/) {
471        die "do not mix two different bracket types in one structure!\n";
472      } else {
473        $struct =~ s/\[/\(/g;
474        $struct =~ s/\]/\)/g;
475      }
476    }
477
478    # add, but warn if base-pairs are not compatible
479    my $pt = RNA::Utils::make_pair_table($struct);
480    for my $i (1 .. @$pt) {
481      next unless $$pt[$i]; # next if unpaired
482      my $j=$$pt[$i];
483      if (defined $check[$i]) {
484        my $new=1;
485        foreach (@{$check[$i]}) {
486          if ($_ == $j) {
487            $new=0;
488            $$pt[$i]=$$pt[$j]=0;
489          }
490        }
491        if ($new && @{$check[$i]} > 1) {
492          carp "ignoring basepair $i - $j from structure ".($s+1);
493          $$pt[$i]=$$pt[$j]=0;
494        } elsif ($new) {
495          push @{$check[$i]}, $j;
496        }
497      } else {
498        push @{$check[$i]}, $j;
499      }
500    }
501    push @pts, $pt;
502  }
503
504  # Construct a list of depencency-pathways (ZERO-Based)
505  for my $i (1..$#{$pts[0]}) {
506    next if $seen[$i];
507
508    # initialize path with $i
509    my @path  = ($i-1);
510    $seen[$i] = 1;
511
512    my $fw = 0;
513    my ($j,$cpt);
514
515    # iterate over every pairtable at pos j
516    for my $pt (0 .. $#pts) {
517      $j  = $pts[$pt][$i];
518      $cpt= $pt;
519
520      $fw = !$fw if $j && !$seen[$j];
521      # found a base-pair? go for it
522      while ($j && !$seen[$j]) {
523        if ($fw) {
524          push @path, $j-1;
525        } else {
526          unshift @path, $j-1;
527        }
528        $seen[$j]=1;
529        my $oj=$j;
530        for my $npt (0 .. $#pts) {
531          next if $npt == $cpt;
532          if ($pts[$npt]->[$j] > 0) {
533            $j  = $pts[$npt]->[$j];
534            $cpt= $npt;
535            last;
536          }
537        }
538        $j = ($oj==$j) ? 0 : $j;
539      } # while $j and !seen
540    }
541
542    # duplicate element if we have a cycle
543    if ($j) {
544      croak "messed up cycle!" unless $j-1 == $path[-1];
545      unshift @path, $j-1 unless @path == 2; # 2 => duplicate basepair
546    }
547
548    push @{$self->{plist}}, [@path];
549  }
550  return $self->{plist};
551}
552
553=head2 explore_sequence_space()
554
555This function initializes internal paramters for subsequent sequence
556optimization. It reads the previously computed dependency graph to
557
558=over
559
560=item 1.
561
562update and check the sequence constraint
563
564=item 2.
565
566caclulate the total number of sequences compatible with all constraints
567
568=item 3.
569
570set a stop-condition for optimization from the number of possible mutations
571
572=item 4.
573
574store the number of possible solutions per depencendy-path for subsequent fair
575sampling.
576
577=back
578
579=cut
580
581sub explore_sequence_space {
582  my $self = shift;
583  my $verb = 0;
584
585  my @plist =@{$self->{plist}};
586  my $con   =  $self->{constraint};
587  my %iupac =%{$self->{iupac}};
588
589  croak "need to compute dependency pathways first!" unless @plist;
590  croak "need constraint information!" unless $con;
591
592  my ($border, $max_len, $nos) = (0,0,1);
593  my @slim_plist;
594  my @rand_plist = (0);
595  foreach my $path (@plist) {
596    my @pseq=();
597    my $cycle=0;
598    my $num;
599
600    # Let's do the simple thingy separate
601    if (@$path == 1) {
602      # Get constraint
603      $pseq[0] = substr($con, $path->[0], 1);
604
605      # Statistics (bos, border)
606      $num  = length $iupac{$pseq[0]};
607    } else {
608      $cycle=1 if ($path->[0] eq $path->[-1]);
609
610      # Translate path into nucleotide constraints
611      my $l = ($cycle) ? $#$path-1 : $#$path;
612      push @pseq, substr($con, $$path[$_], 1) for (0 .. $l);
613
614      # Update Constraint
615      @pseq = $self->update_constraint($cycle, @pseq);
616
617      # Enumerate all possible pathways
618      $num = $self->enumerate_pathways($cycle, @pseq);
619    }
620
621    if ($num > 1) {
622      push @slim_plist, $path;# for (1 .. $num);
623      push @rand_plist, $rand_plist[-1]+($num-1);
624      $border += $num;
625      $nos    *= $num;
626    }
627
628    #print "@$path => @pseq | Num: $num\n" if 1;
629    substr($con, $path->[$_], 1, $pseq[$_]) for (0 .. $#pseq);
630  }
631  shift @rand_plist;
632  #print "@rand_plist\n";
633  #print "SLP: ".@slim_plist." => $border\n";
634  #print "$rand_plist[$_] => @{$slim_plist[$_]}\n" foreach (0 .. $#slim_plist);
635  #print "@{$slim_plist[$_]}\n" foreach (0 .. $#slim_plist);
636
637  print "$nos Number of sequences, $border neighbors for each sequence.\n" if $self->{verb};
638
639  $self->{plist}=\@slim_plist;
640  $self->{rlist}=\@rand_plist;
641  $self->{constraint}=$con;
642  $self->{border}=$border unless $self->{border};
643  $self->{nos}=$nos;
644  return ($con, $border, $nos);
645}
646
647=head3 update_constraint($is_cycle, @path)
648
649Rewrite the constraint for a particular dependency path, such that starting an
650arbitrary position would result in a correct path: "NNNRNNN" => "YRYRYRY";
651
652=cut
653
654sub update_constraint {
655  my $self = shift;
656  my $cycle = shift;
657  my @pseq  = @_;
658
659  croak "cycle of uneven length!" if $cycle && @pseq % 2;
660
661  my $jailbreak=0;
662  while (1) {
663    my $mutated = 0;
664    my $i;
665    if ($cycle) {
666      for $i (-$#pseq .. $#pseq-1) {
667        next if $pseq[$i] eq 'N';
668        $mutated = 1 if $self->rewrite_neighbor($pseq[$i], \$pseq[$i+1]);
669      }
670      for $i (reverse(-$#pseq .. $#pseq)) {
671        next if $pseq[$i] eq 'N';
672        $mutated = 1 if $self->rewrite_neighbor($pseq[$i], \$pseq[$i-1]);
673      }
674    } else {
675      for $i (0 .. $#pseq-1) {
676        next if $pseq[$i] eq 'N';
677        $mutated = 1 if $self->rewrite_neighbor($pseq[$i], \$pseq[$i+1]);
678      }
679      for $i (reverse(1 .. $#pseq)) {
680        next if $pseq[$i] eq 'N';
681        $mutated = 1 if $self->rewrite_neighbor($pseq[$i], \$pseq[$i-1]);
682      }
683    }
684    last unless $mutated;
685    croak "escaping from seemingly endless constraint updates" if $jailbreak++ > 100;
686  }
687  return @pseq;
688}
689
690=head3 enumerate_pathways($is_cycle, @path)
691
692For a given depencendy path, calculate the number of compatible sequences, and
693initialze data-structures for C<make_pathseq()>. If it is an unconstrained
694path, use the fibronacci numbers to enumerate pathways. If it is a constrained
695path, exhaustivley count and store the solutions in a tree structure. If the
696path is constrained and longer than C<max_const_plen>, estimate the number of
697pathways with the fibronacci numbers.
698
699=cut
700
701sub enumerate_pathways {
702  my $self = shift;
703  my $cycle= shift;
704  my @pseq = @_;
705
706  my $pstr = join '', @pseq;
707  my $plen = length $pstr;
708
709  croak "cycles must have even length" if $cycle && $plen%2;
710  #croak "first call update_constraint(), then enumerate_pathways()!" if ($pstr =~ m/[N]/) && ($pstr =~ m/[^N]/);
711
712  my %solutions = %{$self->{solution_space}};
713  my $max_plen  = $self->{max_const_plen};
714  my %iupac     = %{$self->{iupac}};
715  my %base      = %{$self->{base}};
716  my @pair      = @{$self->{pair}};
717
718  if (exists $solutions{$pstr.$cycle}) {
719    return scalar($solutions{$pstr.$cycle}->get_leaves);
720  } elsif ($pstr =~ m/[N]/ && $pstr !~ m/[^N]/) {
721    # its a path with all N's => (fibronacci: switch.pl)
722    my $l = ($cycle) ? $plen-1 : $plen;
723    return 2*($self->get_fibo($plen+1) + $self->get_fibo($l));
724  }
725  # enable this as soon as fair sampling in this case works!
726  # elsif ($pstr !~ m/[^RY]/g) {
727  #   # its a path with all RY's
728  #   my $l = ($cycle) ? $plen-1 : $plen;
729  #   return ($self->get_fibo($plen+1) + $self->get_fibo($l));
730  # }
731  #
732  elsif ($plen > $max_plen) {
733    # path too long to enumerate with constraints
734    carp "constrained path too long, fallback to greedy heuristic!";
735    my $l = ($cycle) ? $plen-1 : $plen;
736    return ($self->get_fibo($plen+1) + $self->get_fibo($l));
737    # calculate RYRYR as NNNN/2 and use greedy-shuffle
738
739    # compute the theoretical sequence length (n) to construct an
740    # example that breaks plen: n = (plen-1)*4 + 1
741    # so for plen=26 => n=101
742    # if we assume a helix has 4 base-pairs, it is:
743    # n = (plen-1)*12 + 1 = 301
744  }
745
746  my $tree = RNA::Design::Tree->new();
747  my @le = $tree->get_leaves;
748  $tree->init_new_leaves;
749  foreach (split //, $iupac{$pseq[0]}) {
750    $tree->push_to_current_leaves($_, $le[0]);
751  }
752
753  for my $i (1 .. $#pseq) {
754    my @choices = split //, $iupac{$pseq[$i]};
755    my @leaves  = $tree->get_leaves;
756    $tree->init_new_leaves;
757
758    if ($i == $#pseq && $cycle) {
759      foreach my $l (@leaves) {
760        foreach my $c (@choices) {
761          if ($pair[$base{$$l[0]}][$base{$c}]) {
762            # check if $c also pairs with anything in the first row!
763            my $string = $tree->get_full_path($l);
764            my $f = substr $string, 0, 1;
765            if ($pair[$base{$c}][$base{$f}]) {
766              $tree->push_to_current_leaves($c, $l);
767            }
768
769          }
770        }
771      }
772    } else {
773      foreach my $c (@choices) {
774        foreach my $l (@leaves) {
775          if ($pair[$base{$$l[0]}][$base{$c}]) {
776            $tree->push_to_current_leaves($c, $l);
777          }
778        }
779      }
780    }
781  }
782
783  #DEBUG:
784  #print $pstr.$cycle."\n";
785  #foreach ($tree->get_leaves) {
786  #  print $tree->get_full_path($_)."\n";
787  #}
788
789  $self->{solution_space}->{$pstr.$cycle} = $tree;
790  return scalar($tree->get_leaves);
791}
792
793=head3 rewrite_neighbor($iupac, \$iupac)
794
795Takes a IUPAC letter and a reference to a base-pairing IUPAC letter. Updates
796the IUPAC code if necessary and returns 0 or 1 if there exists a neighbor or
797not.
798
799=cut
800
801sub rewrite_neighbor {
802  my ($self, $c1, $c2) = @_;
803  print "$c1 -> $$c2\n" if 0;
804
805  my $nb = $self->{neighbor}->{$c1};
806  if ($nb eq $$c2) {
807    return 0;
808  } else {
809    my $bin_c2 = $self->{iupac_bin}{$$c2};
810    my $bin_nb = $self->{iupac_bin}{$nb};
811
812    my $new_nb = $self->{bin_iupac}[($bin_c2 & $bin_nb)];
813    croak "sequence constraints cannot be fulfilled\n" unless $new_nb;
814
815    return 0 if $new_nb eq $$c2;
816
817    $$c2 = $new_nb;
818    return 1;
819  }
820}
821
822=head2 find_a_sequence()
823
824Uses the dependency graph and the sequence constraint to make a random sequence.
825
826=cut
827
828sub find_a_sequence {
829  # make random sequence or randomly mutate sequence
830  # by replacing all @plists by random paths
831  my $self = shift;
832
833  my @plist=@{$self->{plist}};
834  my $con  = $self->{constraint};
835  my $seq  = $con;
836
837  foreach my $path (@plist) {
838    my @pseq = ();
839    my $cycle=0;
840
841    $cycle = 1 if ($$path[0] eq $$path[-1] && (@$path>1));
842
843    # Translate path into nucleotide constraints
844    my $l = ($cycle) ? $#$path-1 : $#$path;
845    push @pseq, substr($con, $$path[$_], 1) for (0 .. $l);
846
847    @pseq = $self->make_pathseq($cycle, @pseq);
848
849    substr($seq, $$path[$_], 1, $pseq[$_]) for (0 .. $#pseq);
850  }
851  return $seq;
852}
853
854=head2 optimize_sequence(sequence, maximum_number_of_mutations)
855
856The standard optimization function. Whenever a sequence mutation results in a
857better score from the cost-function, replaces the current solution. In case
858there are too many useless mutations, (see explore_sequence_space()), the
859current sequence is considered local-optimal and returned.
860
861=cut
862
863sub optimize_sequence {
864  my $self  = shift;
865  my $refseq= shift;
866  my $m     = shift;
867
868  my $verb    = $self->{verb};
869  my $border  = $self->{border};
870  my $refcost = $self->eval_sequence($refseq);
871  my ($mutseq,$newcost);
872
873  my $reject = 0;
874  my %seen = ();
875  for my $d (1..$m) {
876    $mutseq = $self->mutate_seq($refseq);
877    if (!exists $seen{$mutseq}) {
878      $seen{$mutseq}=1;
879      $newcost = $self->eval_sequence($mutseq);
880      if ($newcost < $refcost) {
881        $refseq = $mutseq;
882        $refcost= $newcost;
883        $reject = 0;
884        printf STDERR "%4d %s %6.3f\n", $d, $mutseq, $newcost if $verb;
885      }
886    }
887    else {
888      $seen{$mutseq}++;
889      last if (++$reject >= 2*$border);
890    }
891  }
892  return $refseq;
893}
894
895=head3 mutate_seq($sequence)
896
897Choose a random depencency path, mutate it using make_pathseq() of the sequence
898constraint. Choses randomly according to the number of solutions, but can be
899implemented more efficiently in C<log(n)> time!
900
901=cut
902
903sub mutate_seq {
904  my $self  = shift;
905  my $seq   = shift;
906
907  my $con   = $self->{constraint};
908  my @plist = @{$self->{plist}};
909  my @rlist = @{$self->{rlist}};
910
911  my $reject_same_solution=1;
912
913  my ($path, @pseq, @ref_pseq);
914  my $cycle = 0;
915
916  # chose a random path => weight by number of solutions to make it fair!
917  return $seq unless defined $rlist[-1];
918  my $rand = (int rand $rlist[-1]) + 1;
919  for (my $i=0; $i<=$#plist; ++$i) {
920    if ($rand <= $rlist[$i]) {
921      $path = \@{$plist[$i]};
922      last;
923    }
924  }
925
926  # this would be random independent of the pathlength!
927  #$path = $plist[int rand @plist];
928  #print "@$path\n";
929
930  $cycle = 1 if ($$path[0] eq $$path[-1] && (@$path>1));
931
932  my $l = ($cycle) ? $#$path-1 : $#$path;
933  push @pseq, substr($con, $$path[$_], 1) for (0 .. $l);
934
935  if ($reject_same_solution) {
936    push @ref_pseq, substr($seq, $$path[$_], 1) for (0 .. $l);
937    my @npseq;
938    my $jailbreak = 0;
939    do {
940      @npseq = $self->make_pathseq($cycle, @pseq);
941      last if ++$jailbreak > 100;
942    } while (join('', @ref_pseq) eq join('', @npseq));
943    @pseq = @npseq;
944  } else {
945    @pseq = $self->make_pathseq($cycle, @pseq);
946  }
947
948  substr($seq, $$path[$_], 1, $pseq[$_]) for (0 .. $#pseq);
949  return $seq;
950}
951
952=head3 make_pathseq($is_cycle, @path)
953
954Takes an Array of IUPAC code and rewrites it into a valid array of Nucleotides.
955There are four cases: (i) path of length 1 is a randomly shuffled nucleotide
956accoriding to IUPAC-letter, (ii) The solution-tree has been built during
957explore_sequence_space() and so we use it to sample, (iii) the sequence is only
958N's, so the fibronacci numbers are used for sampling (i.e. the switch.pl
959method), and (iv) the path is constrained and longer than C<max_const_plen>, so
960the paths are sampled using a greedy heuristic.
961
962=cut
963
964sub make_pathseq {
965  my $self = shift;
966  my $cycle= shift;
967  my @pseq = @_;
968
969  my $pstr = join '', @pseq;
970  my $plen = length $pstr;
971
972  croak "cycles must have even length" if $cycle && $plen%2;
973
974  my %iupac     = %{$self->{iupac}};
975  my %iupac_bin = %{$self->{iupac_bin}};
976  my %solutions = %{$self->{solution_space}};
977  my $max_plen  =   $self->{max_const_plen};
978
979  if ($plen == 1) {
980    # if path length 1 => return random iupac
981    my @i = split '', $iupac{$pstr};
982    return ($i[int rand @i]);
983  } elsif (exists $solutions{$pstr.$cycle}) {
984    my $tree = $solutions{$pstr.$cycle};
985    my @i = $tree->get_leaves;
986    return (split '', $tree->get_full_path($i[int rand @i]));
987  } elsif ($pstr !~ m/[^N]/g) {
988    # its a path with all N's => do it like switch.pl
989
990    #TODO: need to implement this method for RY-pathways as well
991
992    # This is original switch.pl code that I don't understand!
993    my $l = $plen;
994    my $ll = ($cycle) ? $l-1 : $l;
995    my $n = 2*($self->get_fibo($l+1) + $self->get_fibo($ll));
996
997    my $rand = rand($n);
998
999    my @seq = ();
1000    # set first base in sequence
1001    if ($rand < $self->get_fibo($ll)) {
1002      push @seq, 'A', 'U';
1003    } elsif ($rand < 2*$self->get_fibo($ll)) {
1004      push @seq, 'C', 'G';
1005      $rand -= $self->get_fibo($ll);
1006    } else {
1007      $rand -= 2*$self->get_fibo($ll);
1008      $ll=$l;
1009      push @seq, ($rand>=$self->get_fibo($l+1))?'U':'G';
1010      $rand -= $self->get_fibo($l+1) if $rand >= $self->get_fibo($l+1);
1011    }
1012
1013    # grow sequence to length $l
1014    # if we have a cycle starting with A or C $ll=$l-1, else $ll=$l
1015    while (@seq < $l) {
1016      if ($rand < $self->get_fibo($ll-@seq)) {
1017        push @seq, 'C','G' if ($seq[-1] eq 'G');
1018        push @seq, 'A','U' if ($seq[-1] eq 'U');
1019      } else {
1020        $rand -= $self->get_fibo($ll-@seq);
1021        push @seq, ($seq[-1] eq 'G') ? 'U' : 'G';
1022      }
1023    }
1024    pop @seq if (@seq > $l); # in case we've added one base too many
1025    return @seq;
1026  } else {
1027    # do a greedy constraint shuffling!
1028    croak "some special case not covered yet! (@pseq)" unless $plen > $max_plen;
1029
1030    my @path = @pseq;
1031    for (my $i=0; $i<=$#path; ++$i) {
1032      my $c = $path[$i];
1033      my @i = split '', $iupac{$c};
1034
1035      $path[$i] = $i[int rand @i];
1036
1037      if ($i==0 && $cycle) {
1038        my ($a, $j) = ($path[$i], -1);
1039        $a=$path[$j--] while $self->rewrite_neighbor($a, \$path[$j]);
1040      }
1041
1042      $self->rewrite_neighbor($path[$i], \$path[$i+1]) if $i < $#path;
1043    }
1044    return @path;
1045  }
1046  return;
1047}
1048
1049=head2 eval_sequence($sequence)
1050
1051Evaluate a given sequence according to the cost function. The final score is
1052calculated as the sum over three values:
1053
1054(1) The objective function is a simplified interface to access functions of the
1055*ViennaRNA package*. Every input secondary structure *can* serve as full target
1056conformation or structure constraint. The objective function can include terms
1057to compute the free energy of a target structure, the (constrained) ensemble
1058free energy, the (conditional) probabilities of secondary structure elements,
1059the accessibility of subsequences and the direct-path barriers between two
1060structures. All of these terms exist for linear, circular, and cofolded
1061molecules, as well as for custom specified temperatures. In the following
1062examples, the indices B<i, j> correspond to the secondary structures specified
1063in the input file, B<t> is optional to specify the temperature in Celsius. By
1064default, computations use the standard temperature of 37C.
1065
1066
1067
10682) score_motifs(): A set of sequence motifs that receive an extra penalty.
1069Whenever one of these motifs is found in a sequence, its penalty (or bonus) is
1070added to the score of the objective function.
1071
10723) base_prob(): A vector with nuleotide distributions is compared with a
1073target distribution vector. The similarity between the vectors is expressed
1074as a value s=[0,1]. In order to include and weight this similarity in the
1075final score, the penalty is calculated as (1-s)*k, where k is a constant
1076adjustable using set_base_penalty()
1077
1078=cut
1079
1080sub eval_sequence {
1081  my $self = shift;
1082  my $seq  = shift;
1083
1084  my $verb = $self->{verb};
1085  my $ofun = $self->{optfunc};
1086  my $ParamFile = $self->{paramfile};
1087  warn "$seq\n".$ofun."\n" if $verb > 1;
1088  croak "no structures specified!\n" unless @{$self->{structures}};
1089
1090  my %results;
1091  $RNA::fold_constrained=1;
1092  $RNA::cut_point=$self->{cut_point};
1093  RNA::read_parameter_file($ParamFile) if ($ParamFile);
1094  foreach my $func (qw/eos eos_circ efe efe_circ prob prob_circ barr acc/) {
1095    while ($ofun =~ m/$func\(([0-9\,\s]*)\)/) {
1096      warn "next: $&\n" if $verb > 1;
1097
1098      if (exists $results{$&}) {
1099        #print "OR: $& => $results{$&}\n";
1100      } else {
1101        my $res = eval("\$self->$func(\$seq, $1)");
1102        $res = sprintf "%.2f", $res;
1103        croak $@ if $@;
1104        #print "NR: $& => $res\n";
1105        $results{$&}=$res;
1106      }
1107      $ofun =~ s/$func\(([0-9\,\s]*)\)/ $results{$&} /;
1108    }
1109  }
1110  $RNA::cut_point=-1;
1111  $RNA::fold_constrained=0;
1112
1113  warn $ofun."\n" if $verb > 1;
1114  my $r = eval $ofun;
1115  croak $@ if $@;
1116
1117  my $bpen = $self->{base_pen};
1118
1119  my $p = ($bpen) ? $self->base_prob($seq)*$bpen : 0;
1120  my $s = $self->score_motifs($seq);
1121  #print "$r, $p, $s, \n";
1122
1123  return $r+$p+$s;
1124}
1125
1126sub score_motifs {
1127  my ($self, $seq) = @_;
1128  my $penalty = 0;
1129
1130  my %motifs = %{$self->{score_motifs}};
1131  my $cpnt = $self->{cut_point};
1132
1133  if ($cpnt == -1) {
1134    foreach my $string (keys %motifs) {
1135      $penalty += $motifs{$string} while ($seq =~ m/$string/g);
1136    }
1137  } else {
1138    my $left  = substr $seq, 0, $cpnt-1;
1139    my $right = substr $seq, $cpnt-1;
1140    #print "$left&$right\n";
1141    foreach my $string (keys %motifs) {
1142      $penalty += $motifs{$string} while ($left   =~ m/$string/g);
1143      $penalty += $motifs{$string} while ($right  =~ m/$string/g);
1144    }
1145  }
1146  return $penalty;
1147}
1148
1149sub base_prob {
1150  my ($self, $seq) = @_;
1151
1152  my %prob = %{$self->{base_probs}};
1153  my %base;
1154  $base{$_}++ foreach (split //, $seq);
1155
1156  my $cost=1;
1157  foreach my $k (keys %base) {
1158    $base{$k} /= length $seq;
1159    # print "Dist $k $base{$k} vs $prob{$k} => \n";
1160
1161    #$cost += (abs($base{$k}-$prob{$k})/$prob{$k});
1162    $cost -= sqrt($base{$k}*$prob{$k});
1163
1164    #print "$base{$k}, $prob{$k}, $cost\n";
1165  }
1166  return $cost;
1167}
1168
1169# sub ensemble_defect {
1170#   my ($self, $seq, $i, $t) = @_;
1171#   my $probs = get_base_pair_probs();
1172#
1173# }
1174#
1175# sub cofold_defect2 {
1176#   my ($self, $seq) = @_;
1177#
1178#   my $cpnt = $self->{cut_point};
1179#   my $left  = substr $seq, 0, $cpnt-1;
1180#   my $right = substr $seq, $cpnt-1;
1181#
1182#   $RNA::cut_point = $cpnt = length($right)+1;
1183#   my ($costruct, $comfe) = RNA::cofold($right.$right);
1184#   substr $costruct, $cpnt-1,0,'&';
1185#   $RNA::cut_point = -1;
1186#   #print "$costruct, $comfe:\n";
1187#   return 1 if ($comfe >= 0);
1188#
1189#   my @chars = split '', $costruct;
1190#   my @stack;
1191#   my $DIM=0;
1192#   for my $i (0 .. $#chars) {
1193#     if ($chars[$i] eq '&') {
1194#       $DIM = (@stack) ? 1 : 0;
1195#       # got two monomers
1196#       last;
1197#     } elsif ($chars[$i] eq '.') {
1198#       next;
1199#     } elsif ($chars[$i] eq '(') {
1200#       push @stack, ($i);
1201#     } elsif ($chars[$i] eq ')') {
1202#       my $j = pop @stack;
1203#       if ($j < $cpnt && $cpnt < ($i)) {
1204#         $DIM=1; last;
1205#       }
1206#     }
1207#   }
1208#   return ($DIM) ? -$comfe : 1;
1209# }
1210
1211=head2 objective function tools
1212
1213Subfunctions that can be specified in the objective function.
1214
1215=cut
1216
1217=head3 barr(i, j, t)
1218
1219direct path energy barrier from input structure number i to input structure
1220number j computed using findpath. The upper bound of findpath is adjustable
1221with set_findpath_bound()
1222
1223=cut
1224
1225sub barr {
1226  my ($self, $seq, $i, $j, $t) = @_;
1227  $t = 37 unless defined $t;
1228  $RNA::temperature=$t;
1229
1230  #TODO: make set_routine
1231  my $fpd = $self->{findpath};
1232
1233  croak "cannot find structure number $i" if $i && !$self->{structures}[$i-1];
1234  croak "cannot find structure number $j" if $i && !$self->{structures}[$j-1];
1235
1236  my $s_i = ($i) ? $self->{structures}[$i-1] : undef;
1237  my $s_j = ($j) ? $self->{structures}[$j-1] : undef;
1238
1239  my $e_i = sprintf("%.2f", RNA::energy_of_structure($seq, $s_i, 0));
1240  my $sE = RNA::find_saddle($seq, $s_i, $s_j, $fpd);
1241
1242  return sprintf("%.2f", ($sE/100)-$e_i);
1243}
1244
1245=head3 prob(i, j, t)
1246
1247Probability of input structure number B<i> given input structure number B<j>.
1248The probability is computed from the equilibrium partition functions:
1249B<Pr(i|j)=Z_i/Z_j>. Hence, the constraint B<i> B<must> include the constraint
1250of B<j> B<(Pr(i|j)=Z_i+j/Z_j)>. Omitting B<j>, or specifying B<j=0> computes the
1251probability of B<i> in the unconstrained ensemble B<Pr(i)=Z_i/Z>.
1252
1253=cut
1254
1255sub prob {
1256  my ($self, $seq, $i, $j, $t) = @_;
1257  $t = 37 unless defined $t;
1258  $RNA::temperature=$t;
1259  my $kT=0.6163207755;
1260  if ($t != 37) {
1261    $kT /= 310.15;
1262    $kT *= ($t+273.15);
1263  }
1264
1265  croak "prob(): no structure input" unless $i;
1266  croak "prob(): cannot find structure number $i" if !$self->{structures}[$i-1];
1267  croak "prob(): cannot find structure number $j" if $j && !$self->{structures}[$j-1];
1268
1269  my $s_i = $self->{structures}[$i-1];
1270  my $s_j = ($j) ? $self->{structures}[$j-1] : undef;
1271
1272  # A hack to make sure people do not fuck up their probability calculations
1273  # if ($s_j) {
1274  #   print $s_i."\n";
1275  #   my $c=0;
1276  #   foreach (split //, $s_i) {
1277  #     if ($_ ne '.') {
1278  #       print "$c, $_ vs ".substr($s_j, $c, 1)."\n";
1279  #       substr($s_j, $c, 1, $_);
1280  #     }
1281  #     $c++;
1282  #   }
1283  # }
1284
1285  my ($dGi, $dGj, $tmp);
1286  if ($RNA::cut_point == -1) {
1287    # seemingly useless string modification
1288    # to avoid ViennaRNA SWIG interface bugs
1289    $tmp=$s_i; if ($tmp) { $tmp.='.'; $tmp=substr($tmp,0,-1); }
1290    $dGi = RNA::pf_fold($seq, $tmp);
1291    $tmp=$s_j; if ($tmp) { $tmp.='.'; $tmp=substr($tmp,0,-1); }
1292    $dGj = RNA::pf_fold($seq, $tmp);
1293  } else {
1294    $tmp=$s_i; if ($tmp) { $tmp.='.'; $tmp=substr($tmp,0,-1); }
1295    $dGi = RNA::co_pf_fold($seq, $tmp);
1296    $tmp=$s_j; if ($tmp) { $tmp.='.'; $tmp=substr($tmp,0,-1); }
1297    $dGj = RNA::co_pf_fold($seq, $tmp);
1298  }
1299
1300  return exp(($dGj-$dGi)/$kT);
1301}
1302
1303=head3 prob_circ(i, j, t)
1304
1305prob(i,j,t) for circular molecules.
1306
1307=cut
1308
1309sub prob_circ {
1310  my ($self, $seq, $i, $j, $t) = @_;
1311  $t = 37 unless defined $t;
1312  $RNA::temperature=$t;
1313  my $kT=0.6163207755;
1314  if ($t != 37) {
1315    $kT /= 310.15;
1316    $kT *= ($t+273.15);
1317  }
1318
1319  croak "prob_circ(): no structure input" unless $i;
1320  croak "prob_circ(): cannot find structure number $i" if !$self->{structures}[$i-1];
1321  croak "prob_circ(): cannot find structure number $j" if $j && !$self->{structures}[$j-1];
1322  carp  "prob_circ(): ignoring cut_point" if ($RNA::cut_point != -1);
1323
1324  my $s_i = $self->{structures}[$i-1];
1325  my $s_j = ($j) ? $self->{structures}[$j-1] : undef;
1326
1327  my ($dGi, $dGj, $tmp);
1328  # seemingly useless string modification
1329  # to avoid ViennaRNA SWIG interface bugs
1330  $tmp=$s_i; if ($tmp) { $tmp.='.'; $tmp=substr($tmp,0,-1); }
1331  $dGi = RNA::pf_circ_fold($seq, $tmp);
1332  $tmp=$s_j; if ($tmp) { $tmp.='.'; $tmp=substr($tmp,0,-1); }
1333  $dGj = RNA::pf_circ_fold($seq, $tmp);
1334
1335  return exp(($dGj-$dGi)/$kT);
1336}
1337
1338=head3 acc(i, j, t)
1339
1340Accessibility of an RNA/DNA motif. This function is exactly the same as
1341prob(i,j,t), however, it is ment to be used with constraints that use the
1342character 'x' to specify strictly unpaired regions.
1343
1344=cut
1345
1346sub acc {
1347  return prob(@_);
1348}
1349
1350=head3 acc_circ(i, j, t)
1351
1352acc(i,j,t) for circular molecules.
1353
1354=cut
1355
1356sub acc_circ {
1357  return prob_circ(@_);
1358}
1359
1360=head3 eos(i, t)
1361
1362Free energy of input structure number B<i> at temperature B<t>.
1363
1364=cut
1365
1366sub eos {
1367  my ($self, $seq, $i, $t) = @_;
1368  $t = 37 unless defined $t;
1369  $RNA::temperature=$t;
1370  croak "eos(): no structure input" unless $i;
1371  croak "eos(): cannot find structure number $i" unless $self->{structures}[$i-1];
1372  my $str = $self->{structures}[$i-1];
1373  # ($str =~ m/[\(\)]/) ? k
1374  return RNA::energy_of_struct($seq, $str);
1375}
1376
1377=head3 eos_circ(i, t)
1378
1379Free energy of the circular input structure number B<i> at temperature B<t>.
1380
1381=cut
1382
1383sub eos_circ {
1384  my ($self, $seq, $i, $t) = @_;
1385  $t = 37 unless defined $t;
1386  $RNA::temperature=$t;
1387  croak "eos_circ(): no structure input" unless $i;
1388  croak "eos_circ(): cannot find structure number $i" unless $self->{structures}[$i-1];
1389  carp  "eos_circ(): ignoring cut_point" if ($RNA::cut_point != -1);
1390  my $str = $self->{structures}[$i-1];
1391  return RNA::energy_of_circ_struct($seq, $str);
1392}
1393
1394=head3 efe(i, t)
1395
1396Free energy of a secondary structure ensemble, constraint to be compatible with
1397input structure number B<i>. Omitting B<i>, or specifying B<i=0> computes the
1398unconstraint ensemble free energy.
1399
1400=cut
1401
1402sub efe {
1403  my ($self, $seq, $i, $t) = @_;
1404  $t = 37 unless defined $t;
1405  $RNA::temperature=$t;
1406  croak "efe(): cannot find structure number $i" if $i && !$self->{structures}[$i-1];
1407
1408  my $str = ($i) ? $self->{structures}[$i-1] : undef;
1409  # seemingly useless string modification
1410  # to avoid ViennaRNA SWIG interface bugs
1411  if ($str) {
1412    $str.='.'; $str=substr($str,0,-1);
1413  }
1414  return ($RNA::cut_point == -1) ? RNA::pf_fold($seq, $str) : RNA::co_pf_fold($seq, $str);
1415}
1416
1417=head3 efe_circ(i, t)
1418
1419Free energy of a circular secondary structure ensemble, constraint to be
1420compatible with input structure number B<i>. Omitting B<i>, or specifying
1421B<i=0> computes the unconstraint ensemble free energy.
1422
1423=cut
1424
1425sub efe_circ {
1426  my ($self, $seq, $i, $t) = @_;
1427  $t = 37 unless defined $t;
1428  $RNA::temperature=$t;
1429  croak "efe_circ(): cannot find structure number $i" if $i && !$self->{structures}[$i-1];
1430  carp  "efe_circ(): ignoring cut_point" if ($RNA::cut_point != -1);
1431
1432  my $str = ($i) ? $self->{structures}[$i-1] : undef;
1433  # seemingly useless string modification
1434  # to avoid ViennaRNA SWIG interface bugs
1435  if ($str) { $str.='.'; $str=substr($str,0,-1); }
1436  return RNA::pf_circ_fold($seq, $str);
1437}
1438
14391;
1440
1441package RNA::Design::Tree;
1442
1443use strict;
1444use warnings;
1445use Data::Dumper;
1446
1447sub new {
1448  my $class = shift;
1449  my $self  = {
1450    tree => [
1451      [
1452        ['root', undef]
1453      ]
1454    ],
1455  };
1456
1457  bless $self, $class;
1458  return $self;
1459}
1460
1461sub init_new_leaves {
1462  my $self = shift;
1463  push @{$self->{tree}}, [];
1464  return $self->{tree};
1465}
1466
1467sub push_to_current_leaves {
1468  my $self = shift;
1469  my ($string, $parent) = @_;
1470
1471  my $tree = $self->{tree};
1472  my $leaves = $$tree[-1];
1473
1474  push @$leaves, [$string, $parent];
1475
1476  return $self->{tree};
1477}
1478
1479sub get_leaves {
1480  my $self = shift;
1481  return @{${$self->{tree}}[-1]};
1482}
1483
1484sub get_first {
1485  my $self = shift;
1486  return @{${$self->{tree}}[1]};
1487}
1488
1489sub get_full_path {
1490  my $self  = shift;
1491  my $leave = shift;
1492
1493  my $string;
1494  my ($char, $ref) = @$leave;
1495  while ($char ne 'root') {
1496    $string .= $char;
1497    ($char, $ref) = @$ref;
1498  }
1499  return reverse $string;
1500}
1501
15021;
1503
1504=head1 AUTHOR
1505
1506Stefan Badelt (stef@tbi.univie.ac.at)
1507
1508=head1 VERSION-LOG
1509
1510  1.10 -- changed in cost function interface:
1511       *added acc(i,j,t)
1512       *added acc_circ(i,j,t)
1513       *changed score_motifs():
1514            reads a hash of (motif => score) pairs
1515            and adds the scores to the cost-function
1516       *changed base_prob():
1517            compares vector of nucleotide frequencies with specified vector
1518       *added base_penalty:
1519            is a factor to weight base_prob() score
1520       *changed eval_sequence():
1521          now computes score as sum over
1522          objective + base_prob + score_motifs
1523
1524       -- fixed SWIG interface problems for:
1525          prob(i,j,t),
1526          prob_circ(i,j,t)
1527
1528       -- bugfix in find_dependency_paths()
1529
1530  1.00 -- initial release (ViennaRNA-v2.2)
1531
1532=cut
1533