1package RNA::Design; 2use strict; 3use warnings; 4use RNA; 5use RNA::Utils; 6use Carp; 7use Data::Dumper; 8 9use Exporter; 10 11our $VERSION = 1.10; 12our @ISA = qw(Exporter); 13our @EXPORT = (); 14our @EXPORT_OK = (); 15 16=head1 NAME 17 18B<RNA::Design> -- plug-and-play design of nucleic acid sequences 19 20=head1 DESCRIPTION 21 22This package provides various functions to design RNA or DNA molecules. 23Sequences may be optimized to fold into single or multiple conformations. The 24properties of the target molecules have to be specified in the Main Object: 25C<$Design = RNA::Design-E<gt>new()>. 26 27B<RNA::Design> supports structure constraints from multiple secondary 28structures in dot-bracket notation and sequence constraints in I<IUPAC> code 29(i.e. A, C, G, U/T, N, R, Y, S, M, W, K, V, H, D, B). Both sequence and 30structure constraints are strictly enforced during the design process. However, 31B<RNA::Design> avoids difficulties of multi-stable designs where a single 32nucleotide has more than two dependencies. In that case, base-pair constraints 33are not strictly enforced, but still evaluated in the objective function. A 34warning will be printed to *STDERR*. 35 36Sequence optimization functions can be composed of the sub-functions: 37C<eos(i,t)> (energy of structure), C<efe(i,t)> (ensemble free energy), 38C<prob(i,j,t)> (probability of structure), C<acc(i,j,t)> (accessibility of 39region), C<barr(i,j,t)> (direct path barrier). C<i> and C<j> stand for the 40number of the input-structure and C<t> is optional to specify a temperature 41other than 37 Celsius. These functions exist for circular sequences by 42attatching a C<_circ> flag (for example C<eos_circ(i,t)>. More detailed 43explanation follows below. 44 45By default, two independent penalties are added to the objective function: 46set_base_probs() corrects for a specified distribution of nucleotides, 47set_score_motifs() penalizes particular subsequences. 48 49=head1 METHODS 50 51=cut 52 53=head2 new() 54 55Initialize the Global Object for Nucleic Acid Design. It contains various 56public elements, such as a list of structures specified in the cost- function, 57the cost-function itself, a target distribution of nucleotide counts, and a set 58of penalized nucleic acid sequences. See get/set routines for description of 59public functions. 60 61=cut 62 63sub new { 64 my $class = shift; 65 my $self = { 66 # Internal Stuff 67 fibo => [0,1], 68 border=> 0, 69 nos => undef, 70 plist => [], 71 rlist => [], 72 73 # Default options 74 score_motifs => ({ 75 AAAAA => 5, 76 CCCCC => 5, 77 GGGGG => 5, 78 UUUUU => 5 79 }), 80 base_probs => ({ 81 A => 0.25, 82 C => 0.25, 83 G => 0.25, 84 U => 0.25 85 }), 86 paramfile => '', 87 verb => 0, 88 89 structures => [], 90 cut_point => -1, 91 findpath => 10, 92 base_pen => 500, 93 constraint => '', 94 optfunc => 'eos(1)+eos(2) - 2*efe() + 0.3*(eos(1)-eos(2)+0.00)**2', 95 96 base => ({ 97 'A' => 0, 98 'C' => 1, 99 'G' => 2, 100 'U' => 3, 101 }), 102 103 pair => [ 104 #A C G U 105 [0,0,0,1], # A 106 [0,0,1,0], # C 107 [0,1,0,1], # G 108 [1,0,1,0] # U 109 ], 110 111 max_const_plen => 20, 112 solution_space => ({ 113 114 }), 115 116 iupac => ({ 117 'A' => 'A', 118 'C' => 'C', 119 'G' => 'G', 120 'T' => 'U', 121 'U' => 'U', 122 'R' => 'AG', # purine 123 'Y' => 'CU', # pyrimidine 124 'S' => 'CG', 125 'M' => 'AC', 126 'W' => 'AU', 127 'K' => 'GU', 128 'V' => 'ACG', # not T 129 'H' => 'ACU', # not G 130 'D' => 'AGU', # not C 131 'B' => 'CGU', # not A 132 'N' => 'ACGU' 133 }), 134 neighbor => ({ # ACGU => ACGU 135 'A' => 'U', # 1000 => 0001 136 'C' => 'G', # 0100 => 0010 137 'G' => 'Y', # 0010 => 0101 138 'U' => 'R', # 0001 => 1010 139 'R' => 'Y', # 1010 => 0101 140 'Y' => 'R', # 0101 => 1010 141 'S' => 'B', # 0110 => 0111 142 'M' => 'K', # 1100 => 0011 143 'W' => 'D', # 1001 => 1011 144 'K' => 'N', # 0011 => 1111 145 'V' => 'B', # 1110 => 0111 146 'H' => 'D', # 1101 => 1011 147 'D' => 'N', # 1011 => 1111 148 'B' => 'N', # 0111 => 1111 149 'N' => 'N', # 1111 => 1111 150 }), 151 iupac_bin => ({ # ACGU 152 'A' => 8, # 1000, 153 'C' => 4, # 0100, 154 'G' => 2, # 0010, 155 'T' => 1, # 0001, 156 'U' => 1, # 0001, 157 'R' => 10, # 1010, # purine 158 'Y' => 5, # 0101, # pyrimidine 159 'S' => 6, # 0110, 160 'M' => 12, # 1100, 161 'W' => 9, # 1001, 162 'K' => 3, # 0011, 163 'V' => 14, # 1110, # not T 164 'H' => 13, # 1101, # not G 165 'D' => 11, # 1011, # not C 166 'B' => 7, # 0111, # not A 167 'N' => 15, # 1111, 168 }), 169 bin_iupac => [ # ACGU 170 '', # 0000 0 171 'U', # 0001 1 172 'G', # 0010 2 173 'K', # 0011 3 174 'C', # 0100 4 175 'Y', # 0101 5 176 'S', # 0110 6 177 'B', # 0111 7 178 'A', # 1000 8 179 'W', # 1001 9 180 'R', # 1010 10 181 'D', # 1011 11 182 'M', # 1100 12 183 'H', # 1101 13 184 'V', # 1110 14 185 'N' # 1111 15 186 ], 187 }; 188 189 190 bless $self, $class; 191 return $self; 192} 193 194 195=head2 get/set parameters 196 197Get and Set parameters for sequence design. For every C<set> routine, there 198exists the equivalent C<get> routine. 199 200=cut 201 202=head3 set_verbosity(<INT>) 203 204Set the verboisty of warnings to STDERR. 205 206=cut 207 208sub set_verbosity { 209 my ($self, $var) = @_; 210 $self->{verb}=$var; 211 return $self->{verb}; 212} 213 214sub get_verbosity { 215 my $self = shift; 216 return $self->{verb}; 217} 218 219=head3 set_optfunc(<STRING>) 220 221Set the cost-function for sequence optimization. 222 223=cut 224 225sub set_optfunc { 226 my ($self, $var) = @_; 227 $self->{optfunc} = $var; 228 return $self->{optfunc}; 229} 230 231sub get_optfunc { 232 my $self = shift; 233 return $self->{optfunc}; 234} 235 236=head3 set_constraint(<STRING>) 237 238Set the sequence constraints for sequence optimization. 239 240=cut 241 242sub set_constraint { 243 my ($self, $var) = @_; 244 $self->{constraint} = $var; 245 return $self->{constraint}; 246} 247 248sub get_constraint { 249 my $self = shift; 250 return $self->{constraint}; 251} 252 253=head3 set_score_motifs(<HASH>) 254 255Set a list of sequence motifs that will recieve a penalty (postive number) or 256bonus (negative number) during optimization. 257 258=cut 259 260sub set_score_motifs { 261 my ($self, $var) = @_; 262 $self->{score_motifs} = $var; 263 return $self->{score_motifs}; 264} 265 266sub get_score_motifs { 267 my $self = shift; 268 return $self->{score_motifs}; 269} 270 271=head3 set_base_probs(<HASH>) 272 273Specify the distribution of nucleotides in the target sequence. 274 275=cut 276 277sub set_base_probs { 278 my ($self, $var) = @_; 279 $self->{base_probs} = $var; 280 return $self->{base_probs}; 281} 282 283sub get_base_probs { 284 my $self = shift; 285 return $self->{base_probs}; 286} 287 288=head3 set_parameter_file(<STRING>) 289 290Set a parameter file other than rna_turner2004.par. 291 292=cut 293 294sub set_parameter_file { 295 my ($self, $var) = @_; 296 $self->{paramfile} = $var; 297 return $self->{paramfile}; 298} 299 300sub get_parameter_file { 301 my $self = shift; 302 return $self->{paramfile}; 303} 304 305=head3 set_structures(<ARRAY>) 306 307Set the list of structures for sequence optimization. 308 309=cut 310 311sub set_structures { 312 my $self = shift; 313 my @structs = @_; 314 if ($self->{verb}) { 315 foreach (@structs) { 316 croak "only dot-bracket strings allowed in add_structures()" if m/[^\(\)\.]/g; 317 } 318 } 319 $self->{structures} = [@structs]; 320 return $self->{structures}; 321} 322 323sub add_structures { 324 my $self = shift; 325 foreach (@_) { 326 if ($self->{verb} && m/[^\(\)\.]/g) { 327 croak "only dot-bracket strings allowed in add_structures()"; 328 } 329 push @{$self->{structures}}, $_; 330 } 331 return $self->{structures}; 332} 333 334sub get_structures { 335 my $self = shift; 336 return $self->{structures}; 337} 338 339=head3 set_cut_point(<INT>) 340 341Set a cut_point when designing two interacting molecules. The cut point 342indicates the first nucleotide of the second sequence. 343 344=cut 345 346sub set_cut_point { 347 my ($self, $var) = @_; 348 if ($self->{verb} && $self->{cut_point} != -1 && $self->{cut_point} != $var) { 349 carp "overwriting old cut_point"; 350 } 351 $self->{cut_point} = $var; 352 return $self->{cut_point}; 353} 354 355sub get_cut_point { 356 my $self = shift; 357 return $self->{cut_point}; 358} 359 360=head3 set_findpath_bound(<INT>) 361 362Set the upper bound for findpath when using B<barr(i,j,t)> in the objective function. 363 364=cut 365 366sub set_findpath_bound { 367 my ($self, $var) = @_; 368 $self->{findpath} = $var; 369 return $self->{findpath}; 370} 371 372sub get_findpath_bound { 373 my $self = shift; 374 return $self->{findpath}; 375} 376 377=head3 set_base_penalty(<INT>) 378 379Set a weighting factor to correct for desired base percentage. 380 381=cut 382 383sub set_base_penalty { 384 my ($self, $var) = @_; 385 $self->{base_pen} = $var; 386 return $self->{base_pen}; 387} 388 389sub get_base_penalty { 390 my $self = shift; 391 return $self->{base_pen}; 392} 393 394 395=head3 get_num_of_seqs() 396 397Set the number of sequences after calling explore_sequence_space(). 398 399=cut 400 401sub get_num_of_seqs { 402 my $self = shift; 403 return $self->{nos}; 404} 405 406=head3 get_num_of_nbors() 407 408Get the number of neighbors of a sequence after calling explore_sequence_space(). 409 410=cut 411 412sub get_num_of_nbors { 413 my $self = shift; 414 return $self->{border}; 415} 416 417 418=head3 get_fibo(<INT>) 419 420Get the fibronaccy number at position <INT>. The list starts with [0,1] at 421positions 0,1. Note that there is no C<set> routine for fibonacci. 422 423=cut 424 425sub get_fibo { 426 my ($self, $var) = @_; 427 $self->fill_fibo($var) unless defined $self->{fibo}[$var]; 428 return $self->{fibo}[$var]; 429} 430 431sub fill_fibo { 432 my ($self, $var) = @_; 433 while ($#{$self->{fibo}} < $var) { 434 push @{$self->{fibo}}, $self->{fibo}->[-1] + $self->{fibo}->[-2]; 435 } 436 return $self->{fibo}; 437} 438 439=head2 find_dependency_paths(@structures) 440 441If @structures is empty, the mutation dependency graph is constructed from all 442structures added with the set_structures() routine. Specify @structures only if 443you do not want all of your structures for the cost-function to be part of the 444depencency graph. 445 446=cut 447 448sub find_dependency_paths { 449 my $self = shift; 450 my @structures = @_; 451 my (@pts, @seen); 452 453 if ($self->{plist}) { 454 #carp "overwriting old dependency-path!\n"; 455 $self->{plist}=[]; 456 } 457 458 @structures = @{$self->{structures}} unless @structures; 459 croak "no structure input found" unless @structures; 460 461 my @check; 462 for (my $s=0; $s<@structures; ++$s) { 463 my $struct = $structures[$s]; 464 465 # TODO: discard if too short! 466 next if $struct !~ m/[\(\)\[\]\.]/; 467 468 # change '[,]' to '(,)' 469 if ($struct =~ m/[\[]/) { 470 if ($struct =~ m/[\)]/) { 471 die "do not mix two different bracket types in one structure!\n"; 472 } else { 473 $struct =~ s/\[/\(/g; 474 $struct =~ s/\]/\)/g; 475 } 476 } 477 478 # add, but warn if base-pairs are not compatible 479 my $pt = RNA::Utils::make_pair_table($struct); 480 for my $i (1 .. @$pt) { 481 next unless $$pt[$i]; # next if unpaired 482 my $j=$$pt[$i]; 483 if (defined $check[$i]) { 484 my $new=1; 485 foreach (@{$check[$i]}) { 486 if ($_ == $j) { 487 $new=0; 488 $$pt[$i]=$$pt[$j]=0; 489 } 490 } 491 if ($new && @{$check[$i]} > 1) { 492 carp "ignoring basepair $i - $j from structure ".($s+1); 493 $$pt[$i]=$$pt[$j]=0; 494 } elsif ($new) { 495 push @{$check[$i]}, $j; 496 } 497 } else { 498 push @{$check[$i]}, $j; 499 } 500 } 501 push @pts, $pt; 502 } 503 504 # Construct a list of depencency-pathways (ZERO-Based) 505 for my $i (1..$#{$pts[0]}) { 506 next if $seen[$i]; 507 508 # initialize path with $i 509 my @path = ($i-1); 510 $seen[$i] = 1; 511 512 my $fw = 0; 513 my ($j,$cpt); 514 515 # iterate over every pairtable at pos j 516 for my $pt (0 .. $#pts) { 517 $j = $pts[$pt][$i]; 518 $cpt= $pt; 519 520 $fw = !$fw if $j && !$seen[$j]; 521 # found a base-pair? go for it 522 while ($j && !$seen[$j]) { 523 if ($fw) { 524 push @path, $j-1; 525 } else { 526 unshift @path, $j-1; 527 } 528 $seen[$j]=1; 529 my $oj=$j; 530 for my $npt (0 .. $#pts) { 531 next if $npt == $cpt; 532 if ($pts[$npt]->[$j] > 0) { 533 $j = $pts[$npt]->[$j]; 534 $cpt= $npt; 535 last; 536 } 537 } 538 $j = ($oj==$j) ? 0 : $j; 539 } # while $j and !seen 540 } 541 542 # duplicate element if we have a cycle 543 if ($j) { 544 croak "messed up cycle!" unless $j-1 == $path[-1]; 545 unshift @path, $j-1 unless @path == 2; # 2 => duplicate basepair 546 } 547 548 push @{$self->{plist}}, [@path]; 549 } 550 return $self->{plist}; 551} 552 553=head2 explore_sequence_space() 554 555This function initializes internal paramters for subsequent sequence 556optimization. It reads the previously computed dependency graph to 557 558=over 559 560=item 1. 561 562update and check the sequence constraint 563 564=item 2. 565 566caclulate the total number of sequences compatible with all constraints 567 568=item 3. 569 570set a stop-condition for optimization from the number of possible mutations 571 572=item 4. 573 574store the number of possible solutions per depencendy-path for subsequent fair 575sampling. 576 577=back 578 579=cut 580 581sub explore_sequence_space { 582 my $self = shift; 583 my $verb = 0; 584 585 my @plist =@{$self->{plist}}; 586 my $con = $self->{constraint}; 587 my %iupac =%{$self->{iupac}}; 588 589 croak "need to compute dependency pathways first!" unless @plist; 590 croak "need constraint information!" unless $con; 591 592 my ($border, $max_len, $nos) = (0,0,1); 593 my @slim_plist; 594 my @rand_plist = (0); 595 foreach my $path (@plist) { 596 my @pseq=(); 597 my $cycle=0; 598 my $num; 599 600 # Let's do the simple thingy separate 601 if (@$path == 1) { 602 # Get constraint 603 $pseq[0] = substr($con, $path->[0], 1); 604 605 # Statistics (bos, border) 606 $num = length $iupac{$pseq[0]}; 607 } else { 608 $cycle=1 if ($path->[0] eq $path->[-1]); 609 610 # Translate path into nucleotide constraints 611 my $l = ($cycle) ? $#$path-1 : $#$path; 612 push @pseq, substr($con, $$path[$_], 1) for (0 .. $l); 613 614 # Update Constraint 615 @pseq = $self->update_constraint($cycle, @pseq); 616 617 # Enumerate all possible pathways 618 $num = $self->enumerate_pathways($cycle, @pseq); 619 } 620 621 if ($num > 1) { 622 push @slim_plist, $path;# for (1 .. $num); 623 push @rand_plist, $rand_plist[-1]+($num-1); 624 $border += $num; 625 $nos *= $num; 626 } 627 628 #print "@$path => @pseq | Num: $num\n" if 1; 629 substr($con, $path->[$_], 1, $pseq[$_]) for (0 .. $#pseq); 630 } 631 shift @rand_plist; 632 #print "@rand_plist\n"; 633 #print "SLP: ".@slim_plist." => $border\n"; 634 #print "$rand_plist[$_] => @{$slim_plist[$_]}\n" foreach (0 .. $#slim_plist); 635 #print "@{$slim_plist[$_]}\n" foreach (0 .. $#slim_plist); 636 637 print "$nos Number of sequences, $border neighbors for each sequence.\n" if $self->{verb}; 638 639 $self->{plist}=\@slim_plist; 640 $self->{rlist}=\@rand_plist; 641 $self->{constraint}=$con; 642 $self->{border}=$border unless $self->{border}; 643 $self->{nos}=$nos; 644 return ($con, $border, $nos); 645} 646 647=head3 update_constraint($is_cycle, @path) 648 649Rewrite the constraint for a particular dependency path, such that starting an 650arbitrary position would result in a correct path: "NNNRNNN" => "YRYRYRY"; 651 652=cut 653 654sub update_constraint { 655 my $self = shift; 656 my $cycle = shift; 657 my @pseq = @_; 658 659 croak "cycle of uneven length!" if $cycle && @pseq % 2; 660 661 my $jailbreak=0; 662 while (1) { 663 my $mutated = 0; 664 my $i; 665 if ($cycle) { 666 for $i (-$#pseq .. $#pseq-1) { 667 next if $pseq[$i] eq 'N'; 668 $mutated = 1 if $self->rewrite_neighbor($pseq[$i], \$pseq[$i+1]); 669 } 670 for $i (reverse(-$#pseq .. $#pseq)) { 671 next if $pseq[$i] eq 'N'; 672 $mutated = 1 if $self->rewrite_neighbor($pseq[$i], \$pseq[$i-1]); 673 } 674 } else { 675 for $i (0 .. $#pseq-1) { 676 next if $pseq[$i] eq 'N'; 677 $mutated = 1 if $self->rewrite_neighbor($pseq[$i], \$pseq[$i+1]); 678 } 679 for $i (reverse(1 .. $#pseq)) { 680 next if $pseq[$i] eq 'N'; 681 $mutated = 1 if $self->rewrite_neighbor($pseq[$i], \$pseq[$i-1]); 682 } 683 } 684 last unless $mutated; 685 croak "escaping from seemingly endless constraint updates" if $jailbreak++ > 100; 686 } 687 return @pseq; 688} 689 690=head3 enumerate_pathways($is_cycle, @path) 691 692For a given depencendy path, calculate the number of compatible sequences, and 693initialze data-structures for C<make_pathseq()>. If it is an unconstrained 694path, use the fibronacci numbers to enumerate pathways. If it is a constrained 695path, exhaustivley count and store the solutions in a tree structure. If the 696path is constrained and longer than C<max_const_plen>, estimate the number of 697pathways with the fibronacci numbers. 698 699=cut 700 701sub enumerate_pathways { 702 my $self = shift; 703 my $cycle= shift; 704 my @pseq = @_; 705 706 my $pstr = join '', @pseq; 707 my $plen = length $pstr; 708 709 croak "cycles must have even length" if $cycle && $plen%2; 710 #croak "first call update_constraint(), then enumerate_pathways()!" if ($pstr =~ m/[N]/) && ($pstr =~ m/[^N]/); 711 712 my %solutions = %{$self->{solution_space}}; 713 my $max_plen = $self->{max_const_plen}; 714 my %iupac = %{$self->{iupac}}; 715 my %base = %{$self->{base}}; 716 my @pair = @{$self->{pair}}; 717 718 if (exists $solutions{$pstr.$cycle}) { 719 return scalar($solutions{$pstr.$cycle}->get_leaves); 720 } elsif ($pstr =~ m/[N]/ && $pstr !~ m/[^N]/) { 721 # its a path with all N's => (fibronacci: switch.pl) 722 my $l = ($cycle) ? $plen-1 : $plen; 723 return 2*($self->get_fibo($plen+1) + $self->get_fibo($l)); 724 } 725 # enable this as soon as fair sampling in this case works! 726 # elsif ($pstr !~ m/[^RY]/g) { 727 # # its a path with all RY's 728 # my $l = ($cycle) ? $plen-1 : $plen; 729 # return ($self->get_fibo($plen+1) + $self->get_fibo($l)); 730 # } 731 # 732 elsif ($plen > $max_plen) { 733 # path too long to enumerate with constraints 734 carp "constrained path too long, fallback to greedy heuristic!"; 735 my $l = ($cycle) ? $plen-1 : $plen; 736 return ($self->get_fibo($plen+1) + $self->get_fibo($l)); 737 # calculate RYRYR as NNNN/2 and use greedy-shuffle 738 739 # compute the theoretical sequence length (n) to construct an 740 # example that breaks plen: n = (plen-1)*4 + 1 741 # so for plen=26 => n=101 742 # if we assume a helix has 4 base-pairs, it is: 743 # n = (plen-1)*12 + 1 = 301 744 } 745 746 my $tree = RNA::Design::Tree->new(); 747 my @le = $tree->get_leaves; 748 $tree->init_new_leaves; 749 foreach (split //, $iupac{$pseq[0]}) { 750 $tree->push_to_current_leaves($_, $le[0]); 751 } 752 753 for my $i (1 .. $#pseq) { 754 my @choices = split //, $iupac{$pseq[$i]}; 755 my @leaves = $tree->get_leaves; 756 $tree->init_new_leaves; 757 758 if ($i == $#pseq && $cycle) { 759 foreach my $l (@leaves) { 760 foreach my $c (@choices) { 761 if ($pair[$base{$$l[0]}][$base{$c}]) { 762 # check if $c also pairs with anything in the first row! 763 my $string = $tree->get_full_path($l); 764 my $f = substr $string, 0, 1; 765 if ($pair[$base{$c}][$base{$f}]) { 766 $tree->push_to_current_leaves($c, $l); 767 } 768 769 } 770 } 771 } 772 } else { 773 foreach my $c (@choices) { 774 foreach my $l (@leaves) { 775 if ($pair[$base{$$l[0]}][$base{$c}]) { 776 $tree->push_to_current_leaves($c, $l); 777 } 778 } 779 } 780 } 781 } 782 783 #DEBUG: 784 #print $pstr.$cycle."\n"; 785 #foreach ($tree->get_leaves) { 786 # print $tree->get_full_path($_)."\n"; 787 #} 788 789 $self->{solution_space}->{$pstr.$cycle} = $tree; 790 return scalar($tree->get_leaves); 791} 792 793=head3 rewrite_neighbor($iupac, \$iupac) 794 795Takes a IUPAC letter and a reference to a base-pairing IUPAC letter. Updates 796the IUPAC code if necessary and returns 0 or 1 if there exists a neighbor or 797not. 798 799=cut 800 801sub rewrite_neighbor { 802 my ($self, $c1, $c2) = @_; 803 print "$c1 -> $$c2\n" if 0; 804 805 my $nb = $self->{neighbor}->{$c1}; 806 if ($nb eq $$c2) { 807 return 0; 808 } else { 809 my $bin_c2 = $self->{iupac_bin}{$$c2}; 810 my $bin_nb = $self->{iupac_bin}{$nb}; 811 812 my $new_nb = $self->{bin_iupac}[($bin_c2 & $bin_nb)]; 813 croak "sequence constraints cannot be fulfilled\n" unless $new_nb; 814 815 return 0 if $new_nb eq $$c2; 816 817 $$c2 = $new_nb; 818 return 1; 819 } 820} 821 822=head2 find_a_sequence() 823 824Uses the dependency graph and the sequence constraint to make a random sequence. 825 826=cut 827 828sub find_a_sequence { 829 # make random sequence or randomly mutate sequence 830 # by replacing all @plists by random paths 831 my $self = shift; 832 833 my @plist=@{$self->{plist}}; 834 my $con = $self->{constraint}; 835 my $seq = $con; 836 837 foreach my $path (@plist) { 838 my @pseq = (); 839 my $cycle=0; 840 841 $cycle = 1 if ($$path[0] eq $$path[-1] && (@$path>1)); 842 843 # Translate path into nucleotide constraints 844 my $l = ($cycle) ? $#$path-1 : $#$path; 845 push @pseq, substr($con, $$path[$_], 1) for (0 .. $l); 846 847 @pseq = $self->make_pathseq($cycle, @pseq); 848 849 substr($seq, $$path[$_], 1, $pseq[$_]) for (0 .. $#pseq); 850 } 851 return $seq; 852} 853 854=head2 optimize_sequence(sequence, maximum_number_of_mutations) 855 856The standard optimization function. Whenever a sequence mutation results in a 857better score from the cost-function, replaces the current solution. In case 858there are too many useless mutations, (see explore_sequence_space()), the 859current sequence is considered local-optimal and returned. 860 861=cut 862 863sub optimize_sequence { 864 my $self = shift; 865 my $refseq= shift; 866 my $m = shift; 867 868 my $verb = $self->{verb}; 869 my $border = $self->{border}; 870 my $refcost = $self->eval_sequence($refseq); 871 my ($mutseq,$newcost); 872 873 my $reject = 0; 874 my %seen = (); 875 for my $d (1..$m) { 876 $mutseq = $self->mutate_seq($refseq); 877 if (!exists $seen{$mutseq}) { 878 $seen{$mutseq}=1; 879 $newcost = $self->eval_sequence($mutseq); 880 if ($newcost < $refcost) { 881 $refseq = $mutseq; 882 $refcost= $newcost; 883 $reject = 0; 884 printf STDERR "%4d %s %6.3f\n", $d, $mutseq, $newcost if $verb; 885 } 886 } 887 else { 888 $seen{$mutseq}++; 889 last if (++$reject >= 2*$border); 890 } 891 } 892 return $refseq; 893} 894 895=head3 mutate_seq($sequence) 896 897Choose a random depencency path, mutate it using make_pathseq() of the sequence 898constraint. Choses randomly according to the number of solutions, but can be 899implemented more efficiently in C<log(n)> time! 900 901=cut 902 903sub mutate_seq { 904 my $self = shift; 905 my $seq = shift; 906 907 my $con = $self->{constraint}; 908 my @plist = @{$self->{plist}}; 909 my @rlist = @{$self->{rlist}}; 910 911 my $reject_same_solution=1; 912 913 my ($path, @pseq, @ref_pseq); 914 my $cycle = 0; 915 916 # chose a random path => weight by number of solutions to make it fair! 917 return $seq unless defined $rlist[-1]; 918 my $rand = (int rand $rlist[-1]) + 1; 919 for (my $i=0; $i<=$#plist; ++$i) { 920 if ($rand <= $rlist[$i]) { 921 $path = \@{$plist[$i]}; 922 last; 923 } 924 } 925 926 # this would be random independent of the pathlength! 927 #$path = $plist[int rand @plist]; 928 #print "@$path\n"; 929 930 $cycle = 1 if ($$path[0] eq $$path[-1] && (@$path>1)); 931 932 my $l = ($cycle) ? $#$path-1 : $#$path; 933 push @pseq, substr($con, $$path[$_], 1) for (0 .. $l); 934 935 if ($reject_same_solution) { 936 push @ref_pseq, substr($seq, $$path[$_], 1) for (0 .. $l); 937 my @npseq; 938 my $jailbreak = 0; 939 do { 940 @npseq = $self->make_pathseq($cycle, @pseq); 941 last if ++$jailbreak > 100; 942 } while (join('', @ref_pseq) eq join('', @npseq)); 943 @pseq = @npseq; 944 } else { 945 @pseq = $self->make_pathseq($cycle, @pseq); 946 } 947 948 substr($seq, $$path[$_], 1, $pseq[$_]) for (0 .. $#pseq); 949 return $seq; 950} 951 952=head3 make_pathseq($is_cycle, @path) 953 954Takes an Array of IUPAC code and rewrites it into a valid array of Nucleotides. 955There are four cases: (i) path of length 1 is a randomly shuffled nucleotide 956accoriding to IUPAC-letter, (ii) The solution-tree has been built during 957explore_sequence_space() and so we use it to sample, (iii) the sequence is only 958N's, so the fibronacci numbers are used for sampling (i.e. the switch.pl 959method), and (iv) the path is constrained and longer than C<max_const_plen>, so 960the paths are sampled using a greedy heuristic. 961 962=cut 963 964sub make_pathseq { 965 my $self = shift; 966 my $cycle= shift; 967 my @pseq = @_; 968 969 my $pstr = join '', @pseq; 970 my $plen = length $pstr; 971 972 croak "cycles must have even length" if $cycle && $plen%2; 973 974 my %iupac = %{$self->{iupac}}; 975 my %iupac_bin = %{$self->{iupac_bin}}; 976 my %solutions = %{$self->{solution_space}}; 977 my $max_plen = $self->{max_const_plen}; 978 979 if ($plen == 1) { 980 # if path length 1 => return random iupac 981 my @i = split '', $iupac{$pstr}; 982 return ($i[int rand @i]); 983 } elsif (exists $solutions{$pstr.$cycle}) { 984 my $tree = $solutions{$pstr.$cycle}; 985 my @i = $tree->get_leaves; 986 return (split '', $tree->get_full_path($i[int rand @i])); 987 } elsif ($pstr !~ m/[^N]/g) { 988 # its a path with all N's => do it like switch.pl 989 990 #TODO: need to implement this method for RY-pathways as well 991 992 # This is original switch.pl code that I don't understand! 993 my $l = $plen; 994 my $ll = ($cycle) ? $l-1 : $l; 995 my $n = 2*($self->get_fibo($l+1) + $self->get_fibo($ll)); 996 997 my $rand = rand($n); 998 999 my @seq = (); 1000 # set first base in sequence 1001 if ($rand < $self->get_fibo($ll)) { 1002 push @seq, 'A', 'U'; 1003 } elsif ($rand < 2*$self->get_fibo($ll)) { 1004 push @seq, 'C', 'G'; 1005 $rand -= $self->get_fibo($ll); 1006 } else { 1007 $rand -= 2*$self->get_fibo($ll); 1008 $ll=$l; 1009 push @seq, ($rand>=$self->get_fibo($l+1))?'U':'G'; 1010 $rand -= $self->get_fibo($l+1) if $rand >= $self->get_fibo($l+1); 1011 } 1012 1013 # grow sequence to length $l 1014 # if we have a cycle starting with A or C $ll=$l-1, else $ll=$l 1015 while (@seq < $l) { 1016 if ($rand < $self->get_fibo($ll-@seq)) { 1017 push @seq, 'C','G' if ($seq[-1] eq 'G'); 1018 push @seq, 'A','U' if ($seq[-1] eq 'U'); 1019 } else { 1020 $rand -= $self->get_fibo($ll-@seq); 1021 push @seq, ($seq[-1] eq 'G') ? 'U' : 'G'; 1022 } 1023 } 1024 pop @seq if (@seq > $l); # in case we've added one base too many 1025 return @seq; 1026 } else { 1027 # do a greedy constraint shuffling! 1028 croak "some special case not covered yet! (@pseq)" unless $plen > $max_plen; 1029 1030 my @path = @pseq; 1031 for (my $i=0; $i<=$#path; ++$i) { 1032 my $c = $path[$i]; 1033 my @i = split '', $iupac{$c}; 1034 1035 $path[$i] = $i[int rand @i]; 1036 1037 if ($i==0 && $cycle) { 1038 my ($a, $j) = ($path[$i], -1); 1039 $a=$path[$j--] while $self->rewrite_neighbor($a, \$path[$j]); 1040 } 1041 1042 $self->rewrite_neighbor($path[$i], \$path[$i+1]) if $i < $#path; 1043 } 1044 return @path; 1045 } 1046 return; 1047} 1048 1049=head2 eval_sequence($sequence) 1050 1051Evaluate a given sequence according to the cost function. The final score is 1052calculated as the sum over three values: 1053 1054(1) The objective function is a simplified interface to access functions of the 1055*ViennaRNA package*. Every input secondary structure *can* serve as full target 1056conformation or structure constraint. The objective function can include terms 1057to compute the free energy of a target structure, the (constrained) ensemble 1058free energy, the (conditional) probabilities of secondary structure elements, 1059the accessibility of subsequences and the direct-path barriers between two 1060structures. All of these terms exist for linear, circular, and cofolded 1061molecules, as well as for custom specified temperatures. In the following 1062examples, the indices B<i, j> correspond to the secondary structures specified 1063in the input file, B<t> is optional to specify the temperature in Celsius. By 1064default, computations use the standard temperature of 37C. 1065 1066 1067 10682) score_motifs(): A set of sequence motifs that receive an extra penalty. 1069Whenever one of these motifs is found in a sequence, its penalty (or bonus) is 1070added to the score of the objective function. 1071 10723) base_prob(): A vector with nuleotide distributions is compared with a 1073target distribution vector. The similarity between the vectors is expressed 1074as a value s=[0,1]. In order to include and weight this similarity in the 1075final score, the penalty is calculated as (1-s)*k, where k is a constant 1076adjustable using set_base_penalty() 1077 1078=cut 1079 1080sub eval_sequence { 1081 my $self = shift; 1082 my $seq = shift; 1083 1084 my $verb = $self->{verb}; 1085 my $ofun = $self->{optfunc}; 1086 my $ParamFile = $self->{paramfile}; 1087 warn "$seq\n".$ofun."\n" if $verb > 1; 1088 croak "no structures specified!\n" unless @{$self->{structures}}; 1089 1090 my %results; 1091 $RNA::fold_constrained=1; 1092 $RNA::cut_point=$self->{cut_point}; 1093 RNA::read_parameter_file($ParamFile) if ($ParamFile); 1094 foreach my $func (qw/eos eos_circ efe efe_circ prob prob_circ barr acc/) { 1095 while ($ofun =~ m/$func\(([0-9\,\s]*)\)/) { 1096 warn "next: $&\n" if $verb > 1; 1097 1098 if (exists $results{$&}) { 1099 #print "OR: $& => $results{$&}\n"; 1100 } else { 1101 my $res = eval("\$self->$func(\$seq, $1)"); 1102 $res = sprintf "%.2f", $res; 1103 croak $@ if $@; 1104 #print "NR: $& => $res\n"; 1105 $results{$&}=$res; 1106 } 1107 $ofun =~ s/$func\(([0-9\,\s]*)\)/ $results{$&} /; 1108 } 1109 } 1110 $RNA::cut_point=-1; 1111 $RNA::fold_constrained=0; 1112 1113 warn $ofun."\n" if $verb > 1; 1114 my $r = eval $ofun; 1115 croak $@ if $@; 1116 1117 my $bpen = $self->{base_pen}; 1118 1119 my $p = ($bpen) ? $self->base_prob($seq)*$bpen : 0; 1120 my $s = $self->score_motifs($seq); 1121 #print "$r, $p, $s, \n"; 1122 1123 return $r+$p+$s; 1124} 1125 1126sub score_motifs { 1127 my ($self, $seq) = @_; 1128 my $penalty = 0; 1129 1130 my %motifs = %{$self->{score_motifs}}; 1131 my $cpnt = $self->{cut_point}; 1132 1133 if ($cpnt == -1) { 1134 foreach my $string (keys %motifs) { 1135 $penalty += $motifs{$string} while ($seq =~ m/$string/g); 1136 } 1137 } else { 1138 my $left = substr $seq, 0, $cpnt-1; 1139 my $right = substr $seq, $cpnt-1; 1140 #print "$left&$right\n"; 1141 foreach my $string (keys %motifs) { 1142 $penalty += $motifs{$string} while ($left =~ m/$string/g); 1143 $penalty += $motifs{$string} while ($right =~ m/$string/g); 1144 } 1145 } 1146 return $penalty; 1147} 1148 1149sub base_prob { 1150 my ($self, $seq) = @_; 1151 1152 my %prob = %{$self->{base_probs}}; 1153 my %base; 1154 $base{$_}++ foreach (split //, $seq); 1155 1156 my $cost=1; 1157 foreach my $k (keys %base) { 1158 $base{$k} /= length $seq; 1159 # print "Dist $k $base{$k} vs $prob{$k} => \n"; 1160 1161 #$cost += (abs($base{$k}-$prob{$k})/$prob{$k}); 1162 $cost -= sqrt($base{$k}*$prob{$k}); 1163 1164 #print "$base{$k}, $prob{$k}, $cost\n"; 1165 } 1166 return $cost; 1167} 1168 1169# sub ensemble_defect { 1170# my ($self, $seq, $i, $t) = @_; 1171# my $probs = get_base_pair_probs(); 1172# 1173# } 1174# 1175# sub cofold_defect2 { 1176# my ($self, $seq) = @_; 1177# 1178# my $cpnt = $self->{cut_point}; 1179# my $left = substr $seq, 0, $cpnt-1; 1180# my $right = substr $seq, $cpnt-1; 1181# 1182# $RNA::cut_point = $cpnt = length($right)+1; 1183# my ($costruct, $comfe) = RNA::cofold($right.$right); 1184# substr $costruct, $cpnt-1,0,'&'; 1185# $RNA::cut_point = -1; 1186# #print "$costruct, $comfe:\n"; 1187# return 1 if ($comfe >= 0); 1188# 1189# my @chars = split '', $costruct; 1190# my @stack; 1191# my $DIM=0; 1192# for my $i (0 .. $#chars) { 1193# if ($chars[$i] eq '&') { 1194# $DIM = (@stack) ? 1 : 0; 1195# # got two monomers 1196# last; 1197# } elsif ($chars[$i] eq '.') { 1198# next; 1199# } elsif ($chars[$i] eq '(') { 1200# push @stack, ($i); 1201# } elsif ($chars[$i] eq ')') { 1202# my $j = pop @stack; 1203# if ($j < $cpnt && $cpnt < ($i)) { 1204# $DIM=1; last; 1205# } 1206# } 1207# } 1208# return ($DIM) ? -$comfe : 1; 1209# } 1210 1211=head2 objective function tools 1212 1213Subfunctions that can be specified in the objective function. 1214 1215=cut 1216 1217=head3 barr(i, j, t) 1218 1219direct path energy barrier from input structure number i to input structure 1220number j computed using findpath. The upper bound of findpath is adjustable 1221with set_findpath_bound() 1222 1223=cut 1224 1225sub barr { 1226 my ($self, $seq, $i, $j, $t) = @_; 1227 $t = 37 unless defined $t; 1228 $RNA::temperature=$t; 1229 1230 #TODO: make set_routine 1231 my $fpd = $self->{findpath}; 1232 1233 croak "cannot find structure number $i" if $i && !$self->{structures}[$i-1]; 1234 croak "cannot find structure number $j" if $i && !$self->{structures}[$j-1]; 1235 1236 my $s_i = ($i) ? $self->{structures}[$i-1] : undef; 1237 my $s_j = ($j) ? $self->{structures}[$j-1] : undef; 1238 1239 my $e_i = sprintf("%.2f", RNA::energy_of_structure($seq, $s_i, 0)); 1240 my $sE = RNA::find_saddle($seq, $s_i, $s_j, $fpd); 1241 1242 return sprintf("%.2f", ($sE/100)-$e_i); 1243} 1244 1245=head3 prob(i, j, t) 1246 1247Probability of input structure number B<i> given input structure number B<j>. 1248The probability is computed from the equilibrium partition functions: 1249B<Pr(i|j)=Z_i/Z_j>. Hence, the constraint B<i> B<must> include the constraint 1250of B<j> B<(Pr(i|j)=Z_i+j/Z_j)>. Omitting B<j>, or specifying B<j=0> computes the 1251probability of B<i> in the unconstrained ensemble B<Pr(i)=Z_i/Z>. 1252 1253=cut 1254 1255sub prob { 1256 my ($self, $seq, $i, $j, $t) = @_; 1257 $t = 37 unless defined $t; 1258 $RNA::temperature=$t; 1259 my $kT=0.6163207755; 1260 if ($t != 37) { 1261 $kT /= 310.15; 1262 $kT *= ($t+273.15); 1263 } 1264 1265 croak "prob(): no structure input" unless $i; 1266 croak "prob(): cannot find structure number $i" if !$self->{structures}[$i-1]; 1267 croak "prob(): cannot find structure number $j" if $j && !$self->{structures}[$j-1]; 1268 1269 my $s_i = $self->{structures}[$i-1]; 1270 my $s_j = ($j) ? $self->{structures}[$j-1] : undef; 1271 1272 # A hack to make sure people do not fuck up their probability calculations 1273 # if ($s_j) { 1274 # print $s_i."\n"; 1275 # my $c=0; 1276 # foreach (split //, $s_i) { 1277 # if ($_ ne '.') { 1278 # print "$c, $_ vs ".substr($s_j, $c, 1)."\n"; 1279 # substr($s_j, $c, 1, $_); 1280 # } 1281 # $c++; 1282 # } 1283 # } 1284 1285 my ($dGi, $dGj, $tmp); 1286 if ($RNA::cut_point == -1) { 1287 # seemingly useless string modification 1288 # to avoid ViennaRNA SWIG interface bugs 1289 $tmp=$s_i; if ($tmp) { $tmp.='.'; $tmp=substr($tmp,0,-1); } 1290 $dGi = RNA::pf_fold($seq, $tmp); 1291 $tmp=$s_j; if ($tmp) { $tmp.='.'; $tmp=substr($tmp,0,-1); } 1292 $dGj = RNA::pf_fold($seq, $tmp); 1293 } else { 1294 $tmp=$s_i; if ($tmp) { $tmp.='.'; $tmp=substr($tmp,0,-1); } 1295 $dGi = RNA::co_pf_fold($seq, $tmp); 1296 $tmp=$s_j; if ($tmp) { $tmp.='.'; $tmp=substr($tmp,0,-1); } 1297 $dGj = RNA::co_pf_fold($seq, $tmp); 1298 } 1299 1300 return exp(($dGj-$dGi)/$kT); 1301} 1302 1303=head3 prob_circ(i, j, t) 1304 1305prob(i,j,t) for circular molecules. 1306 1307=cut 1308 1309sub prob_circ { 1310 my ($self, $seq, $i, $j, $t) = @_; 1311 $t = 37 unless defined $t; 1312 $RNA::temperature=$t; 1313 my $kT=0.6163207755; 1314 if ($t != 37) { 1315 $kT /= 310.15; 1316 $kT *= ($t+273.15); 1317 } 1318 1319 croak "prob_circ(): no structure input" unless $i; 1320 croak "prob_circ(): cannot find structure number $i" if !$self->{structures}[$i-1]; 1321 croak "prob_circ(): cannot find structure number $j" if $j && !$self->{structures}[$j-1]; 1322 carp "prob_circ(): ignoring cut_point" if ($RNA::cut_point != -1); 1323 1324 my $s_i = $self->{structures}[$i-1]; 1325 my $s_j = ($j) ? $self->{structures}[$j-1] : undef; 1326 1327 my ($dGi, $dGj, $tmp); 1328 # seemingly useless string modification 1329 # to avoid ViennaRNA SWIG interface bugs 1330 $tmp=$s_i; if ($tmp) { $tmp.='.'; $tmp=substr($tmp,0,-1); } 1331 $dGi = RNA::pf_circ_fold($seq, $tmp); 1332 $tmp=$s_j; if ($tmp) { $tmp.='.'; $tmp=substr($tmp,0,-1); } 1333 $dGj = RNA::pf_circ_fold($seq, $tmp); 1334 1335 return exp(($dGj-$dGi)/$kT); 1336} 1337 1338=head3 acc(i, j, t) 1339 1340Accessibility of an RNA/DNA motif. This function is exactly the same as 1341prob(i,j,t), however, it is ment to be used with constraints that use the 1342character 'x' to specify strictly unpaired regions. 1343 1344=cut 1345 1346sub acc { 1347 return prob(@_); 1348} 1349 1350=head3 acc_circ(i, j, t) 1351 1352acc(i,j,t) for circular molecules. 1353 1354=cut 1355 1356sub acc_circ { 1357 return prob_circ(@_); 1358} 1359 1360=head3 eos(i, t) 1361 1362Free energy of input structure number B<i> at temperature B<t>. 1363 1364=cut 1365 1366sub eos { 1367 my ($self, $seq, $i, $t) = @_; 1368 $t = 37 unless defined $t; 1369 $RNA::temperature=$t; 1370 croak "eos(): no structure input" unless $i; 1371 croak "eos(): cannot find structure number $i" unless $self->{structures}[$i-1]; 1372 my $str = $self->{structures}[$i-1]; 1373 # ($str =~ m/[\(\)]/) ? k 1374 return RNA::energy_of_struct($seq, $str); 1375} 1376 1377=head3 eos_circ(i, t) 1378 1379Free energy of the circular input structure number B<i> at temperature B<t>. 1380 1381=cut 1382 1383sub eos_circ { 1384 my ($self, $seq, $i, $t) = @_; 1385 $t = 37 unless defined $t; 1386 $RNA::temperature=$t; 1387 croak "eos_circ(): no structure input" unless $i; 1388 croak "eos_circ(): cannot find structure number $i" unless $self->{structures}[$i-1]; 1389 carp "eos_circ(): ignoring cut_point" if ($RNA::cut_point != -1); 1390 my $str = $self->{structures}[$i-1]; 1391 return RNA::energy_of_circ_struct($seq, $str); 1392} 1393 1394=head3 efe(i, t) 1395 1396Free energy of a secondary structure ensemble, constraint to be compatible with 1397input structure number B<i>. Omitting B<i>, or specifying B<i=0> computes the 1398unconstraint ensemble free energy. 1399 1400=cut 1401 1402sub efe { 1403 my ($self, $seq, $i, $t) = @_; 1404 $t = 37 unless defined $t; 1405 $RNA::temperature=$t; 1406 croak "efe(): cannot find structure number $i" if $i && !$self->{structures}[$i-1]; 1407 1408 my $str = ($i) ? $self->{structures}[$i-1] : undef; 1409 # seemingly useless string modification 1410 # to avoid ViennaRNA SWIG interface bugs 1411 if ($str) { 1412 $str.='.'; $str=substr($str,0,-1); 1413 } 1414 return ($RNA::cut_point == -1) ? RNA::pf_fold($seq, $str) : RNA::co_pf_fold($seq, $str); 1415} 1416 1417=head3 efe_circ(i, t) 1418 1419Free energy of a circular secondary structure ensemble, constraint to be 1420compatible with input structure number B<i>. Omitting B<i>, or specifying 1421B<i=0> computes the unconstraint ensemble free energy. 1422 1423=cut 1424 1425sub efe_circ { 1426 my ($self, $seq, $i, $t) = @_; 1427 $t = 37 unless defined $t; 1428 $RNA::temperature=$t; 1429 croak "efe_circ(): cannot find structure number $i" if $i && !$self->{structures}[$i-1]; 1430 carp "efe_circ(): ignoring cut_point" if ($RNA::cut_point != -1); 1431 1432 my $str = ($i) ? $self->{structures}[$i-1] : undef; 1433 # seemingly useless string modification 1434 # to avoid ViennaRNA SWIG interface bugs 1435 if ($str) { $str.='.'; $str=substr($str,0,-1); } 1436 return RNA::pf_circ_fold($seq, $str); 1437} 1438 14391; 1440 1441package RNA::Design::Tree; 1442 1443use strict; 1444use warnings; 1445use Data::Dumper; 1446 1447sub new { 1448 my $class = shift; 1449 my $self = { 1450 tree => [ 1451 [ 1452 ['root', undef] 1453 ] 1454 ], 1455 }; 1456 1457 bless $self, $class; 1458 return $self; 1459} 1460 1461sub init_new_leaves { 1462 my $self = shift; 1463 push @{$self->{tree}}, []; 1464 return $self->{tree}; 1465} 1466 1467sub push_to_current_leaves { 1468 my $self = shift; 1469 my ($string, $parent) = @_; 1470 1471 my $tree = $self->{tree}; 1472 my $leaves = $$tree[-1]; 1473 1474 push @$leaves, [$string, $parent]; 1475 1476 return $self->{tree}; 1477} 1478 1479sub get_leaves { 1480 my $self = shift; 1481 return @{${$self->{tree}}[-1]}; 1482} 1483 1484sub get_first { 1485 my $self = shift; 1486 return @{${$self->{tree}}[1]}; 1487} 1488 1489sub get_full_path { 1490 my $self = shift; 1491 my $leave = shift; 1492 1493 my $string; 1494 my ($char, $ref) = @$leave; 1495 while ($char ne 'root') { 1496 $string .= $char; 1497 ($char, $ref) = @$ref; 1498 } 1499 return reverse $string; 1500} 1501 15021; 1503 1504=head1 AUTHOR 1505 1506Stefan Badelt (stef@tbi.univie.ac.at) 1507 1508=head1 VERSION-LOG 1509 1510 1.10 -- changed in cost function interface: 1511 *added acc(i,j,t) 1512 *added acc_circ(i,j,t) 1513 *changed score_motifs(): 1514 reads a hash of (motif => score) pairs 1515 and adds the scores to the cost-function 1516 *changed base_prob(): 1517 compares vector of nucleotide frequencies with specified vector 1518 *added base_penalty: 1519 is a factor to weight base_prob() score 1520 *changed eval_sequence(): 1521 now computes score as sum over 1522 objective + base_prob + score_motifs 1523 1524 -- fixed SWIG interface problems for: 1525 prob(i,j,t), 1526 prob_circ(i,j,t) 1527 1528 -- bugfix in find_dependency_paths() 1529 1530 1.00 -- initial release (ViennaRNA-v2.2) 1531 1532=cut 1533