1#!/usr/bin/perl -w 2###################################################### 3# history.pl 4###################################################### 5# Author: liangc 6# $Id: history.pl,v 1.8 2009/08/13 20:42:58 astoltzfus Exp $ 7 8# create ancestral intron states based on an alignment and tree 9# in a nexus file. The BUGS program will run many times with small 10# number of generations to find those can converge (to global optimum) 11# and then run one with a large number of generations (convergence) 12# the ancestral states will be computed from the average value of 13# those generations and write back to file in history block 14 15use strict; 16use Pod::Usage; 17use Data::Dumper; 18use Getopt::Long; 19use Bio::NEXUS; 20 21my $version = "\$Revision\$"; 22 23Getopt::Long::Configure("bundling"); # for short options bundling 24my %opts = (); 25GetOptions(\%opts, 'help|h', 'man', 'v', 'l=s','n=s', 'b=s', 'm=s', 'debug') or pod2usage(2); 26if ( $opts{'man'} ) { pod2usage(-exitval => 0, -verbose => 2) } 27if ( $opts{'help'} ) { pod2usage(1) } 28if ($opts{v}) { 29 print "history.pl $version\n"; exit; 30} 31 32my $infile = shift; 33unless ($infile) {die "history.pl [options] <filename>\n";} 34my $burnin = defined $opts{b} ? $opts{b} : 1000; # burnin generation # 35my $monitor = $opts{m} || 1000; # monitor # 36my $good_num = $opts{n} || 3; # the number of files to be selected 37my $runlevel = defined $opts{l} ? $opts{l} : 0; 38my $debug = $opts{debug}; 39 40my $stem = $infile; 41$stem =~ s/^(.+).nex$/$1/; 42$stem =~ s/^(.+\/)?([^\/]+)$/$2/; 43#print "$1 $stem\n"; exit; 44$stem =~ s/[^a-zA-Z0-9.]/\./g; # remove non alphanumeric characters in name 45my $dir = "$stem.dir"; 46system "mkdir $dir"; 47system "cp $infile $dir/$stem.nex"; 48$infile = "$stem.nex"; 49chdir "$dir"; 50 51my @data; # all monitored data in file 52my ($llike, $alpha, $beta, $r, $k, @node); 53my $maxlike = -1.E+20; 54my $minfile = 0; 55my $maxfile = 2; 56my @like; 57my $names; # name of each node in the index file 58 59#print "$runlevel--$burnin--$monitor\n";exit; 60if ($runlevel == 0) { 61 exec_bugs(); 62 re_exec_bugs($minfile, $maxfile, 50, 50); 63 rename "$stem.$maxfile.bug", "$stem.0.bug"; 64} 65if ($runlevel < 2) { 66 re_exec_bugs(0, 0, $burnin, $monitor); 67}else { 68 read_bugs_out("$stem.0.bugs1"); 69} 70write_data(0); 71modify_nexus(); 72 73exit; 74 75# select runs not trapped locally 76sub exec_bugs { 77 $maxfile = 0; 78 for (my $i = 0; $i < 20; $i++) { 79 system ("bugs.pl $stem.$i $infile > $stem.$i.bug") == 0 or die "BUGS execution error!\n"; 80 system "bugs <$stem.$i.cmd >$stem.$i.bugs.log"; 81 read_bugs_out("bugs1"); 82 print "$i: ", $llike->[0], "\n" if $debug; 83 if ( $alpha->[1] != 0 && abs($alpha->[0]/$alpha->[1]) < 1000 && 84 $beta->[1] != 0 && abs($beta->[0]/$beta->[1]) < 1000 && 85 $r->[1] != 0 && abs($r->[0]/$r->[1]) < 1000 && 86 $k->[1] != 0 && abs($k->[0]/$k->[1]) < 1000 ) { 87 print " move to $stem.$maxfile ", $llike->[0], "\n" if $debug; 88 rename "$stem.$i.bug", "$stem.$maxfile.bug"; 89 $like[$maxfile] = $llike->[0]; 90 $maxfile++; 91 } 92 if ($maxfile == $good_num) {last;} 93 } 94 if (--$maxfile == -1) {print "Error for file: $infile; no good starting was found\n"; exit; } 95 print join(' ', @like), "\n" if $debug; 96} 97 98# select a run with maximum mean probability 99sub re_exec_bugs { 100 my ($bugs1, $bugs2, $burnin, $monitor) = @_; 101# if ($bugs1 == $bugs2) { die "-----Error: $stem\n"; } 102# my ($nchar, $nnodes) = read_node_num("$stem.$bugs1.bug"); 103# $burnin = 1000; #int(sqrt(sqrt(sqrt($nchar*$nnodes+1))))*1000; 104# print "burnin: $burnin\n"; 105 foreach my $i ($bugs1..$bugs2) { 106 print "$i running...\n" if $debug; 107 system ("bugs.cmd.pl $stem.$i $burnin $monitor") == 0 or die "BUGS error: $infile\n"; 108 system "bugs <$stem.$i.cmd >$stem.$i.bugs.log"; 109 rename "bugs.ind", "$stem.$i.bugs.ind"; 110 rename "bugs.out", "$stem.$i.bugs.out"; 111 rename "bugs1.ind", "$stem.$i.bugs1.ind"; 112 rename "bugs1.out", "$stem.$i.bugs1.out"; 113 read_bugs_out("$stem.$i.bugs1"); 114 select_file($i); 115 $like[$i] = $llike->[0]; 116 } 117 print join(' ', @like), "\n" if $debug; 118} 119 120# select the run to be used 121sub select_file { 122 my $i = shift; 123 if ($llike->[0] > $maxlike) { $maxlike = $llike->[0]; $maxfile = $i; } 124} 125 126# read .out file 127sub read_bugs_out { 128 my $bugstem = shift; 129 read_ind($bugstem); 130 open (INFILE, "$bugstem.out") or die "Error open file\n"; 131 132 for (my $i = 1; my $line = <INFILE>; $i++) { 133# print "$line\n"; exit; 134 $line =~ s/^\s+//; 135 ${$data[$i]} = [split /\s+/, $line]; 136 } 137 close (INFILE); 138} 139 140#read index file 141sub read_ind { 142 my $bugstem = shift; 143 open(IND, "$bugstem.ind"); 144 while (my $line = <IND>) { 145 $line =~ s/^s+//; 146 my ($v1, $v2) = split /\s+/, $line; 147 if (!$v1) { print "$line, -- $v1, $v2\n"; exit;} 148 if ($v1 eq 'llike') { 149 $data[$v2] = \$llike; 150 }elsif ($v1 eq 'r') { 151 $data[$v2] = \$r; 152 }elsif ($v1 eq 'k') { 153 $data[$v2] = \$k; 154 }elsif ($v1 eq 'alpha') { 155 $data[$v2] = \$alpha; 156 }elsif ($v1 eq 'beta') { 157 $data[$v2] = \$beta; 158 }elsif ($v1 =~ /^root/) { 159 my $name = $v1; 160 $name =~ /\[(\d+)\]/; 161# print "$1\n"; 162 $data[$v2] = \$node[0][$1]; # [intron#][first node--root], 163 }elsif ($v1 =~ /^node/) { 164 my $name = $v1; 165 $name =~ /\[(\d+),(\d+)\]/; 166# print "$2, $1\n"; 167 $data[$v2] = \$node[$1][$2]; # [intron#][other internal node] 168 } 169 } 170 close(IND); 171} 172 173# write bugs out to a file 174sub write_data { 175 my ($file) = @_; 176# print &Dumper($llike); exit; 177# print scalar @{$node[1]}, "\n"; exit; 178# print &Dumper($node[0]);exit; 179 180 open(OUTFILE, ">$stem.node.bugs.out"); 181 182 my ($nnodes, $nchar) = read_node_num("$stem.$file.bug"); 183 print OUTFILE "inodes\t", $nnodes, "\n"; 184 print OUTFILE "characters\t", $nchar, "\n"; 185 186 my @llike = map {$_->[0]} @{[$llike]}; 187 my @r = map {$_->[0]} @{[$r]}; 188 my @k = map {$_->[0]} @{[$k]}; 189 my @alpha = map {$_->[0]} @{[$alpha]}; 190 my @beta = map {$_->[0]} @{[$beta]}; 191 print OUTFILE "loglikelihood\t", join(' ', @llike), "\n"; 192 print OUTFILE "alpha\t", join(' ', @alpha), "\n"; 193 print OUTFILE "beta\t", join(' ', @beta), "\n"; 194 print OUTFILE "r\t", join(' ', @r), "\n"; 195 print OUTFILE "k\t", join(' ', @k), "\n"; 196 197 $names = read_node_name("$stem.$file.log"); 198 for (my $i = 0; $i < @node; $i++) { 199 print OUTFILE $names->{$i}, "\t"; 200 for (my $j = 1; $j < @{$node[0]}; $j++) { 201 printf OUTFILE "%4.3f\t", $node[$i][$j][0]-1; 202 } 203 print OUTFILE "\n"; 204 } 205 close(OUTFILE); 206} 207 208# convert internal node names used in bugs run 209sub read_node_name { 210 my $file = shift; 211 open(LOG, "$file"); 212 my %names; 213 $names{0} = 'root'; 214 while (<LOG>) { 215 if (/^\s+$/) {last;} 216 } 217 while (<LOG>) { 218 /(\d+):\s*(\S+)/; 219 $names{$1} = $2; 220 } 221 close(LOG); 222# print &Dumper(%names);exit; 223 return \%names; 224} 225 226# find out the number of parameters based on tree size and character num 227sub read_node_num { 228 my $file = shift; 229 open(BUG, "$file"); 230 my ($nchar, $nnodes); 231 while (<BUG>) { 232 if (/nChar = (\d+)/) {$nchar = $1;} 233 elsif (/nNode = (\d+)/) {$nnodes = $1+1;} 234 elsif ($nchar && $nnodes) {last;} 235 } 236 close(BUG); 237 return ($nnodes, $nchar); 238} 239 240# modifying input nexus file (with new name) 241sub modify_nexus { 242 my $nexus = new Bio::NEXUS($infile); 243 create_history_block($nexus, \@node); 244 modify_span_block($nexus); 245 $nexus->write("../${stem}.h.nex"); 246} 247 248# create history block based on BUGS output 249sub create_history_block { 250 my ($nexus, $nodes) = @_; 251 my $otuset = $nexus->get_block('characters', 'intron')->get_otuset()->clone(); 252 253 my @otus; 254 for (my $i = 0; $i < @$nodes; $i++) { 255 my @seq; 256 for (my $j = 1; $j < @{$nodes->[0]}; $j++) { 257 push @seq, [sprintf("%4.3f", 2-$nodes->[$i][$j][0]), (sprintf"%4.3f", $nodes->[$i][$j][0]-1)]; 258 } 259 push @otus, new Bio::NEXUS::TaxUnit($names->{$i}, \@seq); 260 } 261# @otus = sort { $a->get_name cmp $b->get_name } @otus; 262 my $history = new Bio::NEXUS::HistoryBlock('History'); 263 foreach my $otu (@otus) { 264 $otuset->add_otu($otu); 265 } 266 $history->set_title('intron'); 267 $history->add_link('taxa', $nexus->get_name); 268 $history->set_format($nexus->get_block('characters', 'intron')->get_format()); 269 $history->set_otuset($otuset); 270 $history->add_tree($nexus->get_block("trees")->get_tree); 271 $nexus->remove_block('history', 'intron'); 272 $nexus->add_block($history); 273} 274 275# modify span block by adding BUGS parameters 276sub modify_span_block { 277 my $nexus = shift; 278 my %descendant; 279 $descendant{program}='BUGS'; 280 $descendant{version}='unix-0.600'; 281 my $like = $llike->[0]; 282 my $a = $alpha->[0]; 283 my $b = $beta->[0]; 284 $descendant{parameters}="loglikelihood=$like, alpha=$a, beta=$b, burnin=$burnin, monitor=$monitor"; 285 my $spanblock = $nexus->get_block("span"); 286 if (! $spanblock) { 287 $spanblock = new Bio::NEXUS::SpanBlock('span'); 288 $spanblock->{method}{ancestral_inference} = \%descendant; 289 $nexus->add_block($spanblock); 290 }else { 291 $spanblock->{method}{descendant} = \%descendant; 292 } 293} 294 295 296################# POD Documentation ################## 297 298__END__ 299 300=head1 NAME 301 302history.pl 303 304=head1 SYNOPSIS 305 306history.pl [options] <input_file> 307 308 Options: 309 --help -h brief help message 310 --man full documentation 311 --debug output running info 312 -n <run-num> the number of converging initial runs 313 -l <running-level> running level 314 0 new run 315 1 old run with new number of generations 316 2 just retrieve data from existing files 317 -b <burin-in> the number of burnin generations 318 -m <monitor> the number of monitoring generations 319 -v print version information and quit 320 321=head1 Description 322 323 create ancestral intron states based on an alignment and tree 324 in a nexus file. The BUGS program will run many times with small 325 number of generations to find those can converge (to global optimum) 326 and then run one with a large number of generations (convergence) 327 the ancestral states will be computed from the average value of 328 those generations and write back to file in history block 329 330=head1 REQUIRES 331 332BUGS must be installed for running this program. 333 334=head1 VERSION 335 336$Revision: 1.8 $ 337 338=head1 AUTHOR 339 340Chengzhi Liang <liangc@umbi.umd.edu> 341 342=cut 343 344##################### End ########################## 345 346 347