1#!/usr/bin/perl -w
2######################################################
3# history.pl
4######################################################
5# Author: liangc
6# $Id: history.pl,v 1.8 2009/08/13 20:42:58 astoltzfus Exp $
7
8# create ancestral intron states based on an alignment and tree
9# in a nexus file. The BUGS program will run many times with small
10# number of generations to find those can converge (to global optimum)
11# and then run one with a large number of generations (convergence)
12# the ancestral states will be computed from the average value of
13# those generations and write back to file in history block
14
15use strict;
16use Pod::Usage;
17use Data::Dumper;
18use Getopt::Long;
19use Bio::NEXUS;
20
21my $version = "\$Revision\$";
22
23Getopt::Long::Configure("bundling"); # for short options bundling
24my %opts = ();
25GetOptions(\%opts, 'help|h', 'man', 'v', 'l=s','n=s', 'b=s', 'm=s', 'debug') or pod2usage(2);
26if ( $opts{'man'} ) { pod2usage(-exitval => 0, -verbose => 2) }
27if ( $opts{'help'} ) { pod2usage(1) }
28if ($opts{v}) {
29    print "history.pl $version\n"; exit;
30}
31
32my $infile = shift;
33unless ($infile) {die "history.pl [options] <filename>\n";}
34my $burnin = defined $opts{b} ? $opts{b} : 1000; # burnin generation #
35my $monitor = $opts{m} || 1000; # monitor #
36my $good_num = $opts{n} || 3; # the number of files to be selected
37my $runlevel = defined $opts{l} ? $opts{l} : 0;
38my $debug = $opts{debug};
39
40my $stem = $infile;
41$stem =~ s/^(.+).nex$/$1/;
42$stem =~ s/^(.+\/)?([^\/]+)$/$2/;
43#print "$1 $stem\n"; exit;
44$stem =~ s/[^a-zA-Z0-9.]/\./g; # remove non alphanumeric characters in name
45my $dir = "$stem.dir";
46system "mkdir $dir";
47system "cp $infile $dir/$stem.nex";
48$infile = "$stem.nex";
49chdir "$dir";
50
51my @data; # all monitored data in file
52my ($llike, $alpha, $beta, $r, $k, @node);
53my $maxlike = -1.E+20;
54my $minfile = 0;
55my $maxfile = 2;
56my @like;
57my $names; # name of each node in the index file
58
59#print "$runlevel--$burnin--$monitor\n";exit;
60if ($runlevel == 0) {
61    exec_bugs();
62    re_exec_bugs($minfile, $maxfile, 50, 50);
63    rename "$stem.$maxfile.bug", "$stem.0.bug";
64}
65if ($runlevel < 2) {
66    re_exec_bugs(0, 0, $burnin, $monitor);
67}else {
68    read_bugs_out("$stem.0.bugs1");
69}
70write_data(0);
71modify_nexus();
72
73exit;
74
75# select runs not trapped locally
76sub exec_bugs {
77    $maxfile = 0;
78    for (my $i = 0; $i < 20; $i++) {
79	system ("bugs.pl $stem.$i $infile > $stem.$i.bug") == 0 or die "BUGS execution error!\n";
80	system "bugs <$stem.$i.cmd >$stem.$i.bugs.log";
81	read_bugs_out("bugs1");
82	print "$i: ", $llike->[0], "\n" if $debug;
83	if ( $alpha->[1] != 0 && abs($alpha->[0]/$alpha->[1]) < 1000 &&
84	    $beta->[1] != 0 && abs($beta->[0]/$beta->[1]) < 1000 &&
85	    $r->[1] != 0 && abs($r->[0]/$r->[1]) < 1000 &&
86	    $k->[1] != 0 && abs($k->[0]/$k->[1]) < 1000 ) {
87	    print "   move to $stem.$maxfile ", $llike->[0], "\n" if $debug;
88	    rename "$stem.$i.bug", "$stem.$maxfile.bug";
89	    $like[$maxfile] = $llike->[0];
90	    $maxfile++;
91	}
92	if ($maxfile == $good_num) {last;}
93    }
94    if (--$maxfile == -1) {print "Error for file: $infile; no good starting was found\n"; exit; }
95    print join(' ', @like), "\n" if $debug;
96}
97
98# select a run with maximum mean probability
99sub re_exec_bugs {
100    my ($bugs1, $bugs2, $burnin, $monitor) = @_;
101#    if ($bugs1 == $bugs2) { die "-----Error: $stem\n"; }
102#    my ($nchar, $nnodes) = read_node_num("$stem.$bugs1.bug");
103#    $burnin = 1000; #int(sqrt(sqrt(sqrt($nchar*$nnodes+1))))*1000;
104#    print "burnin: $burnin\n";
105    foreach my $i ($bugs1..$bugs2) {
106	print "$i running...\n" if $debug;
107	system ("bugs.cmd.pl $stem.$i $burnin $monitor") == 0 or die "BUGS error: $infile\n";
108	system "bugs <$stem.$i.cmd >$stem.$i.bugs.log";
109	rename "bugs.ind", "$stem.$i.bugs.ind";
110	rename "bugs.out", "$stem.$i.bugs.out";
111	rename "bugs1.ind", "$stem.$i.bugs1.ind";
112	rename "bugs1.out", "$stem.$i.bugs1.out";
113	read_bugs_out("$stem.$i.bugs1");
114	select_file($i);
115	$like[$i] = $llike->[0];
116    }
117    print join(' ', @like), "\n" if $debug;
118}
119
120# select the run to be used
121sub select_file {
122    my $i = shift;
123    if ($llike->[0] > $maxlike) { $maxlike = $llike->[0]; $maxfile = $i; }
124}
125
126# read .out file
127sub read_bugs_out {
128    my $bugstem = shift;
129    read_ind($bugstem);
130    open (INFILE, "$bugstem.out") or die "Error open file\n";
131
132    for (my $i = 1; my $line = <INFILE>; $i++) {
133#	    print "$line\n"; exit;
134	$line =~ s/^\s+//;
135	${$data[$i]} = [split /\s+/, $line];
136    }
137    close (INFILE);
138}
139
140#read index file
141sub read_ind {
142    my $bugstem = shift;
143    open(IND, "$bugstem.ind");
144    while (my $line = <IND>) {
145	$line =~ s/^s+//;
146	my ($v1, $v2) = split /\s+/, $line;
147	if (!$v1) { print "$line, -- $v1, $v2\n"; exit;}
148	if ($v1 eq 'llike') {
149	    $data[$v2] = \$llike;
150	}elsif ($v1 eq 'r') {
151	    $data[$v2] = \$r;
152	}elsif ($v1 eq 'k') {
153	    $data[$v2] = \$k;
154	}elsif ($v1 eq 'alpha') {
155	    $data[$v2] = \$alpha;
156	}elsif ($v1 eq 'beta') {
157	    $data[$v2] = \$beta;
158	}elsif ($v1 =~ /^root/) {
159	    my $name = $v1;
160	    $name =~ /\[(\d+)\]/;
161#	    print "$1\n";
162	    $data[$v2] = \$node[0][$1]; # [intron#][first node--root],
163	}elsif ($v1 =~ /^node/) {
164	    my $name = $v1;
165	    $name =~ /\[(\d+),(\d+)\]/;
166#	    print "$2, $1\n";
167	    $data[$v2] = \$node[$1][$2]; # [intron#][other internal node]
168	}
169    }
170    close(IND);
171}
172
173# write bugs out to a file
174sub write_data {
175    my ($file) = @_;
176#    print &Dumper($llike); exit;
177#    print scalar @{$node[1]}, "\n"; exit;
178#    print &Dumper($node[0]);exit;
179
180    open(OUTFILE, ">$stem.node.bugs.out");
181
182    my ($nnodes, $nchar) = read_node_num("$stem.$file.bug");
183    print OUTFILE "inodes\t", $nnodes, "\n";
184    print OUTFILE "characters\t", $nchar, "\n";
185
186    my @llike = map {$_->[0]} @{[$llike]};
187    my @r = map {$_->[0]} @{[$r]};
188    my @k = map {$_->[0]} @{[$k]};
189    my @alpha = map {$_->[0]} @{[$alpha]};
190    my @beta = map {$_->[0]} @{[$beta]};
191    print OUTFILE "loglikelihood\t", join(' ', @llike), "\n";
192    print OUTFILE "alpha\t", join(' ', @alpha), "\n";
193    print OUTFILE "beta\t", join(' ', @beta), "\n";
194    print OUTFILE "r\t", join(' ', @r), "\n";
195    print OUTFILE "k\t", join(' ', @k), "\n";
196
197    $names = read_node_name("$stem.$file.log");
198    for (my $i = 0; $i < @node; $i++) {
199	print OUTFILE $names->{$i}, "\t";
200	for (my $j = 1; $j < @{$node[0]}; $j++) {
201	    printf OUTFILE "%4.3f\t", $node[$i][$j][0]-1;
202	}
203	print OUTFILE "\n";
204    }
205    close(OUTFILE);
206}
207
208# convert internal node names used in bugs run
209sub read_node_name {
210    my $file = shift;
211    open(LOG, "$file");
212    my %names;
213    $names{0} = 'root';
214    while (<LOG>) {
215	if (/^\s+$/) {last;}
216    }
217    while (<LOG>) {
218	/(\d+):\s*(\S+)/;
219	$names{$1} = $2;
220    }
221    close(LOG);
222#    print &Dumper(%names);exit;
223    return \%names;
224}
225
226# find out the number of parameters based on tree size and character num
227sub read_node_num {
228    my $file = shift;
229    open(BUG, "$file");
230    my ($nchar, $nnodes);
231    while (<BUG>) {
232	if (/nChar = (\d+)/) {$nchar = $1;}
233	elsif (/nNode = (\d+)/) {$nnodes = $1+1;}
234	elsif ($nchar && $nnodes) {last;}
235    }
236    close(BUG);
237    return ($nnodes, $nchar);
238}
239
240# modifying input nexus file (with new name)
241sub modify_nexus {
242    my $nexus = new Bio::NEXUS($infile);
243    create_history_block($nexus, \@node);
244    modify_span_block($nexus);
245    $nexus->write("../${stem}.h.nex");
246}
247
248# create history block based on BUGS output
249sub create_history_block {
250    my ($nexus, $nodes) = @_;
251    my $otuset = $nexus->get_block('characters', 'intron')->get_otuset()->clone();
252
253    my @otus;
254    for (my $i = 0; $i < @$nodes; $i++) {
255	my @seq;
256	for (my $j = 1; $j < @{$nodes->[0]}; $j++) {
257	    push @seq, [sprintf("%4.3f", 2-$nodes->[$i][$j][0]), (sprintf"%4.3f", $nodes->[$i][$j][0]-1)];
258	}
259	push @otus, new Bio::NEXUS::TaxUnit($names->{$i}, \@seq);
260    }
261#    @otus = sort { $a->get_name cmp $b->get_name } @otus;
262    my $history = new Bio::NEXUS::HistoryBlock('History');
263    foreach my $otu (@otus) {
264	$otuset->add_otu($otu);
265    }
266    $history->set_title('intron');
267    $history->add_link('taxa', $nexus->get_name);
268    $history->set_format($nexus->get_block('characters', 'intron')->get_format());
269    $history->set_otuset($otuset);
270    $history->add_tree($nexus->get_block("trees")->get_tree);
271    $nexus->remove_block('history', 'intron');
272    $nexus->add_block($history);
273}
274
275# modify span block by adding BUGS parameters
276sub modify_span_block {
277    my $nexus = shift;
278    my %descendant;
279    $descendant{program}='BUGS';
280    $descendant{version}='unix-0.600';
281    my $like = $llike->[0];
282    my $a = $alpha->[0];
283    my $b = $beta->[0];
284    $descendant{parameters}="loglikelihood=$like, alpha=$a, beta=$b, burnin=$burnin, monitor=$monitor";
285    my $spanblock = $nexus->get_block("span");
286    if (! $spanblock) {
287	$spanblock = new Bio::NEXUS::SpanBlock('span');
288	$spanblock->{method}{ancestral_inference} = \%descendant;
289	$nexus->add_block($spanblock);
290    }else {
291	$spanblock->{method}{descendant} = \%descendant;
292    }
293}
294
295
296################# POD Documentation ##################
297
298__END__
299
300=head1 NAME
301
302history.pl
303
304=head1 SYNOPSIS
305
306history.pl [options] <input_file>
307
308  Options:
309    --help -h       brief help message
310    --man           full documentation
311    --debug         output running info
312    -n <run-num>           the number of converging initial runs
313    -l <running-level>           running level
314       0          new run
315       1          old run with new number of generations
316       2          just retrieve data from existing files
317    -b <burin-in> the number of burnin generations
318    -m <monitor>  the number of monitoring generations
319    -v            print version information and quit
320
321=head1 Description
322
323 create ancestral intron states based on an alignment and tree
324 in a nexus file. The BUGS program will run many times with small
325 number of generations to find those can converge (to global optimum)
326 and then run one with a large number of generations (convergence)
327 the ancestral states will be computed from the average value of
328 those generations and write back to file in history block
329
330=head1 REQUIRES
331
332BUGS must be installed for running this program.
333
334=head1 VERSION
335
336$Revision: 1.8 $
337
338=head1 AUTHOR
339
340Chengzhi Liang <liangc@umbi.umd.edu>
341
342=cut
343
344##################### End ##########################
345
346
347