1#!/usr/local/bin/perl -w
2
3#
4# dbmultistats
5# Copyright (C) 1991-1998 by John Heidemann <johnh@isi.edu>
6# $Id: dbmultistats,v 1.23 2003/05/23 04:17:45 johnh Exp $
7#
8# This program is distributed under terms of the GNU general
9# public license, version 2.  See the file COPYING
10# in $dblibdir for details.
11#
12sub usage {
13	print STDERR <<END;
14usage: $0 [-dm] [-c ConfidencePercent] [-f FormatForm] [-q NumberOfQuartiles] TagField ValueField
15
16Computes a series of stats for a table from stdin,
17consuming the data.
18For each unique value of TagField, mean is run over ValueField.
19
20This program consumes O(1) memory.
21
22Options:
23    -c ConfidencePercent    specify confidence interval
24    -f FormatForm	    specify output format
25    -m      compute median value
26    -q N    compute quartile (if N=4) or n-tile values (the scores
27	    that are 1 Nth of the way across the population)
28    -d			    debugging
29
30Bugs:
31Currently doesn't correctly pass through field seperators.
32
33Sample input:
34#h experiment duration
35ufs_mab_sys 37.2
36ufs_mab_sys 37.3
37ufs_rcp_real 264.5
38ufs_rcp_real 277.9
39
40Sample command:
41cat DATA/stats.jdb | dbmultistats experiment duration
42
43Sample output:
44#h      experiment      mean    stddev  pct_rsd conf_range      conf_low       conf_high        conf_pct        sum     sum_squared     min     max     n
45ufs_mab_sys     37.25 0.070711 0.18983 0.6353 36.615 37.885 0.95 74.5 2775.1 37.2 37.3 2
46ufs_rcp_real    271.2 9.4752 3.4938 85.13 186.07 356.33 0.95 542.4 1.4719e+05 264.5 277.9 2
47#  | /home/johnh/BIN/DB/dbmultistats experiment duration
48
49END
50# '
51	exit 1;
52}
53
54BEGIN {
55    $dbbindir = "/home/johnh/BIN/DB";
56    $dblibdir = "/home/johnh/BIN/DB";
57    push(@INC, $dblibdir);
58}
59use DbGetopt;
60require "$dblibdir/dblib.pl";
61
62@orig_argv = @ARGV;
63my($prog) = &progname;
64my($conf_pct) = undef;
65my($format) = "%.5g";
66my($debug) = undef;
67my($dbopts) = new DbGetopt("c:df:mq:?", \@ARGV);
68my($ntile, $median);
69my($ch);
70while ($dbopts->getopt) {
71    $ch = $dbopts->opt;
72    if ($ch eq 'c') {
73	$conf_pct = $dbopts->optarg;
74    } elsif ($ch eq 'f') {
75	$format = $dbopts->optarg;
76    } elsif ($ch eq 'd') {
77	$debug = 1;
78    } elsif ($ch eq 'm') {
79	$median = 1;
80    } elsif ($ch eq 'q') {
81	$ntile = $dbopts->optarg;
82    } else {
83	&usage;
84    };
85};
86
87&usage if ($#ARGV != 1);
88my($tagcol, $valcol) = @ARGV;
89
90&readprocess_header;
91die ("$prog: unknown column name ``$tagcol''.\n") if (!defined($colnametonum{$tagcol}));
92my($tagf) = $colnametonum{$tagcol};
93my($tagname) = $colnames[$tagf];
94die ("$prog: unknown column name ``$valcol''.\n") if (!defined($colnametonum{$valcol}));
95my($valf) = $colnametonum{$valcol};
96
97my(%tag_files, %tag_counts, $tag, $path);
98
99
100# read data
101while (<STDIN>) {
102    &delayed_pass_comments() && next;
103    &split_cols;
104
105    $tag = $f[$tagf];
106    $val = $f[$valf];
107
108    if (defined($tag_files{$tag})) {
109	$tag_counts{$tag}++;
110    } else {
111	# open a new file
112	$path = $tag_files{$tag} = &db_tmpfile(TMP);
113	close(TMP);
114	open PATH, ">>$path";
115	print PATH "$col_headertag data\n";
116	$tag_counts{$tag} = 1;
117    };
118    $path = $tag_files{$tag};
119    open PATH, ">>$path";
120    print PATH "$val\n";
121};
122
123@dbstats_args = ("$dbbindir/dbstats");
124push(@dbstats_args, '-c', $conf_pct) if (defined($conf_pct));
125push(@dbstats_args, '-q', $ntile) if (defined($ntile));
126push(@dbstats_args, '-m') if (defined($median));
127push(@dbstats_args, '0');
128
129# send each tag to mean
130foreach $tag (sort keys %tag_files) {
131    # close it
132    $path = $tag_files{$tag};
133    close(PATH);
134
135    open(FROMMEAN, join(" ", @dbstats_args) . " <$tag_files{$tag} |") || die "$prog: cannot run dbstats.\n";
136    @meanout = <FROMMEAN>;
137    close(FROMMEAN);
138
139    if (defined($meanoutheader)) {
140        print "# $tag\n" if ($debug);
141    	die("$prog: dbstats header mismatch on tag ``$tag''.\n". join("\n", @meanout) . "\n")
142	    if ($meanout[0] ne $meanoutheader);
143    } else {
144    	$meanoutheader = $meanout[0];
145    	&process_header($meanoutheader);
146	&col_create($tagname, 0);   # create the tag column at the beginning
147	&write_header();
148        print "# $tag\n" if ($debug);
149    };
150
151    print "$tag$outfs$meanout[1]";
152}
153
154# close up shop
155&delayed_flush_comments();
156print "#  | $prog ", join(" ", @orig_argv), "\n";
157exit 0;
158
159if (0) {
160    my($x);
161    $x = $col_headertag;
162    $x = $outfs;
163    $x = $colnames;
164    $x = <TMP>;
165    $x = <TMP>;
166}
167