1#!/usr/local/bin/perl -w 2 3# 4# dbmultistats 5# Copyright (C) 1991-1998 by John Heidemann <johnh@isi.edu> 6# $Id: dbmultistats,v 1.23 2003/05/23 04:17:45 johnh Exp $ 7# 8# This program is distributed under terms of the GNU general 9# public license, version 2. See the file COPYING 10# in $dblibdir for details. 11# 12sub usage { 13 print STDERR <<END; 14usage: $0 [-dm] [-c ConfidencePercent] [-f FormatForm] [-q NumberOfQuartiles] TagField ValueField 15 16Computes a series of stats for a table from stdin, 17consuming the data. 18For each unique value of TagField, mean is run over ValueField. 19 20This program consumes O(1) memory. 21 22Options: 23 -c ConfidencePercent specify confidence interval 24 -f FormatForm specify output format 25 -m compute median value 26 -q N compute quartile (if N=4) or n-tile values (the scores 27 that are 1 Nth of the way across the population) 28 -d debugging 29 30Bugs: 31Currently doesn't correctly pass through field seperators. 32 33Sample input: 34#h experiment duration 35ufs_mab_sys 37.2 36ufs_mab_sys 37.3 37ufs_rcp_real 264.5 38ufs_rcp_real 277.9 39 40Sample command: 41cat DATA/stats.jdb | dbmultistats experiment duration 42 43Sample output: 44#h experiment mean stddev pct_rsd conf_range conf_low conf_high conf_pct sum sum_squared min max n 45ufs_mab_sys 37.25 0.070711 0.18983 0.6353 36.615 37.885 0.95 74.5 2775.1 37.2 37.3 2 46ufs_rcp_real 271.2 9.4752 3.4938 85.13 186.07 356.33 0.95 542.4 1.4719e+05 264.5 277.9 2 47# | /home/johnh/BIN/DB/dbmultistats experiment duration 48 49END 50# ' 51 exit 1; 52} 53 54BEGIN { 55 $dbbindir = "/home/johnh/BIN/DB"; 56 $dblibdir = "/home/johnh/BIN/DB"; 57 push(@INC, $dblibdir); 58} 59use DbGetopt; 60require "$dblibdir/dblib.pl"; 61 62@orig_argv = @ARGV; 63my($prog) = &progname; 64my($conf_pct) = undef; 65my($format) = "%.5g"; 66my($debug) = undef; 67my($dbopts) = new DbGetopt("c:df:mq:?", \@ARGV); 68my($ntile, $median); 69my($ch); 70while ($dbopts->getopt) { 71 $ch = $dbopts->opt; 72 if ($ch eq 'c') { 73 $conf_pct = $dbopts->optarg; 74 } elsif ($ch eq 'f') { 75 $format = $dbopts->optarg; 76 } elsif ($ch eq 'd') { 77 $debug = 1; 78 } elsif ($ch eq 'm') { 79 $median = 1; 80 } elsif ($ch eq 'q') { 81 $ntile = $dbopts->optarg; 82 } else { 83 &usage; 84 }; 85}; 86 87&usage if ($#ARGV != 1); 88my($tagcol, $valcol) = @ARGV; 89 90&readprocess_header; 91die ("$prog: unknown column name ``$tagcol''.\n") if (!defined($colnametonum{$tagcol})); 92my($tagf) = $colnametonum{$tagcol}; 93my($tagname) = $colnames[$tagf]; 94die ("$prog: unknown column name ``$valcol''.\n") if (!defined($colnametonum{$valcol})); 95my($valf) = $colnametonum{$valcol}; 96 97my(%tag_files, %tag_counts, $tag, $path); 98 99 100# read data 101while (<STDIN>) { 102 &delayed_pass_comments() && next; 103 &split_cols; 104 105 $tag = $f[$tagf]; 106 $val = $f[$valf]; 107 108 if (defined($tag_files{$tag})) { 109 $tag_counts{$tag}++; 110 } else { 111 # open a new file 112 $path = $tag_files{$tag} = &db_tmpfile(TMP); 113 close(TMP); 114 open PATH, ">>$path"; 115 print PATH "$col_headertag data\n"; 116 $tag_counts{$tag} = 1; 117 }; 118 $path = $tag_files{$tag}; 119 open PATH, ">>$path"; 120 print PATH "$val\n"; 121}; 122 123@dbstats_args = ("$dbbindir/dbstats"); 124push(@dbstats_args, '-c', $conf_pct) if (defined($conf_pct)); 125push(@dbstats_args, '-q', $ntile) if (defined($ntile)); 126push(@dbstats_args, '-m') if (defined($median)); 127push(@dbstats_args, '0'); 128 129# send each tag to mean 130foreach $tag (sort keys %tag_files) { 131 # close it 132 $path = $tag_files{$tag}; 133 close(PATH); 134 135 open(FROMMEAN, join(" ", @dbstats_args) . " <$tag_files{$tag} |") || die "$prog: cannot run dbstats.\n"; 136 @meanout = <FROMMEAN>; 137 close(FROMMEAN); 138 139 if (defined($meanoutheader)) { 140 print "# $tag\n" if ($debug); 141 die("$prog: dbstats header mismatch on tag ``$tag''.\n". join("\n", @meanout) . "\n") 142 if ($meanout[0] ne $meanoutheader); 143 } else { 144 $meanoutheader = $meanout[0]; 145 &process_header($meanoutheader); 146 &col_create($tagname, 0); # create the tag column at the beginning 147 &write_header(); 148 print "# $tag\n" if ($debug); 149 }; 150 151 print "$tag$outfs$meanout[1]"; 152} 153 154# close up shop 155&delayed_flush_comments(); 156print "# | $prog ", join(" ", @orig_argv), "\n"; 157exit 0; 158 159if (0) { 160 my($x); 161 $x = $col_headertag; 162 $x = $outfs; 163 $x = $colnames; 164 $x = <TMP>; 165 $x = <TMP>; 166} 167