#!/usr/bin/python
#
# Simson's simple stats. If what you want isn't here, use stats.

import math
import os
import sys
import time


class statbag:
    """A simple statistics package for one- and two-dimensional values.

    Values are accumulated with addx() (single variable; also feeds the
    histogram) or addxy() (paired values).  All statistics are computed
    on demand from the stored lists, so they always reflect the current
    contents of the bag.
    """

    def __init__(self):
        self.x = []
        self.y = []
        self.hist = {}  # single value histogram: x value -> occurrence count

    def __add__(self, another):
        """Return a new statbag holding the values of both operands.

        The x and y lists are concatenated (self first) and the two
        histograms are merged by summing the counts of equal keys.
        Neither operand is modified.
        """
        new = statbag()
        new.x = self.x + another.x
        new.y = self.y + another.y
        new.hist = dict(self.hist)
        for value, occurrences in another.hist.items():
            new.hist[value] = new.hist.get(value, 0) + occurrences
        return new

    def addx(self, x):
        """Record a single value and count it in the histogram."""
        self.x.append(x)
        self.hist[x] = self.hist.get(x, 0) + 1

    def addxy(self, x, y):
        """Record an (x, y) pair.  The histogram is not updated."""
        self.x.append(x)
        self.y.append(y)

    def count(self):
        """Return the number of x values recorded."""
        return len(self.x)

    def convert_to_float(self):
        """Convert every stored x and y value to float, in place."""
        # Slice assignment keeps the same list objects, so outside
        # references to self.x / self.y see the converted values too.
        self.x[:] = [float(v) for v in self.x]
        self.y[:] = [float(v) for v in self.y]

    def sumx(self):
        """Return the sum of the x values (0 when empty)."""
        return sum(self.x)

    def sumy(self):
        """Return the sum of the y values (0 when empty)."""
        return sum(self.y)

    def sumxx(self):
        """Return the sum of the squared x values (0 when empty)."""
        return sum(v * v for v in self.x)

    def sumyy(self):
        """Return the sum of the squared y values (0 when empty)."""
        return sum(v * v for v in self.y)

    def average(self):
        """Return the arithmetic mean of the x values as a float.

        String entries in self.x are converted to float in place first.
        Raises ZeroDivisionError when no values have been recorded.
        """
        for i, value in enumerate(self.x):
            if isinstance(value, str):
                self.x[i] = float(value)
        return float(self.sumx()) / self.count()

    def minx(self):
        """Return the smallest x value.  Raises IndexError when empty."""
        if not self.x:
            raise IndexError("minx() of an empty statbag")
        return min(self.x)

    def maxx(self):
        """Return the largest x value.  Raises IndexError when empty."""
        if not self.x:
            raise IndexError("maxx() of an empty statbag")
        return max(self.x)

    def rangex(self):
        """Return maxx() - minx()."""
        return self.maxx() - self.minx()

    def variance(self):
        """Return the population variance of the x values.

        This is the mean of the squared deviations from the average,
        i.e. the square of stddev().  Raises ZeroDivisionError when no
        values have been recorded.
        """
        avg = self.average()
        squared_deviations = sum((v - avg) * (v - avg) for v in self.x)
        return squared_deviations / self.count()

    def stddev(self):
        """Return the population standard deviation of the x values."""
        return math.sqrt(self.variance())

    # Two variable statistics
    def sumxy(self):
        """Return the sum of x[i] * y[i] over all recorded pairs."""
        assert len(self.x) == len(self.y), "x and y must have the same length"
        return sum(a * b for a, b in zip(self.x, self.y))

    def correlation(self):
        """Return the correlation coefficient of the (x, y) pairs.

        Returns 0 ("not correlated") when the denominator is zero, for
        example when x or y is constant or when the bag is empty.
        """
        n = len(self.x)
        sumx = self.sumx()
        sumy = self.sumy()
        sumxx = self.sumxx()
        sumyy = self.sumyy()
        sumxy = self.sumxy()
        top = n * sumxy - sumx * sumy
        spread = (n * sumxx - sumx * sumx) * (n * sumyy - sumy * sumy)
        # Floating-point rounding can push a (mathematically non-negative)
        # product slightly below zero; treat that like zero instead of
        # letting math.sqrt raise ValueError.
        if spread <= 0:
            return 0  # not correlated
        return top / math.sqrt(spread)

    def xystr(self):
        """Return a string of all the xy values, one "x y" pair per line."""
        return "".join("%g %g\n" % (xval, self.y[i])
                       for i, xval in enumerate(self.x))

    def stats1(self):
        """Return the single-variable statistics as a multi-line string."""
        lines = [
            "Single variable stats:\n",
            "count= %d\n" % self.count(),
            "min: %g max: %g range: %g\n" % (self.minx(), self.maxx(), self.rangex()),
            "sum: %g sum of squares: %g \n" % (self.sumx(), self.sumxx()),
            "average: %g\n" % (self.average()),
            "variance: %g stddev: %g\n" % (self.variance(), self.stddev()),
        ]
        return "".join(lines)

    def print_stats1(self):
        """Print the single-variable statistics produced by stats1()."""
        for line in self.stats1().splitlines():
            print(line)

    def histogram(self):
        "Return a histogram --- a hash mapping each x value to its count"
        return self.hist

    def print_histogram(self, xtitle, ytitle):
        "Print a histogram given XTITLE and YTITLE"
        print("%20s %10s" % (xtitle, ytitle))
        for value in sorted(self.hist):
            print("%20s %10d" % (value, self.hist[value]))

    def plot_date_histogram(self, fname, title, width, height):
        """Plot the histogram as a per-day bar chart using gnuplot.

        The x values must be time tuples accepted by time.mktime().  For
        the gap-filling below to recognise adjacent days, histogram keys
        need the form (year, month, day, 0, 0, 0, 0, 0, 0), which is the
        form add_days() produces.

        Writes gnuplot commands to FNAME.txt and data to FNAME.dat, then
        runs gnuplot, which is told to write a WIDTH x HEIGHT PNG to
        FNAME.  TITLE is the plot legend.  Both helper files are left on
        disk afterwards.
        """
        def add_days(date, days):
            # Shift by whole days, then truncate back to a midnight tuple.
            shifted = time.mktime(date) + 60 * 60 * 24 * days
            return time.localtime(shifted)[0:3] + (0, 0, 0, 0, 0, 0)

        first = add_days(self.minx(), -1)  # start one day before
        last = add_days(self.maxx(), 1)    # go to one day after

        cmd_file = fname + ".txt"
        dat_file = fname + ".dat"

        # Generate output for every day...
        # And generate a "0" for every day that we don't have an entry
        # that follows an actual day...
        hist = self.histogram()
        with open(dat_file, "w") as d:
            for day in sorted(hist):
                # Check for the previous day
                yesterday = add_days(day, -1)
                if yesterday not in hist:
                    d.write("%d/%d/%d 0\n" % (yesterday[1], yesterday[2], yesterday[0]))
                d.write("%d/%d/%d %d\n" % (day[1], day[2], day[0], hist[day]))
                # Check for the next day
                tomorrow = add_days(day, 1)
                if tomorrow not in hist:
                    d.write("%d/%d/%d 0\n" % (tomorrow[1], tomorrow[2], tomorrow[0]))

        with open(cmd_file, "w") as f:
            f.write("set terminal png small size %d,%d\n" % (width, height))  # "small" is fontsize
            f.write("set output '%s'\n" % fname)
            f.write("set xdata time\n")
            # NOTE(review): the data file holds the full year number while
            # the format below uses %y -- confirm against the gnuplot
            # version in use before changing either side.
            f.write("set timefmt '%m/%d/%y'\n")
            # NOTE(review): the upper bound uses last[0]+1 (year plus one);
            # kept as-is because the intent cannot be confirmed from here.
            f.write("set xrange ['%d/%d/%d':'%d/%d/%d']\n" %
                    (first[1], first[2], first[0], last[1], last[2], last[0] + 1))
            f.write("set format x '%m/%d'\n")
            f.write("set boxwidth 0.5 relative\n")
            f.write("plot '%s' using 1:2 t '%s' with boxes fs solid\n" % (dat_file, title))
            f.write("quit\n")

        os.system("gnuplot %s" % cmd_file)
        #os.unlink(cmd_file)
        #os.unlink(dat_file)


def main():
    """Read one line of numbers from stdin and print their statistics."""
    print("Enter your numbers on a line seperated by spaces:")
    line = sys.stdin.readline()
    st = statbag()
    # split() without an argument tolerates repeated spaces, tabs and
    # leading/trailing whitespace.
    for token in line.split():
        st.addx(float(token))
    if st.count() == 0:
        print("No numbers entered.")
        return
    st.print_stats1()


if __name__ == "__main__":
    main()