1#!/bin/python 2import scipy.stats, numpy, subprocess, math, sys, gzip, re 3 4# 5# R requirements: 6# install.packages(c("statmod", "emg", "chi", "evir", "lmomco", "gumbel", "bda")) 7# 8 9# Random variate samples 10samples = 100 11 12# Quantile for probabilities 13pqs = map(lambda x: x/10., range(1, 21, 1)) + [ 1e-5, 1e-10, .1234567, math.pi, math.e, math.pi/10, math.e/10 ] 14pqs.sort() 15 16# Quantiles for quantile function 17qqs = [ 0.0001, 0.001, 0.01, 0.10, 0.25, 0.5, 0.75, 0.90, 0.99, 0.999, 0.9999 ] 18 19dfs = [ 20 ("gamma", "1_1", scipy.stats.gamma(1, scale=1./1), "gamma(x, 1, rate=1 %s)", 1), 21 ("gamma", "2_1", scipy.stats.gamma(2, scale=1./1), "gamma(x, 2, rate=1 %s)", 2), 22 ("gamma", "4_1", scipy.stats.gamma(4, scale=1./1), "gamma(x, 4, rate=1 %s)", 3), 23 ("gamma", "4_10", scipy.stats.gamma(4, scale=1./10), "gamma(x, 4, rate=10 %s)", 4), 24 ("gamma", "01_10", scipy.stats.gamma(.1, scale=1./10), "gamma(x, .1, rate=10 %s)", 5), 25 ("gamma", "01_20", scipy.stats.gamma(.1, scale=1./20), "gamma(x, .1, rate=20 %s)", 6), 26 ("gamma", "01_4", scipy.stats.gamma(.1, scale=1./4), "gamma(x, .1, rate=4 %s)", 7), 27 ("gamma", "01_1", scipy.stats.gamma(.1, scale=1./1), "gamma(x, .1, rate=1 %s)", 8), 28 ("beta", "01_01", scipy.stats.beta(.1, .1), "beta(x, .1, .1 %s)", 9), 29 ("beta", "01_05", scipy.stats.beta(.1, .5), "beta(x, .1, .5 %s)", 10), 30 ("beta", "01_1", scipy.stats.beta(.1, 1), "beta(x, .1, 1 %s)", 11), 31 ("beta", "01_2", scipy.stats.beta(.1, 2), "beta(x, .1, 2 %s)", 12), 32 ("beta", "01_4", scipy.stats.beta(.1, 4), "beta(x, .1, 4 %s)", 13), 33 ("beta", "05_01", scipy.stats.beta(.5, .1), "beta(x, .5, .1 %s)", 14), 34 ("beta", "05_05", scipy.stats.beta(.5, .5), "beta(x, .5, .5 %s)", 15), 35 ("beta", "05_1", scipy.stats.beta(.5, 1), "beta(x, .5, 1 %s)", 16), 36 ("beta", "05_2", scipy.stats.beta(.5, 2), "beta(x, .5, 2 %s)", 17), 37 ("beta", "05_4", scipy.stats.beta(.5, 4), "beta(x, .5, 4 %s)", 18), 38 ("beta", "1_01", scipy.stats.beta(1, .1), "beta(x, 1, .1 %s)", 19), 39 ("beta", "1_05", scipy.stats.beta(1, .5), "beta(x, 1, .5 %s)", 20), 40 ("beta", "1_1", scipy.stats.beta(1, 1), "beta(x, 1, 1 %s)", 21), 41 ("beta", "1_2", scipy.stats.beta(1, 2), "beta(x, 1, 2 %s)", 22), 42 ("beta", "1_4", scipy.stats.beta(1, 4), "beta(x, 1, 4 %s)", 23), 43 ("beta", "2_01", scipy.stats.beta(2, .1), "beta(x, 2, .1 %s)", 24), 44 ("beta", "2_05", scipy.stats.beta(2, .5), "beta(x, 2, .5 %s)", 25), 45 ("beta", "2_1", scipy.stats.beta(2, 1), "beta(x, 2, 1 %s)", 26), 46 ("beta", "2_2", scipy.stats.beta(2, 2), "beta(x, 2, 2 %s)", 27), 47 ("beta", "2_4", scipy.stats.beta(2, 4), "beta(x, 2, 4 %s)", 28), 48 ("beta", "4_01", scipy.stats.beta(4, .1), "beta(x, 4, .1 %s)", 29), 49 ("beta", "4_05", scipy.stats.beta(4, .5), "beta(x, 4, .5 %s)", 30), 50 ("beta", "4_1", scipy.stats.beta(4, 1), "beta(x, 4, 1 %s)", 31), 51 ("beta", "4_2", scipy.stats.beta(4, 2), "beta(x, 4, 2 %s)", 32), 52 ("beta", "4_4", scipy.stats.beta(4, 4), "beta(x, 4, 4 %s)", 33), 53 ("beta", "5000_10000", scipy.stats.beta(5000, 10000), "beta(x, 5000, 10000 %s)", 34), 54 ("chisq", "01", scipy.stats.chi2(.1), "chisq(x, .1 %s)", 35), 55 ("chisq", "1", scipy.stats.chi2(1), "chisq(x, 1 %s)", 36), 56 ("chisq", "2", scipy.stats.chi2(2), "chisq(x, 2 %s)", 37), 57 ("chisq", "4", scipy.stats.chi2(4), "chisq(x, 4 %s)", 38), 58 ("chisq", "10", scipy.stats.chi2(10), "chisq(x, 10 %s)", 39), 59 ("norm", "0_1", scipy.stats.norm(0, 1), "norm(x, 0, 1 %s)", 40), 60 ("norm", "1_3", scipy.stats.norm(1, 3), "norm(x, 1, 3 %s)", 41), 61 ("norm", "01_01", scipy.stats.norm(.1, .1), "norm(x, .1, .1 %s)", 42), 62 ("lognorm", "0_1", scipy.stats.lognorm(1, 0, math.exp(0)), "lnorm(x, 0, 1 %s)", 43), 63 ("lognorm", "1_3", scipy.stats.lognorm(3, 0, math.exp(1)), "lnorm(x, 1, 3 %s)", 44), 64 ("lognorm", "01_01", scipy.stats.lognorm(.1, 0, math.exp(.1)), "lnorm(x, .1, .1 %s)", 45), 65 ("unif", "0_1", scipy.stats.uniform(0, 1), "unif(x, 0, 1 %s)", 46), 66 ("unif", "M1_2", scipy.stats.uniform(-1, 3), "unif(x, -1, 2 %s)", 47), 67 ("exp", "01", scipy.stats.expon(scale=1/.1), "exp(x, .1 %s)", 48), 68 ("exp", "05", scipy.stats.expon(scale=1/.5), "exp(x, .5 %s)", 49), 69 ("exp", "1", scipy.stats.expon(scale=1/1.), "exp(x, 1. %s)", 50), 70 ("exp", "2", scipy.stats.expon(scale=1/2.), "exp(x, 2. %s)", 51), 71 ("exp", "4", scipy.stats.expon(scale=1/4.), "exp(x, 4. %s)", 52), 72 ("weibull", "1_1", scipy.stats.exponweib(1,1,scale=1), "weibull(x, 1, 1 %s)", 53), 73 ("weibull", "2_1", scipy.stats.exponweib(1,2,scale=1), "weibull(x, 2, 1 %s)", 54), 74 ("weibull", "4_1", scipy.stats.exponweib(1,4,scale=1), "weibull(x, 4, 1 %s)", 55), 75 ("weibull", "4_10", scipy.stats.exponweib(1,4,scale=10), "weibull(x, 4, 10 %s)", 56), 76 ("weibull", "01_10", scipy.stats.exponweib(1,.1,scale=10), "weibull(x, .1, 10 %s)", 57), 77 ("weibull", "01_20", scipy.stats.exponweib(1,.1,scale=20), "weibull(x, .1, 20 %s)", 58), 78 ("weibull", "01_4", scipy.stats.exponweib(1,.1,scale=4), "weibull(x, .1, 4 %s)", 59), 79 ("weibull", "01_1", scipy.stats.exponweib(1,.1,scale=1), "weibull(x, .1, 1 %s)", 60), 80 ("gumbel", "1_1", scipy.stats.gumbel_r(1,scale=1), "lmomco(x, vec2par(c(1, 1, 0), type=\"gev\") %s)", 61), 81 ("gumbel", "2_1", scipy.stats.gumbel_r(2,scale=1), "lmomco(x, vec2par(c(2, 1, 0), type=\"gev\") %s)", 62), 82 ("gumbel", "4_1", scipy.stats.gumbel_r(4,scale=1), "lmomco(x, vec2par(c(4, 1, 0), type=\"gev\") %s)", 63), 83 ("gumbel", "4_10", scipy.stats.gumbel_r(4,scale=10), "lmomco(x, vec2par(c(4, 10, 0), type=\"gev\") %s)", 64), 84 ("gumbel", "01_10", scipy.stats.gumbel_r(.1,scale=10), "lmomco(x, vec2par(c(.1, 10, 0), type=\"gev\") %s)", 65), 85 ("gumbel", "01_20", scipy.stats.gumbel_r(.1,scale=20), "lmomco(x, vec2par(c(.1, 20, 0), type=\"gev\") %s)", 66), 86 ("gumbel", "01_4", scipy.stats.gumbel_r(.1,scale=4), "lmomco(x, vec2par(c(.1, 4, 0), type=\"gev\") %s)", 67), 87 ("gumbel", "01_1", scipy.stats.gumbel_r(.1,scale=1), "lmomco(x, vec2par(c(.1, 1, 0), type=\"gev\") %s)", 68), 88 ("gev", "08_02_1", scipy.stats.genextreme(.8,loc=.2,scale=1), "lmomco(x, vec2par(c(.2, 1, .8), type=\"gev\") %s)", 69), 89 ("gev", "1_05_1", scipy.stats.genextreme(1,loc=.5,scale=1), "lmomco(x, vec2par(c(.5, 1, 1), type=\"gev\") %s)", 70), 90 ("gev", "1_05_05", scipy.stats.genextreme(1,loc=.5,scale=.5), "lmomco(x, vec2par(c(.5, .5, 1), type=\"gev\") %s)", 71), 91 ("gev", "2_05_05", scipy.stats.genextreme(2,loc=.5,scale=.5), "lmomco(x, vec2par(c(.5, .5, 2), type=\"gev\") %s)", 72), 92 ("gev", "4_05_05", scipy.stats.genextreme(4,loc=.5,scale=.5), "lmomco(x, vec2par(c(.5, .5, 4), type=\"gev\") %s)", 73), 93 ("gev", "M1_05_1", scipy.stats.genextreme(-1,loc=.5,scale=1), None, 74), 94 ("gev", "M1_05_05", scipy.stats.genextreme(-1,loc=.5,scale=.5), None, 75), 95 ("gev", "M2_05_05", scipy.stats.genextreme(-2,loc=.5,scale=.5), None, 76), 96 ("gev", "M4_05_05", scipy.stats.genextreme(-4,loc=.5,scale=.5), None, 77), 97 ("logistic", "05", scipy.stats.logistic(loc=.5), "logis(x, location=.5 %s)", 78), 98 ("logistic", "01", scipy.stats.logistic(loc=.1), "logis(x, location=.1 %s)", 79), 99 ("glogistic", "1_1", scipy.stats.genlogistic(1, loc=1.), None, 80), 100 ("glogistic", "2_05", scipy.stats.genlogistic(2, loc=.5), None, 81), 101 ("glogistic", "05_05", scipy.stats.genlogistic(.5, loc=.5), None, 82), 102 ("expgamma", "1_1", scipy.stats.loggamma(1, scale=1./1), None, 83), 103 ("expgamma", "2_1", scipy.stats.loggamma(2, scale=1./1), None, 84), 104 ("expgamma", "4_1", scipy.stats.loggamma(4, scale=1./1), None, 85), 105 ("expgamma", "4_10", scipy.stats.loggamma(4, scale=1./10), None, 86), 106 ("expgamma", "01_10", scipy.stats.loggamma(.1, scale=1./10), None, 87), 107 ("expgamma", "01_20", scipy.stats.loggamma(.1, scale=1./20), None, 88), 108 ("expgamma", "01_4", scipy.stats.loggamma(.1, scale=1./4), None, 89), 109 ("expgamma", "01_1", scipy.stats.loggamma(.1, scale=1./1), None, 90), 110 ("gpd", "01_05_01", scipy.stats.genpareto(.1,loc=.1,scale=.5), "lmomco(x, vec2par(c(.1, .5, .1), type=\"gpa\") %s)", 91), 111 ("invgauss", "1_1", scipy.stats.invgauss(1.,scale=1.), "invgauss(x, 1, 1 %s)", 92), 112 ("invgauss", "05_1", scipy.stats.invgauss(.5,scale=1.), "invgauss(x, .5, 1 %s)", 93), 113 ("invgauss", "1_05", scipy.stats.invgauss(2.,scale=.5), "invgauss(x, 1., .5 %s)", 94), 114 ("cauchy", "05_1", scipy.stats.cauchy(loc=.5,scale=1), "cauchy(x, 0.5, 1 %s)", 95), 115 ("cauchy", "1_05", scipy.stats.cauchy(loc=1,scale=.5), "cauchy(x, 1, 0.5 %s)", 96), 116 ("chi", "01", scipy.stats.chi(.1), "chi(x, 0.1 %s)", 97), 117 ("chi", "1", scipy.stats.chi(1), "chi(x, 1 %s)", 98), 118 ("chi", "2", scipy.stats.chi(2), "chi(x, 2 %s)", 99), 119 ("chi", "4", scipy.stats.chi(4), "chi(x, 4 %s)", 100), 120 ("chi", "10", scipy.stats.chi(10), "chi(x, 10 %s)", 101), 121 ("emg", "1_3_05", scipy.stats.exponnorm(1./(0.5*3), loc=1, scale=3), "emg(x, 1, 3, 0.5 %s)", 102), 122 ("emg", "1_3_01", scipy.stats.exponnorm(1./(0.1*3), loc=1, scale=3), "emg(x, 1, 3, 0.1 %s)", 102), 123 ("lap", "1_3", scipy.stats.laplace(3, 1), "lap(x, 3, 1 %s)", 103), 124 ("lap", "4_05", scipy.stats.laplace(.5, 1./4), "lap(x, 0.5, 4 %s)", 104), 125 ("ray", "1", scipy.stats.rayleigh(scale=1), "weibull(x, 2, 1*sqrt(2) %s)", 105), 126 ("ray", "2", scipy.stats.rayleigh(scale=2), "weibull(x, 2, 2*sqrt(2) %s)", 106), 127 ("kappa", "01_02_03_04", scipy.stats.kappa4(.4, .3, loc=.1, scale=.2), "kap(x, vec2par(c(.1,.2,.3,.4), type='kap') %s)", 107), 128 ("loglogistic", "1_1", scipy.stats.fisk(1, scale=1), None, 108), 129 ("loglogistic", "2_05", scipy.stats.fisk(2, scale=.5), None, 109), 130 ("loglogistic", "05_05", scipy.stats.fisk(.5, scale=.5), None, 110), 131 ("skewnorm", "01_02_03", None, "gno(x, vec2par(c(.1,.2,.3), type='gno') %s)", 111), 132 ("skewnorm", "1_2_3", None, "gno(x, vec2par(c(1,2,3), type='gno') %s)", 112), 133] 134 135def fmt(x): 136 if isinstance(x, float): x = "%.17e" % x 137 x = re.sub("\\.?0+e", "e", x) 138 x = re.sub("e\\+00$", "", x) 139 return x 140 141pqstr = ",".join(map(fmt, pqs)) 142qqstr = ",".join(map(fmt, qqs)) 143 144pk, of = None, None 145for n1, n2, df, rfunc, seed in sorted(dfs): 146 if len(sys.argv) > 1 and not n1 in sys.argv[1:]: continue 147 if pk != n1: 148 if of: of.close() 149 pk = n1 150 of = gzip.open("%s.ascii.gz" % n1, "w") 151 if df: 152 print >>of, "random_%s" % (n2,), 153 for x in df.rvs(size=samples, random_state=seed): print >>of, fmt(x), 154 print >>of 155 print >>of, "cdf_scipy_%s" % (n2,), 156 for q in pqs: print >>of, fmt(q), fmt(df.cdf(q)), 157 print >>of 158 print >>of, "pdf_scipy_%s" % (n2,), 159 for q in pqs: print >>of, fmt(q), fmt(df.pdf(q)), 160 print >>of 161 print >>of, "logpdf_scipy_%s" % (n2,), 162 for q in pqs: print >>of, fmt(q), fmt(df.logpdf(q)), 163 print >>of 164 print >>of, "quant_scipy_%s" % (n2,), 165 for q in qqs: print >>of, fmt(q), fmt(df.ppf(q)), 166 print >>of 167 168 if rfunc: 169 c = subprocess.Popen(["R", "--quiet", "--no-save", "--slave"], 170 stdin=subprocess.PIPE, stdout=subprocess.PIPE) 171 rin=""" 172library("stats4") 173library("statmod") 174library("emg") 175library("chi") 176library("gumbel") 177library("evir") 178library("lmomco") 179library("bda") 180 181pkap <- cdfkap 182dkap <- pdfkap 183qkap <- quakap 184pgno <- cdfgno 185dgno <- pdfgno 186qgno <- quagno 187 188x <- c( %s ) 189y <- p%s 190cat(formatC(y, digits=50, format="e")) 191cat(" ") 192y <- d%s 193cat(formatC(y, digits=50, format="e")) 194cat(" ") 195y <- d%s 196cat(formatC(y, digits=50, format="e")) 197q() 198""" % (pqstr, rfunc % "", rfunc % "", rfunc % ", log=TRUE") 199 rout, rerr = c.communicate(rin) 200 vals = map(float, rout.split()) 201 assert(len(vals) == len(pqs) * 3 or len(vals) == len(pqs) * 2) 202 203 print >>of, "cdf_gnur_%s" % (n2,), 204 for i in range(0, len(pqs)): 205 print >>of, fmt(pqs[i]), fmt(vals[i]), 206 print >>of 207 print >>of, "pdf_gnur_%s" % (n2,), 208 for i in range(0, len(pqs)): 209 j = i + len(pqs) 210 print >>of, fmt(pqs[i]), fmt(vals[j]), 211 print >>of 212 if len(vals) == len(pqs) * 3: # GEV, GPD do not have a log mode in R 213 print >>of, "logpdf_gnur_%s" % (n2,), 214 for i in range(0, len(pqs)): 215 j = i + 2 * len(pqs) 216 print >>of, fmt(pqs[i]), fmt(vals[j]), 217 print >>of 218 c = subprocess.Popen(["R", "--quiet", "--no-save", "--slave"], 219 stdin=subprocess.PIPE, stdout=subprocess.PIPE) 220 rin=""" 221library("stats4") 222library("statmod") 223library("emg") 224library("chi") 225library("gumbel") 226library("lmomco") 227library("bda") 228 229pkap <- cdfkap 230dkap <- pdfkap 231qkap <- quakap 232pgno <- cdfgno 233dgno <- pdfgno 234qgno <- quagno 235 236x <- c( %s ) 237y <- q%s 238cat(formatC(y, digits=50, format="e")) 239q() 240""" % (qqstr, rfunc % "") 241 rout, rerr = c.communicate(rin) 242 vals = map(float, rout.split()) 243 if len(vals) == len(qqs): # Gumbel does not have qgumbel 244 print >>of, "quant_gnur_%s" % (n2,), 245 for i in range(0, len(qqs)): 246 print >>of, fmt(qqs[i]), fmt(vals[i]), 247 print >>of 248