1#!/bin/python
2import scipy.stats, numpy, subprocess, math, sys, gzip, re
3
4#
5# R requirements:
6# install.packages(c("statmod", "emg", "chi", "evir", "lmomco", "gumbel", "bda"))
7#
8
9# Random variate samples
10samples = 100
11
12# Quantile for probabilities
13pqs = map(lambda x: x/10., range(1, 21, 1)) + [ 1e-5, 1e-10, .1234567, math.pi, math.e, math.pi/10, math.e/10 ]
14pqs.sort()
15
16# Quantiles for quantile function
17qqs = [ 0.0001, 0.001, 0.01, 0.10, 0.25, 0.5, 0.75, 0.90, 0.99, 0.999, 0.9999 ]
18
19dfs = [
20	("gamma", "1_1", scipy.stats.gamma(1, scale=1./1), "gamma(x, 1, rate=1 %s)", 1),
21	("gamma", "2_1", scipy.stats.gamma(2, scale=1./1), "gamma(x, 2, rate=1 %s)", 2),
22	("gamma", "4_1", scipy.stats.gamma(4, scale=1./1), "gamma(x, 4, rate=1 %s)", 3),
23	("gamma", "4_10", scipy.stats.gamma(4, scale=1./10), "gamma(x, 4, rate=10 %s)", 4),
24	("gamma", "01_10", scipy.stats.gamma(.1, scale=1./10), "gamma(x, .1, rate=10 %s)", 5),
25	("gamma", "01_20", scipy.stats.gamma(.1, scale=1./20), "gamma(x, .1, rate=20 %s)", 6),
26	("gamma", "01_4", scipy.stats.gamma(.1, scale=1./4), "gamma(x, .1, rate=4 %s)", 7),
27	("gamma", "01_1", scipy.stats.gamma(.1, scale=1./1), "gamma(x, .1, rate=1 %s)", 8),
28	("beta", "01_01", scipy.stats.beta(.1, .1), "beta(x, .1, .1 %s)", 9),
29	("beta", "01_05", scipy.stats.beta(.1, .5), "beta(x, .1, .5 %s)", 10),
30	("beta", "01_1", scipy.stats.beta(.1, 1), "beta(x, .1, 1 %s)", 11),
31	("beta", "01_2", scipy.stats.beta(.1, 2), "beta(x, .1, 2 %s)", 12),
32	("beta", "01_4", scipy.stats.beta(.1, 4), "beta(x, .1, 4 %s)", 13),
33	("beta", "05_01", scipy.stats.beta(.5, .1), "beta(x, .5, .1 %s)", 14),
34	("beta", "05_05", scipy.stats.beta(.5, .5), "beta(x, .5, .5 %s)", 15),
35	("beta", "05_1", scipy.stats.beta(.5, 1), "beta(x, .5, 1 %s)", 16),
36	("beta", "05_2", scipy.stats.beta(.5, 2), "beta(x, .5, 2 %s)", 17),
37	("beta", "05_4", scipy.stats.beta(.5, 4), "beta(x, .5, 4 %s)", 18),
38	("beta", "1_01", scipy.stats.beta(1, .1), "beta(x, 1, .1 %s)", 19),
39	("beta", "1_05", scipy.stats.beta(1, .5), "beta(x, 1, .5 %s)", 20),
40	("beta", "1_1", scipy.stats.beta(1, 1), "beta(x, 1, 1 %s)", 21),
41	("beta", "1_2", scipy.stats.beta(1, 2), "beta(x, 1, 2 %s)", 22),
42	("beta", "1_4", scipy.stats.beta(1, 4), "beta(x, 1, 4 %s)", 23),
43	("beta", "2_01", scipy.stats.beta(2, .1), "beta(x, 2, .1 %s)", 24),
44	("beta", "2_05", scipy.stats.beta(2, .5), "beta(x, 2, .5 %s)", 25),
45	("beta", "2_1", scipy.stats.beta(2, 1), "beta(x, 2, 1 %s)", 26),
46	("beta", "2_2", scipy.stats.beta(2, 2), "beta(x, 2, 2 %s)", 27),
47	("beta", "2_4", scipy.stats.beta(2, 4), "beta(x, 2, 4 %s)", 28),
48	("beta", "4_01", scipy.stats.beta(4, .1), "beta(x, 4, .1 %s)", 29),
49	("beta", "4_05", scipy.stats.beta(4, .5), "beta(x, 4, .5 %s)", 30),
50	("beta", "4_1", scipy.stats.beta(4, 1), "beta(x, 4, 1 %s)", 31),
51	("beta", "4_2", scipy.stats.beta(4, 2), "beta(x, 4, 2 %s)", 32),
52	("beta", "4_4", scipy.stats.beta(4, 4), "beta(x, 4, 4 %s)", 33),
53	("beta", "5000_10000", scipy.stats.beta(5000, 10000), "beta(x, 5000, 10000 %s)", 34),
54	("chisq", "01", scipy.stats.chi2(.1), "chisq(x, .1 %s)", 35),
55	("chisq", "1", scipy.stats.chi2(1), "chisq(x, 1 %s)", 36),
56	("chisq", "2", scipy.stats.chi2(2), "chisq(x, 2 %s)", 37),
57	("chisq", "4", scipy.stats.chi2(4), "chisq(x, 4 %s)", 38),
58	("chisq", "10", scipy.stats.chi2(10), "chisq(x, 10 %s)", 39),
59	("norm", "0_1", scipy.stats.norm(0, 1), "norm(x, 0, 1 %s)", 40),
60	("norm", "1_3", scipy.stats.norm(1, 3), "norm(x, 1, 3 %s)", 41),
61	("norm", "01_01", scipy.stats.norm(.1, .1), "norm(x, .1, .1 %s)", 42),
62	("lognorm", "0_1", scipy.stats.lognorm(1, 0, math.exp(0)), "lnorm(x, 0, 1 %s)", 43),
63	("lognorm", "1_3", scipy.stats.lognorm(3, 0, math.exp(1)), "lnorm(x, 1, 3 %s)", 44),
64	("lognorm", "01_01", scipy.stats.lognorm(.1, 0, math.exp(.1)), "lnorm(x, .1, .1 %s)", 45),
65	("unif", "0_1", scipy.stats.uniform(0, 1), "unif(x, 0, 1 %s)", 46),
66	("unif", "M1_2", scipy.stats.uniform(-1, 3), "unif(x, -1, 2 %s)", 47),
67	("exp", "01", scipy.stats.expon(scale=1/.1), "exp(x, .1 %s)", 48),
68	("exp", "05", scipy.stats.expon(scale=1/.5), "exp(x, .5 %s)", 49),
69	("exp", "1", scipy.stats.expon(scale=1/1.), "exp(x, 1. %s)", 50),
70	("exp", "2", scipy.stats.expon(scale=1/2.), "exp(x, 2. %s)", 51),
71	("exp", "4", scipy.stats.expon(scale=1/4.), "exp(x, 4. %s)", 52),
72	("weibull", "1_1", scipy.stats.exponweib(1,1,scale=1), "weibull(x, 1, 1 %s)", 53),
73	("weibull", "2_1", scipy.stats.exponweib(1,2,scale=1), "weibull(x, 2, 1 %s)", 54),
74	("weibull", "4_1", scipy.stats.exponweib(1,4,scale=1), "weibull(x, 4, 1 %s)", 55),
75	("weibull", "4_10", scipy.stats.exponweib(1,4,scale=10), "weibull(x, 4, 10 %s)", 56),
76	("weibull", "01_10", scipy.stats.exponweib(1,.1,scale=10), "weibull(x, .1, 10 %s)", 57),
77	("weibull", "01_20", scipy.stats.exponweib(1,.1,scale=20), "weibull(x, .1, 20 %s)", 58),
78	("weibull", "01_4", scipy.stats.exponweib(1,.1,scale=4), "weibull(x, .1, 4 %s)", 59),
79	("weibull", "01_1", scipy.stats.exponweib(1,.1,scale=1), "weibull(x, .1, 1 %s)", 60),
80	("gumbel", "1_1", scipy.stats.gumbel_r(1,scale=1), "lmomco(x, vec2par(c(1, 1, 0), type=\"gev\") %s)", 61),
81	("gumbel", "2_1", scipy.stats.gumbel_r(2,scale=1), "lmomco(x, vec2par(c(2, 1, 0), type=\"gev\") %s)", 62),
82	("gumbel", "4_1", scipy.stats.gumbel_r(4,scale=1), "lmomco(x, vec2par(c(4, 1, 0), type=\"gev\") %s)", 63),
83	("gumbel", "4_10", scipy.stats.gumbel_r(4,scale=10), "lmomco(x, vec2par(c(4, 10, 0), type=\"gev\") %s)", 64),
84	("gumbel", "01_10", scipy.stats.gumbel_r(.1,scale=10), "lmomco(x, vec2par(c(.1, 10, 0), type=\"gev\") %s)", 65),
85	("gumbel", "01_20", scipy.stats.gumbel_r(.1,scale=20), "lmomco(x, vec2par(c(.1, 20, 0), type=\"gev\") %s)", 66),
86	("gumbel", "01_4", scipy.stats.gumbel_r(.1,scale=4), "lmomco(x, vec2par(c(.1, 4, 0), type=\"gev\") %s)", 67),
87	("gumbel", "01_1", scipy.stats.gumbel_r(.1,scale=1), "lmomco(x, vec2par(c(.1, 1, 0), type=\"gev\") %s)", 68),
88	("gev", "08_02_1", scipy.stats.genextreme(.8,loc=.2,scale=1), "lmomco(x, vec2par(c(.2, 1, .8), type=\"gev\") %s)", 69),
89	("gev", "1_05_1", scipy.stats.genextreme(1,loc=.5,scale=1), "lmomco(x, vec2par(c(.5, 1, 1), type=\"gev\") %s)", 70),
90	("gev", "1_05_05", scipy.stats.genextreme(1,loc=.5,scale=.5), "lmomco(x, vec2par(c(.5, .5, 1), type=\"gev\") %s)", 71),
91	("gev", "2_05_05", scipy.stats.genextreme(2,loc=.5,scale=.5), "lmomco(x, vec2par(c(.5, .5, 2), type=\"gev\") %s)", 72),
92	("gev", "4_05_05", scipy.stats.genextreme(4,loc=.5,scale=.5), "lmomco(x, vec2par(c(.5, .5, 4), type=\"gev\") %s)", 73),
93	("gev", "M1_05_1", scipy.stats.genextreme(-1,loc=.5,scale=1), None, 74),
94	("gev", "M1_05_05", scipy.stats.genextreme(-1,loc=.5,scale=.5), None, 75),
95	("gev", "M2_05_05", scipy.stats.genextreme(-2,loc=.5,scale=.5), None, 76),
96	("gev", "M4_05_05", scipy.stats.genextreme(-4,loc=.5,scale=.5), None, 77),
97	("logistic", "05", scipy.stats.logistic(loc=.5), "logis(x, location=.5 %s)", 78),
98	("logistic", "01", scipy.stats.logistic(loc=.1), "logis(x, location=.1 %s)", 79),
99	("glogistic", "1_1", scipy.stats.genlogistic(1, loc=1.), None, 80),
100	("glogistic", "2_05", scipy.stats.genlogistic(2, loc=.5), None, 81),
101	("glogistic", "05_05", scipy.stats.genlogistic(.5, loc=.5), None, 82),
102	("expgamma", "1_1", scipy.stats.loggamma(1, scale=1./1), None, 83),
103	("expgamma", "2_1", scipy.stats.loggamma(2, scale=1./1), None, 84),
104	("expgamma", "4_1", scipy.stats.loggamma(4, scale=1./1), None, 85),
105	("expgamma", "4_10", scipy.stats.loggamma(4, scale=1./10), None, 86),
106	("expgamma", "01_10", scipy.stats.loggamma(.1, scale=1./10), None, 87),
107	("expgamma", "01_20", scipy.stats.loggamma(.1, scale=1./20), None, 88),
108	("expgamma", "01_4", scipy.stats.loggamma(.1, scale=1./4), None, 89),
109	("expgamma", "01_1", scipy.stats.loggamma(.1, scale=1./1), None, 90),
110	("gpd", "01_05_01", scipy.stats.genpareto(.1,loc=.1,scale=.5), "lmomco(x, vec2par(c(.1, .5, .1), type=\"gpa\") %s)", 91),
111	("invgauss", "1_1", scipy.stats.invgauss(1.,scale=1.), "invgauss(x, 1, 1 %s)", 92),
112	("invgauss", "05_1", scipy.stats.invgauss(.5,scale=1.), "invgauss(x, .5, 1 %s)", 93),
113	("invgauss", "1_05", scipy.stats.invgauss(2.,scale=.5), "invgauss(x, 1., .5 %s)", 94),
114	("cauchy", "05_1", scipy.stats.cauchy(loc=.5,scale=1), "cauchy(x, 0.5, 1 %s)", 95),
115	("cauchy", "1_05", scipy.stats.cauchy(loc=1,scale=.5), "cauchy(x, 1, 0.5 %s)", 96),
116	("chi", "01", scipy.stats.chi(.1), "chi(x, 0.1 %s)", 97),
117	("chi", "1", scipy.stats.chi(1), "chi(x, 1 %s)", 98),
118	("chi", "2", scipy.stats.chi(2), "chi(x, 2 %s)", 99),
119	("chi", "4", scipy.stats.chi(4), "chi(x, 4 %s)", 100),
120	("chi", "10", scipy.stats.chi(10), "chi(x, 10 %s)", 101),
121	("emg", "1_3_05", scipy.stats.exponnorm(1./(0.5*3), loc=1, scale=3), "emg(x, 1, 3, 0.5 %s)", 102),
122	("emg", "1_3_01", scipy.stats.exponnorm(1./(0.1*3), loc=1, scale=3), "emg(x, 1, 3, 0.1 %s)", 102),
123	("lap", "1_3", scipy.stats.laplace(3, 1), "lap(x, 3, 1 %s)", 103),
124	("lap", "4_05", scipy.stats.laplace(.5, 1./4), "lap(x, 0.5, 4 %s)", 104),
125	("ray", "1", scipy.stats.rayleigh(scale=1), "weibull(x, 2, 1*sqrt(2) %s)", 105),
126	("ray", "2", scipy.stats.rayleigh(scale=2), "weibull(x, 2, 2*sqrt(2) %s)", 106),
127	("kappa", "01_02_03_04", scipy.stats.kappa4(.4, .3, loc=.1, scale=.2), "kap(x, vec2par(c(.1,.2,.3,.4), type='kap') %s)", 107),
128	("loglogistic", "1_1", scipy.stats.fisk(1, scale=1), None, 108),
129	("loglogistic", "2_05", scipy.stats.fisk(2, scale=.5), None, 109),
130	("loglogistic", "05_05", scipy.stats.fisk(.5, scale=.5), None, 110),
131	("skewnorm", "01_02_03", None, "gno(x, vec2par(c(.1,.2,.3), type='gno') %s)", 111),
132	("skewnorm", "1_2_3", None, "gno(x, vec2par(c(1,2,3), type='gno') %s)", 112),
133]
134
135def fmt(x):
136	if isinstance(x, float): x = "%.17e" % x
137	x = re.sub("\\.?0+e", "e", x)
138	x = re.sub("e\\+00$", "", x)
139	return x
140
141pqstr = ",".join(map(fmt, pqs))
142qqstr = ",".join(map(fmt, qqs))
143
144pk, of = None, None
145for n1, n2, df, rfunc, seed in sorted(dfs):
146	if len(sys.argv) > 1 and not n1 in sys.argv[1:]: continue
147	if pk != n1:
148		if of: of.close()
149		pk = n1
150		of = gzip.open("%s.ascii.gz" % n1, "w")
151	if df:
152		print >>of, "random_%s" % (n2,),
153		for x in df.rvs(size=samples, random_state=seed): print >>of, fmt(x),
154		print >>of
155		print >>of, "cdf_scipy_%s" % (n2,),
156		for q in pqs: print >>of, fmt(q), fmt(df.cdf(q)),
157		print >>of
158		print >>of, "pdf_scipy_%s" % (n2,),
159		for q in pqs: print >>of, fmt(q), fmt(df.pdf(q)),
160		print >>of
161		print >>of, "logpdf_scipy_%s" % (n2,),
162		for q in pqs: print >>of, fmt(q), fmt(df.logpdf(q)),
163		print >>of
164		print >>of, "quant_scipy_%s" % (n2,),
165		for q in qqs: print >>of, fmt(q), fmt(df.ppf(q)),
166		print >>of
167
168	if rfunc:
169		c = subprocess.Popen(["R", "--quiet", "--no-save", "--slave"],
170			stdin=subprocess.PIPE, stdout=subprocess.PIPE)
171		rin="""
172library("stats4")
173library("statmod")
174library("emg")
175library("chi")
176library("gumbel")
177library("evir")
178library("lmomco")
179library("bda")
180
181pkap <- cdfkap
182dkap <- pdfkap
183qkap <- quakap
184pgno <- cdfgno
185dgno <- pdfgno
186qgno <- quagno
187
188x <- c( %s )
189y <- p%s
190cat(formatC(y, digits=50, format="e"))
191cat(" ")
192y <- d%s
193cat(formatC(y, digits=50, format="e"))
194cat(" ")
195y <- d%s
196cat(formatC(y, digits=50, format="e"))
197q()
198""" % (pqstr, rfunc % "", rfunc % "", rfunc % ", log=TRUE")
199		rout, rerr = c.communicate(rin)
200		vals = map(float, rout.split())
201		assert(len(vals) == len(pqs) * 3 or len(vals) == len(pqs) * 2)
202
203		print >>of, "cdf_gnur_%s" % (n2,),
204		for i in range(0, len(pqs)):
205			print >>of, fmt(pqs[i]), fmt(vals[i]),
206		print >>of
207		print >>of, "pdf_gnur_%s" % (n2,),
208		for i in range(0, len(pqs)):
209			j = i + len(pqs)
210			print >>of, fmt(pqs[i]), fmt(vals[j]),
211		print >>of
212		if len(vals) == len(pqs) * 3: # GEV, GPD do not have a log mode in R
213			print >>of, "logpdf_gnur_%s" % (n2,),
214			for i in range(0, len(pqs)):
215				j = i + 2 * len(pqs)
216				print >>of, fmt(pqs[i]), fmt(vals[j]),
217			print >>of
218		c = subprocess.Popen(["R", "--quiet", "--no-save", "--slave"],
219			stdin=subprocess.PIPE, stdout=subprocess.PIPE)
220		rin="""
221library("stats4")
222library("statmod")
223library("emg")
224library("chi")
225library("gumbel")
226library("lmomco")
227library("bda")
228
229pkap <- cdfkap
230dkap <- pdfkap
231qkap <- quakap
232pgno <- cdfgno
233dgno <- pdfgno
234qgno <- quagno
235
236x <- c( %s )
237y <- q%s
238cat(formatC(y, digits=50, format="e"))
239q()
240""" % (qqstr, rfunc % "")
241		rout, rerr = c.communicate(rin)
242		vals = map(float, rout.split())
243		if len(vals) == len(qqs): # Gumbel does not have qgumbel
244			print >>of, "quant_gnur_%s" % (n2,),
245			for i in range(0, len(qqs)):
246				print >>of, fmt(qqs[i]), fmt(vals[i]),
247			print >>of
248