1# -*- coding: utf-8 -*-
2"""
3
4Created on Thu Feb 28 15:37:53 2013
5
6Author: Josef Perktold
7"""
8
9import numpy as np
10from scipy import stats
11from statsmodels.stats.gof import (chisquare, chisquare_power,
12                                  chisquare_effectsize)
13
14from numpy.testing import assert_almost_equal
15
16
17nobs = 30000
18n_bins = 5
19probs = 1./np.arange(2, n_bins + 2)
20probs /= probs.sum()
21#nicer
22probs = np.round(probs, 2)
23probs[-1] = 1 - probs[:-1].sum()
24print("probs", probs)
25probs_d = probs.copy()
26delta = 0.01
27probs_d[0] += delta
28probs_d[1] -= delta
29probs_cs = probs.cumsum()
30#rvs = np.random.multinomial(n_bins, probs, size=10)
31#rvs = np.round(np.random.randn(10), 2)
32rvs = np.argmax(np.random.rand(nobs,1) < probs_cs, 1)
33print(probs)
34print(np.bincount(rvs) * (1. / nobs))
35
36
37freq = np.bincount(rvs)
38print(stats.chisquare(freq, nobs*probs))
39print('null', chisquare(freq, nobs*probs))
40print('delta', chisquare(freq, nobs*probs_d))
41chisq_null, pval_null = chisquare(freq, nobs*probs)
42
43# effect size ?
44d_null = ((freq / float(nobs) - probs)**2 / probs).sum()
45print(d_null)
46d_delta = ((freq / float(nobs) - probs_d)**2 / probs_d).sum()
47print(d_delta)
48d_null_alt = ((probs - probs_d)**2 / probs_d).sum()
49print(d_null_alt)
50
51print('\nchisquare with value')
52chisq, pval = chisquare(freq, nobs*probs_d)
53print(stats.ncx2.sf(chisq_null, n_bins, 0.001 * nobs))
54print(stats.ncx2.sf(chisq, n_bins, 0.001 * nobs))
55print(stats.ncx2.sf(chisq, n_bins, d_delta * nobs))
56print(chisquare(freq, nobs*probs_d, value=np.sqrt(d_delta)))
57print(chisquare(freq, nobs*probs_d, value=np.sqrt(chisq / nobs)))
58print()
59
60assert_almost_equal(stats.chi2.sf(d_delta * nobs, n_bins - 1),
61                    chisquare(freq, nobs*probs_d)[1], decimal=13)
62
63crit = stats.chi2.isf(0.05, n_bins - 1)
64power = stats.ncx2.sf(crit, n_bins-1, 0.001**2 * nobs)
65#> library(pwr)
66#> tr = pwr.chisq.test(w =0.001, N =30000 , df = 5-1, sig.level = 0.05, power = NULL)
67assert_almost_equal(power, 0.05147563, decimal=7)
68effect_size = 0.001
69power = chisquare_power(effect_size, nobs, n_bins, alpha=0.05)
70assert_almost_equal(power, 0.05147563, decimal=7)
71print(chisquare(freq, nobs*probs, value=0, ddof=0))
72d_null_alt = ((probs - probs_d)**2 / probs).sum()
73print(chisquare(freq, nobs*probs, value=np.sqrt(d_null_alt), ddof=0))
74
75
76#Monte Carlo to check correct size and power of test
77
78d_delta_r = chisquare_effectsize(probs, probs_d)
79n_rep = 10000
80nobs = 3000
81res_boots = np.zeros((n_rep, 6))
82for i in range(n_rep):
83    rvs = np.argmax(np.random.rand(nobs,1) < probs_cs, 1)
84    freq = np.bincount(rvs)
85    res1 = chisquare(freq, nobs*probs)
86    res2 = chisquare(freq, nobs*probs_d)
87    res3 = chisquare(freq, nobs*probs_d, value=d_delta_r)
88    res_boots[i] = [res1[0], res2[0], res3[0], res1[1], res2[1], res3[1]]
89
90alpha = np.array([0.01, 0.05, 0.1, 0.25, 0.5])
91chi2_power = chisquare_power(chisquare_effectsize(probs, probs_d), 3000, n_bins,
92                             alpha=[0.01, 0.05, 0.1, 0.25, 0.5])
93print((res_boots[:, 3:] < 0.05).mean(0))
94reject_freq = (res_boots[:, 3:, None] < alpha).mean(0)
95reject = (res_boots[:, 3:, None] < alpha).sum(0)
96
97desired = np.column_stack((alpha, chi2_power, alpha)).T
98
99print('relative difference Monte Carlo rejection and expected (in %)')
100print((reject_freq / desired - 1) * 100)
101