1# -*- coding: utf-8 -*- 2""" 3 4Created on Thu Feb 28 15:37:53 2013 5 6Author: Josef Perktold 7""" 8 9import numpy as np 10from scipy import stats 11from statsmodels.stats.gof import (chisquare, chisquare_power, 12 chisquare_effectsize) 13 14from numpy.testing import assert_almost_equal 15 16 17nobs = 30000 18n_bins = 5 19probs = 1./np.arange(2, n_bins + 2) 20probs /= probs.sum() 21#nicer 22probs = np.round(probs, 2) 23probs[-1] = 1 - probs[:-1].sum() 24print("probs", probs) 25probs_d = probs.copy() 26delta = 0.01 27probs_d[0] += delta 28probs_d[1] -= delta 29probs_cs = probs.cumsum() 30#rvs = np.random.multinomial(n_bins, probs, size=10) 31#rvs = np.round(np.random.randn(10), 2) 32rvs = np.argmax(np.random.rand(nobs,1) < probs_cs, 1) 33print(probs) 34print(np.bincount(rvs) * (1. / nobs)) 35 36 37freq = np.bincount(rvs) 38print(stats.chisquare(freq, nobs*probs)) 39print('null', chisquare(freq, nobs*probs)) 40print('delta', chisquare(freq, nobs*probs_d)) 41chisq_null, pval_null = chisquare(freq, nobs*probs) 42 43# effect size ? 44d_null = ((freq / float(nobs) - probs)**2 / probs).sum() 45print(d_null) 46d_delta = ((freq / float(nobs) - probs_d)**2 / probs_d).sum() 47print(d_delta) 48d_null_alt = ((probs - probs_d)**2 / probs_d).sum() 49print(d_null_alt) 50 51print('\nchisquare with value') 52chisq, pval = chisquare(freq, nobs*probs_d) 53print(stats.ncx2.sf(chisq_null, n_bins, 0.001 * nobs)) 54print(stats.ncx2.sf(chisq, n_bins, 0.001 * nobs)) 55print(stats.ncx2.sf(chisq, n_bins, d_delta * nobs)) 56print(chisquare(freq, nobs*probs_d, value=np.sqrt(d_delta))) 57print(chisquare(freq, nobs*probs_d, value=np.sqrt(chisq / nobs))) 58print() 59 60assert_almost_equal(stats.chi2.sf(d_delta * nobs, n_bins - 1), 61 chisquare(freq, nobs*probs_d)[1], decimal=13) 62 63crit = stats.chi2.isf(0.05, n_bins - 1) 64power = stats.ncx2.sf(crit, n_bins-1, 0.001**2 * nobs) 65#> library(pwr) 66#> tr = pwr.chisq.test(w =0.001, N =30000 , df = 5-1, sig.level = 0.05, power = NULL) 67assert_almost_equal(power, 0.05147563, decimal=7) 68effect_size = 0.001 69power = chisquare_power(effect_size, nobs, n_bins, alpha=0.05) 70assert_almost_equal(power, 0.05147563, decimal=7) 71print(chisquare(freq, nobs*probs, value=0, ddof=0)) 72d_null_alt = ((probs - probs_d)**2 / probs).sum() 73print(chisquare(freq, nobs*probs, value=np.sqrt(d_null_alt), ddof=0)) 74 75 76#Monte Carlo to check correct size and power of test 77 78d_delta_r = chisquare_effectsize(probs, probs_d) 79n_rep = 10000 80nobs = 3000 81res_boots = np.zeros((n_rep, 6)) 82for i in range(n_rep): 83 rvs = np.argmax(np.random.rand(nobs,1) < probs_cs, 1) 84 freq = np.bincount(rvs) 85 res1 = chisquare(freq, nobs*probs) 86 res2 = chisquare(freq, nobs*probs_d) 87 res3 = chisquare(freq, nobs*probs_d, value=d_delta_r) 88 res_boots[i] = [res1[0], res2[0], res3[0], res1[1], res2[1], res3[1]] 89 90alpha = np.array([0.01, 0.05, 0.1, 0.25, 0.5]) 91chi2_power = chisquare_power(chisquare_effectsize(probs, probs_d), 3000, n_bins, 92 alpha=[0.01, 0.05, 0.1, 0.25, 0.5]) 93print((res_boots[:, 3:] < 0.05).mean(0)) 94reject_freq = (res_boots[:, 3:, None] < alpha).mean(0) 95reject = (res_boots[:, 3:, None] < alpha).sum(0) 96 97desired = np.column_stack((alpha, chi2_power, alpha)).T 98 99print('relative difference Monte Carlo rejection and expected (in %)') 100print((reject_freq / desired - 1) * 100) 101