1# -*- coding: utf-8 -*- 2""" 3Created on Sat Oct 01 20:20:16 2011 4 5Author: Josef Perktold 6License: BSD-3 7 8TODO: 9check orientation, size and alpha should be increasing for interp1d, 10but what is alpha? can be either sf or cdf probability 11change it to use one consistent notation 12 13check: instead of bound checking I could use the fill-value of the 14interpolators 15""" 16import numpy as np 17from scipy.interpolate import interp1d, interp2d, Rbf 18 19from statsmodels.tools.decorators import cache_readonly 20 21 22class TableDist(object): 23 """ 24 Distribution, critical values and p-values from tables 25 26 currently only 1 extra parameter, e.g. sample size 27 28 Parameters 29 ---------- 30 alpha : array_like, 1d 31 probabiliy in the table, could be either sf (right tail) or cdf (left 32 tail) 33 size : array_like, 1d 34 The sample sizes for the table 35 crit_table : array_like, 2d 36 The sample sizes in the table 37 array with critical values for sample size in rows and probability in 38 columns 39 asymptotic : callable, optional 40 Callable function with the form fn(nobs) that returns len(alpha) 41 critical values where the critical value in position i corresponds to 42 alpha[i] 43 min_nobs : int, optional 44 Minimum number of observations to use the asymptotic distribution. If 45 not provided, uses max(size). 46 max_nobs : int, optional 47 Maximum number of observations to use the tabular distribution. If not 48 provided, uses max(size) 49 50 Notes 51 ----- 52 size and alpha must be sorted and increasing. 53 54 If both min_nobs and max_nobs are provided, then 55 the critical values from the tabular distribution and the asymptotic 56 distribution are linearly blended using the formula 57 :math:`w cv_a + (1-w) cv_t` where the weight is 58 :math:`w = (n - a_{min}) / (a_{max} - a_{min})`. This ensures the 59 transition between the tabular and the asymptotic critical values is 60 continuous. If these are not provided, then the asymptotic critical value 61 is used for nobs > max(size). 62 """ 63 64 def __init__(self, alpha, size, crit_table, asymptotic=None, 65 min_nobs=None, max_nobs=None): 66 self.alpha = np.asarray(alpha) 67 if self.alpha.ndim != 1: 68 raise ValueError('alpha is not 1d') 69 elif (np.diff(self.alpha) <= 0).any(): 70 raise ValueError('alpha is not sorted') 71 self.size = np.asarray(size) 72 if self.size.ndim != 1: 73 raise ValueError('size is not 1d') 74 elif (np.diff(self.size) <= 0).any(): 75 raise ValueError('size is not sorted') 76 if self.size.ndim == 1: 77 if (np.diff(alpha) <= 0).any(): 78 raise ValueError('alpha is not sorted') 79 self.crit_table = np.asarray(crit_table) 80 if self.crit_table.shape != (self.size.shape[0], self.alpha.shape[0]): 81 raise ValueError('crit_table must have shape' 82 '(len(size), len(alpha))') 83 84 self.n_alpha = len(alpha) 85 self.signcrit = np.sign(np.diff(self.crit_table, 1).mean()) 86 if self.signcrit > 0: # increasing 87 self.critv_bounds = self.crit_table[:, [0, 1]] 88 else: 89 self.critv_bounds = self.crit_table[:, [1, 0]] 90 self.asymptotic = None 91 max_size = self.max_size = max(size) 92 93 if asymptotic is not None: 94 try: 95 cv = asymptotic(self.max_size + 1) 96 except Exception as exc: 97 raise type(exc)('Calling asymptotic(self.size+1) failed. The ' 98 'error message was:' 99 '\n\n{err_msg}'.format(err_msg=exc.args[0])) 100 if len(cv) != len(alpha): 101 raise ValueError('asymptotic does not return len(alpha) ' 102 'values') 103 self.asymptotic = asymptotic 104 105 self.min_nobs = max_size if min_nobs is None else min_nobs 106 self.max_nobs = max_size if max_nobs is None else max_nobs 107 if self.min_nobs > max_size: 108 raise ValueError('min_nobs > max(size)') 109 if self.max_nobs > max_size: 110 raise ValueError('max_nobs > max(size)') 111 112 @cache_readonly 113 def polyn(self): 114 polyn = [interp1d(self.size, self.crit_table[:, i]) 115 for i in range(self.n_alpha)] 116 return polyn 117 118 @cache_readonly 119 def poly2d(self): 120 # check for monotonicity ? 121 # fix this, interp needs increasing 122 poly2d = interp2d(self.size, self.alpha, self.crit_table) 123 return poly2d 124 125 @cache_readonly 126 def polyrbf(self): 127 xs, xa = np.meshgrid(self.size.astype(float), self.alpha) 128 polyrbf = Rbf(xs.ravel(), xa.ravel(), self.crit_table.T.ravel(), 129 function='linear') 130 return polyrbf 131 132 def _critvals(self, n): 133 """ 134 Rows of the table, linearly interpolated for given sample size 135 136 Parameters 137 ---------- 138 n : float 139 sample size, second parameter of the table 140 141 Returns 142 ------- 143 critv : ndarray, 1d 144 critical values (ppf) corresponding to a row of the table 145 146 Notes 147 ----- 148 This is used in two step interpolation, or if we want to know the 149 critical values for all alphas for any sample size that we can obtain 150 through interpolation 151 """ 152 if n > self.max_size: 153 if self.asymptotic is not None: 154 cv = self.asymptotic(n) 155 else: 156 raise ValueError('n is above max(size) and no asymptotic ' 157 'distribtuion is provided') 158 else: 159 cv = ([p(n) for p in self.polyn]) 160 if n > self.min_nobs: 161 w = (n - self.min_nobs) / (self.max_nobs - self.min_nobs) 162 w = min(1.0, w) 163 a_cv = self.asymptotic(n) 164 cv = w * a_cv + (1 - w) * cv 165 166 return cv 167 168 def prob(self, x, n): 169 """ 170 Find pvalues by interpolation, either cdf(x) 171 172 Returns extreme probabilities, 0.001 and 0.2, for out of range 173 174 Parameters 175 ---------- 176 x : array_like 177 observed value, assumed to follow the distribution in the table 178 n : float 179 sample size, second parameter of the table 180 181 Returns 182 ------- 183 prob : array_like 184 This is the probability for each value of x, the p-value in 185 underlying distribution is for a statistical test. 186 """ 187 critv = self._critvals(n) 188 alpha = self.alpha 189 190 if self.signcrit < 1: 191 # reverse if critv is decreasing 192 critv, alpha = critv[::-1], alpha[::-1] 193 194 # now critv is increasing 195 if np.size(x) == 1: 196 if x < critv[0]: 197 return alpha[0] 198 elif x > critv[-1]: 199 return alpha[-1] 200 return interp1d(critv, alpha)(x)[()] 201 else: 202 # vectorized 203 cond_low = (x < critv[0]) 204 cond_high = (x > critv[-1]) 205 cond_interior = ~np.logical_or(cond_low, cond_high) 206 207 probs = np.nan * np.ones(x.shape) # mistake if nan left 208 probs[cond_low] = alpha[0] 209 probs[cond_low] = alpha[-1] 210 probs[cond_interior] = interp1d(critv, alpha)(x[cond_interior]) 211 212 return probs 213 214 def crit(self, prob, n): 215 """ 216 Returns interpolated quantiles, similar to ppf or isf 217 218 use two sequential 1d interpolation, first by n then by prob 219 220 Parameters 221 ---------- 222 prob : array_like 223 probabilities corresponding to the definition of table columns 224 n : int or float 225 sample size, second parameter of the table 226 227 Returns 228 ------- 229 ppf : array_like 230 critical values with same shape as prob 231 """ 232 prob = np.asarray(prob) 233 alpha = self.alpha 234 critv = self._critvals(n) 235 236 # vectorized 237 cond_ilow = (prob > alpha[0]) 238 cond_ihigh = (prob < alpha[-1]) 239 cond_interior = np.logical_or(cond_ilow, cond_ihigh) 240 241 # scalar 242 if prob.size == 1: 243 if cond_interior: 244 return interp1d(alpha, critv)(prob) 245 else: 246 return np.nan 247 248 # vectorized 249 quantile = np.nan * np.ones(prob.shape) # nans for outside 250 quantile[cond_interior] = interp1d(alpha, critv)(prob[cond_interior]) 251 return quantile 252 253 def crit3(self, prob, n): 254 """ 255 Returns interpolated quantiles, similar to ppf or isf 256 257 uses Rbf to interpolate critical values as function of `prob` and `n` 258 259 Parameters 260 ---------- 261 prob : array_like 262 probabilities corresponding to the definition of table columns 263 n : int or float 264 sample size, second parameter of the table 265 266 Returns 267 ------- 268 ppf : array_like 269 critical values with same shape as prob, returns nan for arguments 270 that are outside of the table bounds 271 """ 272 prob = np.asarray(prob) 273 alpha = self.alpha 274 275 # vectorized 276 cond_ilow = (prob > alpha[0]) 277 cond_ihigh = (prob < alpha[-1]) 278 cond_interior = np.logical_or(cond_ilow, cond_ihigh) 279 280 # scalar 281 if prob.size == 1: 282 if cond_interior: 283 return self.polyrbf(n, prob) 284 else: 285 return np.nan 286 287 # vectorized 288 quantile = np.nan * np.ones(prob.shape) # nans for outside 289 290 quantile[cond_interior] = self.polyrbf(n, prob[cond_interior]) 291 return quantile 292