1# -*- coding: utf-8 -*- 2"""Tests and Confidence Intervals for Binomial Proportions 3 4Created on Fri Mar 01 00:23:07 2013 5 6Author: Josef Perktold 7License: BSD-3 8""" 9 10from statsmodels.compat.python import lzip 11import numpy as np 12from scipy import stats, optimize 13from sys import float_info 14 15from statsmodels.stats.base import AllPairsResults 16from statsmodels.tools.sm_exceptions import HypothesisTestWarning 17from statsmodels.stats.weightstats import _zstat_generic2 18from statsmodels.stats.base import HolderTuple 19from statsmodels.tools.testing import Holder 20 21 22def proportion_confint(count, nobs, alpha=0.05, method='normal'): 23 '''confidence interval for a binomial proportion 24 25 Parameters 26 ---------- 27 count : int or array_array_like 28 number of successes, can be pandas Series or DataFrame 29 nobs : int 30 total number of trials 31 alpha : float in (0, 1) 32 significance level, default 0.05 33 method : {'normal', 'agresti_coull', 'beta', 'wilson', 'binom_test'} 34 default: 'normal' 35 method to use for confidence interval, 36 currently available methods : 37 38 - `normal` : asymptotic normal approximation 39 - `agresti_coull` : Agresti-Coull interval 40 - `beta` : Clopper-Pearson interval based on Beta distribution 41 - `wilson` : Wilson Score interval 42 - `jeffreys` : Jeffreys Bayesian Interval 43 - `binom_test` : experimental, inversion of binom_test 44 45 Returns 46 ------- 47 ci_low, ci_upp : float, ndarray, or pandas Series or DataFrame 48 lower and upper confidence level with coverage (approximately) 1-alpha. 49 When a pandas object is returned, then the index is taken from the 50 `count`. 51 52 Notes 53 ----- 54 Beta, the Clopper-Pearson exact interval has coverage at least 1-alpha, 55 but is in general conservative. Most of the other methods have average 56 coverage equal to 1-alpha, but will have smaller coverage in some cases. 57 58 The 'beta' and 'jeffreys' interval are central, they use alpha/2 in each 59 tail, and alpha is not adjusted at the boundaries. In the extreme case 60 when `count` is zero or equal to `nobs`, then the coverage will be only 61 1 - alpha/2 in the case of 'beta'. 62 63 The confidence intervals are clipped to be in the [0, 1] interval in the 64 case of 'normal' and 'agresti_coull'. 65 66 Method "binom_test" directly inverts the binomial test in scipy.stats. 67 which has discrete steps. 68 69 TODO: binom_test intervals raise an exception in small samples if one 70 interval bound is close to zero or one. 71 72 References 73 ---------- 74 https://en.wikipedia.org/wiki/Binomial_proportion_confidence_interval 75 76 Brown, Lawrence D.; Cai, T. Tony; DasGupta, Anirban (2001). "Interval 77 Estimation for a Binomial Proportion", 78 Statistical Science 16 (2): 101–133. doi:10.1214/ss/1009213286. 79 80 ''' 81 82 pd_index = getattr(count, 'index', None) 83 if pd_index is not None and callable(pd_index): 84 # this rules out lists, lists have an index method 85 pd_index = None 86 count = np.asarray(count) 87 nobs = np.asarray(nobs) 88 89 q_ = count * 1. / nobs 90 alpha_2 = 0.5 * alpha 91 92 if method == 'normal': 93 std_ = np.sqrt(q_ * (1 - q_) / nobs) 94 dist = stats.norm.isf(alpha / 2.) * std_ 95 ci_low = q_ - dist 96 ci_upp = q_ + dist 97 98 elif method == 'binom_test': 99 # inverting the binomial test 100 def func(qi): 101 return stats.binom_test(q_ * nobs, nobs, p=qi) - alpha 102 if count == 0: 103 ci_low = 0 104 else: 105 ci_low = optimize.brentq(func, float_info.min, q_) 106 if count == nobs: 107 ci_upp = 1 108 else: 109 ci_upp = optimize.brentq(func, q_, 1. - float_info.epsilon) 110 111 elif method == 'beta': 112 ci_low = stats.beta.ppf(alpha_2, count, nobs - count + 1) 113 ci_upp = stats.beta.isf(alpha_2, count + 1, nobs - count) 114 115 if np.ndim(ci_low) > 0: 116 ci_low[q_ == 0] = 0 117 ci_upp[q_ == 1] = 1 118 else: 119 ci_low = ci_low if (q_ != 0) else 0 120 ci_upp = ci_upp if (q_ != 1) else 1 121 122 elif method == 'agresti_coull': 123 crit = stats.norm.isf(alpha / 2.) 124 nobs_c = nobs + crit**2 125 q_c = (count + crit**2 / 2.) / nobs_c 126 std_c = np.sqrt(q_c * (1. - q_c) / nobs_c) 127 dist = crit * std_c 128 ci_low = q_c - dist 129 ci_upp = q_c + dist 130 131 elif method == 'wilson': 132 crit = stats.norm.isf(alpha / 2.) 133 crit2 = crit**2 134 denom = 1 + crit2 / nobs 135 center = (q_ + crit2 / (2 * nobs)) / denom 136 dist = crit * np.sqrt(q_ * (1. - q_) / nobs + crit2 / (4. * nobs**2)) 137 dist /= denom 138 ci_low = center - dist 139 ci_upp = center + dist 140 141 # method adjusted to be more forgiving of misspellings or incorrect option name 142 elif method[:4] == 'jeff': 143 ci_low, ci_upp = stats.beta.interval(1 - alpha, count + 0.5, 144 nobs - count + 0.5) 145 146 else: 147 raise NotImplementedError('method "%s" is not available' % method) 148 149 if method in ['normal', 'agresti_coull']: 150 ci_low = np.clip(ci_low, 0, 1) 151 ci_upp = np.clip(ci_upp, 0, 1) 152 if pd_index is not None and np.ndim(ci_low) > 0: 153 import pandas as pd 154 if np.ndim(ci_low) == 1: 155 ci_low = pd.Series(ci_low, index=pd_index) 156 ci_upp = pd.Series(ci_upp, index=pd_index) 157 if np.ndim(ci_low) == 2: 158 ci_low = pd.DataFrame(ci_low, index=pd_index) 159 ci_upp = pd.DataFrame(ci_upp, index=pd_index) 160 161 return ci_low, ci_upp 162 163 164def multinomial_proportions_confint(counts, alpha=0.05, method='goodman'): 165 '''Confidence intervals for multinomial proportions. 166 167 Parameters 168 ---------- 169 counts : array_like of int, 1-D 170 Number of observations in each category. 171 alpha : float in (0, 1), optional 172 Significance level, defaults to 0.05. 173 method : {'goodman', 'sison-glaz'}, optional 174 Method to use to compute the confidence intervals; available methods 175 are: 176 177 - `goodman`: based on a chi-squared approximation, valid if all 178 values in `counts` are greater or equal to 5 [2]_ 179 - `sison-glaz`: less conservative than `goodman`, but only valid if 180 `counts` has 7 or more categories (``len(counts) >= 7``) [3]_ 181 182 Returns 183 ------- 184 confint : ndarray, 2-D 185 Array of [lower, upper] confidence levels for each category, such that 186 overall coverage is (approximately) `1-alpha`. 187 188 Raises 189 ------ 190 ValueError 191 If `alpha` is not in `(0, 1)` (bounds excluded), or if the values in 192 `counts` are not all positive or null. 193 NotImplementedError 194 If `method` is not kown. 195 Exception 196 When ``method == 'sison-glaz'``, if for some reason `c` cannot be 197 computed; this signals a bug and should be reported. 198 199 Notes 200 ----- 201 The `goodman` method [2]_ is based on approximating a statistic based on 202 the multinomial as a chi-squared random variable. The usual recommendation 203 is that this is valid if all the values in `counts` are greater than or 204 equal to 5. There is no condition on the number of categories for this 205 method. 206 207 The `sison-glaz` method [3]_ approximates the multinomial probabilities, 208 and evaluates that with a maximum-likelihood estimator. The first 209 approximation is an Edgeworth expansion that converges when the number of 210 categories goes to infinity, and the maximum-likelihood estimator converges 211 when the number of observations (``sum(counts)``) goes to infinity. In 212 their paper, Sison & Glaz demo their method with at least 7 categories, so 213 ``len(counts) >= 7`` with all values in `counts` at or above 5 can be used 214 as a rule of thumb for the validity of this method. This method is less 215 conservative than the `goodman` method (i.e. it will yield confidence 216 intervals closer to the desired significance level), but produces 217 confidence intervals of uniform width over all categories (except when the 218 intervals reach 0 or 1, in which case they are truncated), which makes it 219 most useful when proportions are of similar magnitude. 220 221 Aside from the original sources ([1]_, [2]_, and [3]_), the implementation 222 uses the formulas (though not the code) presented in [4]_ and [5]_. 223 224 References 225 ---------- 226 .. [1] Levin, Bruce, "A representation for multinomial cumulative 227 distribution functions," The Annals of Statistics, Vol. 9, No. 5, 228 1981, pp. 1123-1126. 229 230 .. [2] Goodman, L.A., "On simultaneous confidence intervals for multinomial 231 proportions," Technometrics, Vol. 7, No. 2, 1965, pp. 247-254. 232 233 .. [3] Sison, Cristina P., and Joseph Glaz, "Simultaneous Confidence 234 Intervals and Sample Size Determination for Multinomial 235 Proportions," Journal of the American Statistical Association, 236 Vol. 90, No. 429, 1995, pp. 366-369. 237 238 .. [4] May, Warren L., and William D. Johnson, "A SAS® macro for 239 constructing simultaneous confidence intervals for multinomial 240 proportions," Computer methods and programs in Biomedicine, Vol. 53, 241 No. 3, 1997, pp. 153-162. 242 243 .. [5] May, Warren L., and William D. Johnson, "Constructing two-sided 244 simultaneous confidence intervals for multinomial proportions for 245 small counts in a large number of cells," Journal of Statistical 246 Software, Vol. 5, No. 6, 2000, pp. 1-24. 247 ''' 248 if alpha <= 0 or alpha >= 1: 249 raise ValueError('alpha must be in (0, 1), bounds excluded') 250 counts = np.array(counts, dtype=float) 251 if (counts < 0).any(): 252 raise ValueError('counts must be >= 0') 253 254 n = counts.sum() 255 k = len(counts) 256 proportions = counts / n 257 if method == 'goodman': 258 chi2 = stats.chi2.ppf(1 - alpha / k, 1) 259 delta = chi2 ** 2 + (4 * n * proportions * chi2 * (1 - proportions)) 260 region = ((2 * n * proportions + chi2 + 261 np.array([- np.sqrt(delta), np.sqrt(delta)])) / 262 (2 * (chi2 + n))).T 263 elif method[:5] == 'sison': # We accept any name starting with 'sison' 264 # Define a few functions we'll use a lot. 265 def poisson_interval(interval, p): 266 """Compute P(b <= Z <= a) where Z ~ Poisson(p) and 267 `interval = (b, a)`.""" 268 b, a = interval 269 prob = stats.poisson.cdf(a, p) - stats.poisson.cdf(b - 1, p) 270 return prob 271 272 def truncated_poisson_factorial_moment(interval, r, p): 273 """Compute mu_r, the r-th factorial moment of a poisson random 274 variable of parameter `p` truncated to `interval = (b, a)`.""" 275 b, a = interval 276 return p ** r * (1 - ((poisson_interval((a - r + 1, a), p) - 277 poisson_interval((b - r, b - 1), p)) / 278 poisson_interval((b, a), p))) 279 280 def edgeworth(intervals): 281 """Compute the Edgeworth expansion term of Sison & Glaz's formula 282 (1) (approximated probability for multinomial proportions in a 283 given box).""" 284 # Compute means and central moments of the truncated poisson 285 # variables. 286 mu_r1, mu_r2, mu_r3, mu_r4 = [ 287 np.array([truncated_poisson_factorial_moment(interval, r, p) 288 for (interval, p) in zip(intervals, counts)]) 289 for r in range(1, 5) 290 ] 291 mu = mu_r1 292 mu2 = mu_r2 + mu - mu ** 2 293 mu3 = mu_r3 + mu_r2 * (3 - 3 * mu) + mu - 3 * mu ** 2 + 2 * mu ** 3 294 mu4 = (mu_r4 + mu_r3 * (6 - 4 * mu) + 295 mu_r2 * (7 - 12 * mu + 6 * mu ** 2) + 296 mu - 4 * mu ** 2 + 6 * mu ** 3 - 3 * mu ** 4) 297 298 # Compute expansion factors, gamma_1 and gamma_2. 299 g1 = mu3.sum() / mu2.sum() ** 1.5 300 g2 = (mu4.sum() - 3 * (mu2 ** 2).sum()) / mu2.sum() ** 2 301 302 # Compute the expansion itself. 303 x = (n - mu.sum()) / np.sqrt(mu2.sum()) 304 phi = np.exp(- x ** 2 / 2) / np.sqrt(2 * np.pi) 305 H3 = x ** 3 - 3 * x 306 H4 = x ** 4 - 6 * x ** 2 + 3 307 H6 = x ** 6 - 15 * x ** 4 + 45 * x ** 2 - 15 308 f = phi * (1 + g1 * H3 / 6 + g2 * H4 / 24 + g1 ** 2 * H6 / 72) 309 return f / np.sqrt(mu2.sum()) 310 311 312 def approximated_multinomial_interval(intervals): 313 """Compute approximated probability for Multinomial(n, proportions) 314 to be in `intervals` (Sison & Glaz's formula (1)).""" 315 return np.exp( 316 np.sum(np.log([poisson_interval(interval, p) 317 for (interval, p) in zip(intervals, counts)])) + 318 np.log(edgeworth(intervals)) - 319 np.log(stats.poisson._pmf(n, n)) 320 ) 321 322 def nu(c): 323 """Compute interval coverage for a given `c` (Sison & Glaz's 324 formula (7)).""" 325 return approximated_multinomial_interval( 326 [(np.maximum(count - c, 0), np.minimum(count + c, n)) 327 for count in counts]) 328 329 # Find the value of `c` that will give us the confidence intervals 330 # (solving nu(c) <= 1 - alpha < nu(c + 1). 331 c = 1.0 332 nuc = nu(c) 333 nucp1 = nu(c + 1) 334 while not (nuc <= (1 - alpha) < nucp1): 335 if c > n: 336 raise Exception("Couldn't find a value for `c` that " 337 "solves nu(c) <= 1 - alpha < nu(c + 1)") 338 c += 1 339 nuc = nucp1 340 nucp1 = nu(c + 1) 341 342 # Compute gamma and the corresponding confidence intervals. 343 g = (1 - alpha - nuc) / (nucp1 - nuc) 344 ci_lower = np.maximum(proportions - c / n, 0) 345 ci_upper = np.minimum(proportions + (c + 2 * g) / n, 1) 346 region = np.array([ci_lower, ci_upper]).T 347 else: 348 raise NotImplementedError('method "%s" is not available' % method) 349 return region 350 351 352def samplesize_confint_proportion(proportion, half_length, alpha=0.05, 353 method='normal'): 354 '''find sample size to get desired confidence interval length 355 356 Parameters 357 ---------- 358 proportion : float in (0, 1) 359 proportion or quantile 360 half_length : float in (0, 1) 361 desired half length of the confidence interval 362 alpha : float in (0, 1) 363 significance level, default 0.05, 364 coverage of the two-sided interval is (approximately) ``1 - alpha`` 365 method : str in ['normal'] 366 method to use for confidence interval, 367 currently only normal approximation 368 369 Returns 370 ------- 371 n : float 372 sample size to get the desired half length of the confidence interval 373 374 Notes 375 ----- 376 this is mainly to store the formula. 377 possible application: number of replications in bootstrap samples 378 379 ''' 380 q_ = proportion 381 if method == 'normal': 382 n = q_ * (1 - q_) / (half_length / stats.norm.isf(alpha / 2.))**2 383 else: 384 raise NotImplementedError('only "normal" is available') 385 386 return n 387 388 389def proportion_effectsize(prop1, prop2, method='normal'): 390 ''' 391 Effect size for a test comparing two proportions 392 393 for use in power function 394 395 Parameters 396 ---------- 397 prop1, prop2 : float or array_like 398 The proportion value(s). 399 400 Returns 401 ------- 402 es : float or ndarray 403 effect size for (transformed) prop1 - prop2 404 405 Notes 406 ----- 407 only method='normal' is implemented to match pwr.p2.test 408 see http://www.statmethods.net/stats/power.html 409 410 Effect size for `normal` is defined as :: 411 412 2 * (arcsin(sqrt(prop1)) - arcsin(sqrt(prop2))) 413 414 I think other conversions to normality can be used, but I need to check. 415 416 Examples 417 -------- 418 >>> import statsmodels.api as sm 419 >>> sm.stats.proportion_effectsize(0.5, 0.4) 420 0.20135792079033088 421 >>> sm.stats.proportion_effectsize([0.3, 0.4, 0.5], 0.4) 422 array([-0.21015893, 0. , 0.20135792]) 423 424 ''' 425 if method != 'normal': 426 raise ValueError('only "normal" is implemented') 427 428 es = 2 * (np.arcsin(np.sqrt(prop1)) - np.arcsin(np.sqrt(prop2))) 429 return es 430 431 432def std_prop(prop, nobs): 433 '''standard error for the estimate of a proportion 434 435 This is just ``np.sqrt(p * (1. - p) / nobs)`` 436 437 Parameters 438 ---------- 439 prop : array_like 440 proportion 441 nobs : int, array_like 442 number of observations 443 444 Returns 445 ------- 446 std : array_like 447 standard error for a proportion of nobs independent observations 448 ''' 449 return np.sqrt(prop * (1. - prop) / nobs) 450 451 452def _std_diff_prop(p1, p2, ratio=1): 453 return np.sqrt(p1 * (1 - p1) + p2 * (1 - p2) / ratio) 454 455 456def _power_ztost(mean_low, var_low, mean_upp, var_upp, mean_alt, var_alt, 457 alpha=0.05, discrete=True, dist='norm', nobs=None, 458 continuity=0, critval_continuity=0): 459 '''Generic statistical power function for normal based equivalence test 460 461 This includes options to adjust the normal approximation and can use 462 the binomial to evaluate the probability of the rejection region 463 464 see power_ztost_prob for a description of the options 465 ''' 466 # TODO: refactor structure, separate norm and binom better 467 if not isinstance(continuity, tuple): 468 continuity = (continuity, continuity) 469 crit = stats.norm.isf(alpha) 470 k_low = mean_low + np.sqrt(var_low) * crit 471 k_upp = mean_upp - np.sqrt(var_upp) * crit 472 if discrete or dist == 'binom': 473 k_low = np.ceil(k_low * nobs + 0.5 * critval_continuity) 474 k_upp = np.trunc(k_upp * nobs - 0.5 * critval_continuity) 475 if dist == 'norm': 476 #need proportion 477 k_low = (k_low) * 1. / nobs #-1 to match PASS 478 k_upp = k_upp * 1. / nobs 479# else: 480# if dist == 'binom': 481# #need counts 482# k_low *= nobs 483# k_upp *= nobs 484 #print mean_low, np.sqrt(var_low), crit, var_low 485 #print mean_upp, np.sqrt(var_upp), crit, var_upp 486 if np.any(k_low > k_upp): #vectorize 487 import warnings 488 warnings.warn("no overlap, power is zero", HypothesisTestWarning) 489 std_alt = np.sqrt(var_alt) 490 z_low = (k_low - mean_alt - continuity[0] * 0.5 / nobs) / std_alt 491 z_upp = (k_upp - mean_alt + continuity[1] * 0.5 / nobs) / std_alt 492 if dist == 'norm': 493 power = stats.norm.cdf(z_upp) - stats.norm.cdf(z_low) 494 elif dist == 'binom': 495 power = (stats.binom.cdf(k_upp, nobs, mean_alt) - 496 stats.binom.cdf(k_low-1, nobs, mean_alt)) 497 return power, (k_low, k_upp, z_low, z_upp) 498 499 500def binom_tost(count, nobs, low, upp): 501 '''exact TOST test for one proportion using binomial distribution 502 503 Parameters 504 ---------- 505 count : {int, array_like} 506 the number of successes in nobs trials. 507 nobs : int 508 the number of trials or observations. 509 low, upp : floats 510 lower and upper limit of equivalence region 511 512 Returns 513 ------- 514 pvalue : float 515 p-value of equivalence test 516 pval_low, pval_upp : floats 517 p-values of lower and upper one-sided tests 518 519 ''' 520 # binom_test_stat only returns pval 521 tt1 = binom_test(count, nobs, alternative='larger', prop=low) 522 tt2 = binom_test(count, nobs, alternative='smaller', prop=upp) 523 return np.maximum(tt1, tt2), tt1, tt2, 524 525 526def binom_tost_reject_interval(low, upp, nobs, alpha=0.05): 527 '''rejection region for binomial TOST 528 529 The interval includes the end points, 530 `reject` if and only if `r_low <= x <= r_upp`. 531 532 The interval might be empty with `r_upp < r_low`. 533 534 Parameters 535 ---------- 536 low, upp : floats 537 lower and upper limit of equivalence region 538 nobs : int 539 the number of trials or observations. 540 541 Returns 542 ------- 543 x_low, x_upp : float 544 lower and upper bound of rejection region 545 546 ''' 547 x_low = stats.binom.isf(alpha, nobs, low) + 1 548 x_upp = stats.binom.ppf(alpha, nobs, upp) - 1 549 return x_low, x_upp 550 551 552def binom_test_reject_interval(value, nobs, alpha=0.05, alternative='two-sided'): 553 '''rejection region for binomial test for one sample proportion 554 555 The interval includes the end points of the rejection region. 556 557 Parameters 558 ---------- 559 value : float 560 proportion under the Null hypothesis 561 nobs : int 562 the number of trials or observations. 563 564 565 Returns 566 ------- 567 x_low, x_upp : float 568 lower and upper bound of rejection region 569 570 571 ''' 572 if alternative in ['2s', 'two-sided']: 573 alternative = '2s' # normalize alternative name 574 alpha = alpha / 2 575 576 if alternative in ['2s', 'smaller']: 577 x_low = stats.binom.ppf(alpha, nobs, value) - 1 578 else: 579 x_low = 0 580 if alternative in ['2s', 'larger']: 581 x_upp = stats.binom.isf(alpha, nobs, value) + 1 582 else : 583 x_upp = nobs 584 585 return x_low, x_upp 586 587 588def binom_test(count, nobs, prop=0.5, alternative='two-sided'): 589 '''Perform a test that the probability of success is p. 590 591 This is an exact, two-sided test of the null hypothesis 592 that the probability of success in a Bernoulli experiment 593 is `p`. 594 595 Parameters 596 ---------- 597 count : {int, array_like} 598 the number of successes in nobs trials. 599 nobs : int 600 the number of trials or observations. 601 prop : float, optional 602 The probability of success under the null hypothesis, 603 `0 <= prop <= 1`. The default value is `prop = 0.5` 604 alternative : str in ['two-sided', 'smaller', 'larger'] 605 alternative hypothesis, which can be two-sided or either one of the 606 one-sided tests. 607 608 Returns 609 ------- 610 p-value : float 611 The p-value of the hypothesis test 612 613 Notes 614 ----- 615 This uses scipy.stats.binom_test for the two-sided alternative. 616 617 ''' 618 619 if np.any(prop > 1.0) or np.any(prop < 0.0): 620 raise ValueError("p must be in range [0,1]") 621 if alternative in ['2s', 'two-sided']: 622 pval = stats.binom_test(count, n=nobs, p=prop) 623 elif alternative in ['l', 'larger']: 624 pval = stats.binom.sf(count-1, nobs, prop) 625 elif alternative in ['s', 'smaller']: 626 pval = stats.binom.cdf(count, nobs, prop) 627 else: 628 raise ValueError('alternative not recognized\n' 629 'should be two-sided, larger or smaller') 630 return pval 631 632 633def power_binom_tost(low, upp, nobs, p_alt=None, alpha=0.05): 634 if p_alt is None: 635 p_alt = 0.5 * (low + upp) 636 x_low, x_upp = binom_tost_reject_interval(low, upp, nobs, alpha=alpha) 637 power = (stats.binom.cdf(x_upp, nobs, p_alt) - 638 stats.binom.cdf(x_low-1, nobs, p_alt)) 639 return power 640 641 642def power_ztost_prop(low, upp, nobs, p_alt, alpha=0.05, dist='norm', 643 variance_prop=None, discrete=True, continuity=0, 644 critval_continuity=0): 645 '''Power of proportions equivalence test based on normal distribution 646 647 Parameters 648 ---------- 649 low, upp : floats 650 lower and upper limit of equivalence region 651 nobs : int 652 number of observations 653 p_alt : float in (0,1) 654 proportion under the alternative 655 alpha : float in (0,1) 656 significance level of the test 657 dist : str in ['norm', 'binom'] 658 This defines the distribution to evaluate the power of the test. The 659 critical values of the TOST test are always based on the normal 660 approximation, but the distribution for the power can be either the 661 normal (default) or the binomial (exact) distribution. 662 variance_prop : None or float in (0,1) 663 If this is None, then the variances for the two one sided tests are 664 based on the proportions equal to the equivalence limits. 665 If variance_prop is given, then it is used to calculate the variance 666 for the TOST statistics. If this is based on an sample, then the 667 estimated proportion can be used. 668 discrete : bool 669 If true, then the critical values of the rejection region are converted 670 to integers. If dist is "binom", this is automatically assumed. 671 If discrete is false, then the TOST critical values are used as 672 floating point numbers, and the power is calculated based on the 673 rejection region that is not discretized. 674 continuity : bool or float 675 adjust the rejection region for the normal power probability. This has 676 and effect only if ``dist='norm'`` 677 critval_continuity : bool or float 678 If this is non-zero, then the critical values of the tost rejection 679 region are adjusted before converting to integers. This affects both 680 distributions, ``dist='norm'`` and ``dist='binom'``. 681 682 Returns 683 ------- 684 power : float 685 statistical power of the equivalence test. 686 (k_low, k_upp, z_low, z_upp) : tuple of floats 687 critical limits in intermediate steps 688 temporary return, will be changed 689 690 Notes 691 ----- 692 In small samples the power for the ``discrete`` version, has a sawtooth 693 pattern as a function of the number of observations. As a consequence, 694 small changes in the number of observations or in the normal approximation 695 can have a large effect on the power. 696 697 ``continuity`` and ``critval_continuity`` are added to match some results 698 of PASS, and are mainly to investigate the sensitivity of the ztost power 699 to small changes in the rejection region. From my interpretation of the 700 equations in the SAS manual, both are zero in SAS. 701 702 works vectorized 703 704 **verification:** 705 706 The ``dist='binom'`` results match PASS, 707 The ``dist='norm'`` results look reasonable, but no benchmark is available. 708 709 References 710 ---------- 711 SAS Manual: Chapter 68: The Power Procedure, Computational Resources 712 PASS Chapter 110: Equivalence Tests for One Proportion. 713 714 ''' 715 mean_low = low 716 var_low = std_prop(low, nobs)**2 717 mean_upp = upp 718 var_upp = std_prop(upp, nobs)**2 719 mean_alt = p_alt 720 var_alt = std_prop(p_alt, nobs)**2 721 if variance_prop is not None: 722 var_low = var_upp = std_prop(variance_prop, nobs)**2 723 power = _power_ztost(mean_low, var_low, mean_upp, var_upp, mean_alt, var_alt, 724 alpha=alpha, discrete=discrete, dist=dist, nobs=nobs, 725 continuity=continuity, critval_continuity=critval_continuity) 726 return np.maximum(power[0], 0), power[1:] 727 728 729def _table_proportion(count, nobs): 730 '''create a k by 2 contingency table for proportion 731 732 helper function for proportions_chisquare 733 734 Parameters 735 ---------- 736 count : {int, array_like} 737 the number of successes in nobs trials. 738 nobs : int 739 the number of trials or observations. 740 741 Returns 742 ------- 743 table : ndarray 744 (k, 2) contingency table 745 746 Notes 747 ----- 748 recent scipy has more elaborate contingency table functions 749 750 ''' 751 count = np.asarray(count) 752 dt = np.promote_types(count.dtype, np.float64) 753 count = np.asarray(count, dtype=dt) 754 table = np.column_stack((count, nobs - count)) 755 expected = table.sum(0) * table.sum(1)[:, None] * 1. / table.sum() 756 n_rows = table.shape[0] 757 return table, expected, n_rows 758 759 760def proportions_ztest(count, nobs, value=None, alternative='two-sided', 761 prop_var=False): 762 """ 763 Test for proportions based on normal (z) test 764 765 Parameters 766 ---------- 767 count : {int, array_like} 768 the number of successes in nobs trials. If this is array_like, then 769 the assumption is that this represents the number of successes for 770 each independent sample 771 nobs : {int, array_like} 772 the number of trials or observations, with the same length as 773 count. 774 value : float, array_like or None, optional 775 This is the value of the null hypothesis equal to the proportion in the 776 case of a one sample test. In the case of a two-sample test, the 777 null hypothesis is that prop[0] - prop[1] = value, where prop is the 778 proportion in the two samples. If not provided value = 0 and the null 779 is prop[0] = prop[1] 780 alternative : str in ['two-sided', 'smaller', 'larger'] 781 The alternative hypothesis can be either two-sided or one of the one- 782 sided tests, smaller means that the alternative hypothesis is 783 ``prop < value`` and larger means ``prop > value``. In the two sample 784 test, smaller means that the alternative hypothesis is ``p1 < p2`` and 785 larger means ``p1 > p2`` where ``p1`` is the proportion of the first 786 sample and ``p2`` of the second one. 787 prop_var : False or float in (0, 1) 788 If prop_var is false, then the variance of the proportion estimate is 789 calculated based on the sample proportion. Alternatively, a proportion 790 can be specified to calculate this variance. Common use case is to 791 use the proportion under the Null hypothesis to specify the variance 792 of the proportion estimate. 793 794 Returns 795 ------- 796 zstat : float 797 test statistic for the z-test 798 p-value : float 799 p-value for the z-test 800 801 Examples 802 -------- 803 >>> count = 5 804 >>> nobs = 83 805 >>> value = .05 806 >>> stat, pval = proportions_ztest(count, nobs, value) 807 >>> print('{0:0.3f}'.format(pval)) 808 0.695 809 810 >>> import numpy as np 811 >>> from statsmodels.stats.proportion import proportions_ztest 812 >>> count = np.array([5, 12]) 813 >>> nobs = np.array([83, 99]) 814 >>> stat, pval = proportions_ztest(count, nobs) 815 >>> print('{0:0.3f}'.format(pval)) 816 0.159 817 818 Notes 819 ----- 820 This uses a simple normal test for proportions. It should be the same as 821 running the mean z-test on the data encoded 1 for event and 0 for no event 822 so that the sum corresponds to the count. 823 824 In the one and two sample cases with two-sided alternative, this test 825 produces the same p-value as ``proportions_chisquare``, since the 826 chisquare is the distribution of the square of a standard normal 827 distribution. 828 """ 829 # TODO: verify that this really holds 830 # TODO: add continuity correction or other improvements for small samples 831 # TODO: change options similar to propotion_ztost ? 832 833 count = np.asarray(count) 834 nobs = np.asarray(nobs) 835 836 if nobs.size == 1: 837 nobs = nobs * np.ones_like(count) 838 839 prop = count * 1. / nobs 840 k_sample = np.size(prop) 841 if value is None: 842 if k_sample == 1: 843 raise ValueError('value must be provided for a 1-sample test') 844 value = 0 845 if k_sample == 1: 846 diff = prop - value 847 elif k_sample == 2: 848 diff = prop[0] - prop[1] - value 849 else: 850 msg = 'more than two samples are not implemented yet' 851 raise NotImplementedError(msg) 852 853 p_pooled = np.sum(count) * 1. / np.sum(nobs) 854 855 nobs_fact = np.sum(1. / nobs) 856 if prop_var: 857 p_pooled = prop_var 858 var_ = p_pooled * (1 - p_pooled) * nobs_fact 859 std_diff = np.sqrt(var_) 860 from statsmodels.stats.weightstats import _zstat_generic2 861 return _zstat_generic2(diff, std_diff, alternative) 862 863 864def proportions_ztost(count, nobs, low, upp, prop_var='sample'): 865 '''Equivalence test based on normal distribution 866 867 Parameters 868 ---------- 869 count : {int, array_like} 870 the number of successes in nobs trials. If this is array_like, then 871 the assumption is that this represents the number of successes for 872 each independent sample 873 nobs : int 874 the number of trials or observations, with the same length as 875 count. 876 low, upp : float 877 equivalence interval low < prop1 - prop2 < upp 878 prop_var : str or float in (0, 1) 879 prop_var determines which proportion is used for the calculation 880 of the standard deviation of the proportion estimate 881 The available options for string are 'sample' (default), 'null' and 882 'limits'. If prop_var is a float, then it is used directly. 883 884 Returns 885 ------- 886 pvalue : float 887 pvalue of the non-equivalence test 888 t1, pv1 : tuple of floats 889 test statistic and pvalue for lower threshold test 890 t2, pv2 : tuple of floats 891 test statistic and pvalue for upper threshold test 892 893 Notes 894 ----- 895 checked only for 1 sample case 896 897 ''' 898 if prop_var == 'limits': 899 prop_var_low = low 900 prop_var_upp = upp 901 elif prop_var == 'sample': 902 prop_var_low = prop_var_upp = False #ztest uses sample 903 elif prop_var == 'null': 904 prop_var_low = prop_var_upp = 0.5 * (low + upp) 905 elif np.isreal(prop_var): 906 prop_var_low = prop_var_upp = prop_var 907 908 tt1 = proportions_ztest(count, nobs, alternative='larger', 909 prop_var=prop_var_low, value=low) 910 tt2 = proportions_ztest(count, nobs, alternative='smaller', 911 prop_var=prop_var_upp, value=upp) 912 return np.maximum(tt1[1], tt2[1]), tt1, tt2, 913 914 915def proportions_chisquare(count, nobs, value=None): 916 '''test for proportions based on chisquare test 917 918 Parameters 919 ---------- 920 count : {int, array_like} 921 the number of successes in nobs trials. If this is array_like, then 922 the assumption is that this represents the number of successes for 923 each independent sample 924 nobs : int 925 the number of trials or observations, with the same length as 926 count. 927 value : None or float or array_like 928 929 Returns 930 ------- 931 chi2stat : float 932 test statistic for the chisquare test 933 p-value : float 934 p-value for the chisquare test 935 (table, expected) 936 table is a (k, 2) contingency table, ``expected`` is the corresponding 937 table of counts that are expected under independence with given 938 margins 939 940 Notes 941 ----- 942 Recent version of scipy.stats have a chisquare test for independence in 943 contingency tables. 944 945 This function provides a similar interface to chisquare tests as 946 ``prop.test`` in R, however without the option for Yates continuity 947 correction. 948 949 count can be the count for the number of events for a single proportion, 950 or the counts for several independent proportions. If value is given, then 951 all proportions are jointly tested against this value. If value is not 952 given and count and nobs are not scalar, then the null hypothesis is 953 that all samples have the same proportion. 954 955 ''' 956 nobs = np.atleast_1d(nobs) 957 table, expected, n_rows = _table_proportion(count, nobs) 958 if value is not None: 959 expected = np.column_stack((nobs * value, nobs * (1 - value))) 960 ddof = n_rows - 1 961 else: 962 ddof = n_rows 963 964 #print table, expected 965 chi2stat, pval = stats.chisquare(table.ravel(), expected.ravel(), 966 ddof=ddof) 967 return chi2stat, pval, (table, expected) 968 969 970def proportions_chisquare_allpairs(count, nobs, multitest_method='hs'): 971 '''chisquare test of proportions for all pairs of k samples 972 973 Performs a chisquare test for proportions for all pairwise comparisons. 974 The alternative is two-sided 975 976 Parameters 977 ---------- 978 count : {int, array_like} 979 the number of successes in nobs trials. 980 nobs : int 981 the number of trials or observations. 982 prop : float, optional 983 The probability of success under the null hypothesis, 984 `0 <= prop <= 1`. The default value is `prop = 0.5` 985 multitest_method : str 986 This chooses the method for the multiple testing p-value correction, 987 that is used as default in the results. 988 It can be any method that is available in ``multipletesting``. 989 The default is Holm-Sidak 'hs'. 990 991 Returns 992 ------- 993 result : AllPairsResults instance 994 The returned results instance has several statistics, such as p-values, 995 attached, and additional methods for using a non-default 996 ``multitest_method``. 997 998 Notes 999 ----- 1000 Yates continuity correction is not available. 1001 ''' 1002 #all_pairs = lmap(list, lzip(*np.triu_indices(4, 1))) 1003 all_pairs = lzip(*np.triu_indices(len(count), 1)) 1004 pvals = [proportions_chisquare(count[list(pair)], nobs[list(pair)])[1] 1005 for pair in all_pairs] 1006 return AllPairsResults(pvals, all_pairs, multitest_method=multitest_method) 1007 1008 1009def proportions_chisquare_pairscontrol(count, nobs, value=None, 1010 multitest_method='hs', alternative='two-sided'): 1011 '''chisquare test of proportions for pairs of k samples compared to control 1012 1013 Performs a chisquare test for proportions for pairwise comparisons with a 1014 control (Dunnet's test). The control is assumed to be the first element 1015 of ``count`` and ``nobs``. The alternative is two-sided, larger or 1016 smaller. 1017 1018 Parameters 1019 ---------- 1020 count : {int, array_like} 1021 the number of successes in nobs trials. 1022 nobs : int 1023 the number of trials or observations. 1024 prop : float, optional 1025 The probability of success under the null hypothesis, 1026 `0 <= prop <= 1`. The default value is `prop = 0.5` 1027 multitest_method : str 1028 This chooses the method for the multiple testing p-value correction, 1029 that is used as default in the results. 1030 It can be any method that is available in ``multipletesting``. 1031 The default is Holm-Sidak 'hs'. 1032 alternative : str in ['two-sided', 'smaller', 'larger'] 1033 alternative hypothesis, which can be two-sided or either one of the 1034 one-sided tests. 1035 1036 Returns 1037 ------- 1038 result : AllPairsResults instance 1039 The returned results instance has several statistics, such as p-values, 1040 attached, and additional methods for using a non-default 1041 ``multitest_method``. 1042 1043 1044 Notes 1045 ----- 1046 Yates continuity correction is not available. 1047 1048 ``value`` and ``alternative`` options are not yet implemented. 1049 1050 ''' 1051 if (value is not None) or (alternative not in ['two-sided', '2s']): 1052 raise NotImplementedError 1053 #all_pairs = lmap(list, lzip(*np.triu_indices(4, 1))) 1054 all_pairs = [(0, k) for k in range(1, len(count))] 1055 pvals = [proportions_chisquare(count[list(pair)], nobs[list(pair)], 1056 #alternative=alternative)[1] 1057 )[1] 1058 for pair in all_pairs] 1059 return AllPairsResults(pvals, all_pairs, multitest_method=multitest_method) 1060 1061 1062def confint_proportions_2indep(count1, nobs1, count2, nobs2, method=None, 1063 compare='diff', alpha=0.05, correction=True): 1064 """Confidence intervals for comparing two independent proportions 1065 1066 This assumes that we have two independent binomial samples. 1067 1068 Parameters 1069 ---------- 1070 count1, nobs1 : float 1071 Count and sample size for first sample. 1072 count2, nobs2 : float 1073 Count and sample size for the second sample. 1074 method : str 1075 Method for computing confidence interval. If method is None, then a 1076 default method is used. The default might change as more methods are 1077 added. 1078 1079 diff: 1080 - 'wald', 1081 - 'agresti-caffo' 1082 - 'newcomb' (default) 1083 - 'score' 1084 1085 ratio: 1086 - 'log' 1087 - 'log-adjusted' (default) 1088 - 'score' 1089 1090 odds-ratio: 1091 - 'logit' 1092 - 'logit-adjusted' (default) 1093 - 'score' 1094 1095 compare : string in ['diff', 'ratio' 'odds-ratio'] 1096 If compare is diff, then the confidence interval is for diff = p1 - p2. 1097 If compare is ratio, then the confidence interval is for the risk ratio 1098 defined by ratio = p1 / p2. 1099 If compare is odds-ratio, then the confidence interval is for the 1100 odds-ratio defined by or = p1 / (1 - p1) / (p2 / (1 - p2). 1101 alpha : float 1102 Significance level for the confidence interval, default is 0.05. 1103 The nominal coverage probability is 1 - alpha. 1104 1105 Returns 1106 ------- 1107 low, upp 1108 1109 Notes 1110 ----- 1111 Status: experimental, API and defaults might still change. 1112 more ``methods`` will be added. 1113 1114 References 1115 ---------- 1116 .. [1] Fagerland, Morten W., Stian Lydersen, and Petter Laake. 2015. 1117 “Recommended Confidence Intervals for Two Independent Binomial 1118 Proportions.” Statistical Methods in Medical Research 24 (2): 224–54. 1119 https://doi.org/10.1177/0962280211415469. 1120 .. [2] Koopman, P. A. R. 1984. “Confidence Intervals for the Ratio of Two 1121 Binomial Proportions.” Biometrics 40 (2): 513–17. 1122 https://doi.org/10.2307/2531405. 1123 .. [3] Miettinen, Olli, and Markku Nurminen. "Comparative analysis of two 1124 rates." Statistics in medicine 4, no. 2 (1985): 213-226. 1125 .. [4] Newcombe, Robert G. 1998. “Interval Estimation for the Difference 1126 between Independent Proportions: Comparison of Eleven Methods.” 1127 Statistics in Medicine 17 (8): 873–90. 1128 https://doi.org/10.1002/(SICI)1097-0258(19980430)17:8<873::AID- 1129 SIM779>3.0.CO;2-I. 1130 .. [5] Newcombe, Robert G., and Markku M. Nurminen. 2011. “In Defence of 1131 Score Intervals for Proportions and Their Differences.” Communications 1132 in Statistics - Theory and Methods 40 (7): 1271–82. 1133 https://doi.org/10.1080/03610920903576580. 1134 """ 1135 method_default = {'diff': 'newcomb', 1136 'ratio': 'log-adjusted', 1137 'odds-ratio': 'logit-adjusted'} 1138 # normalize compare name 1139 if compare.lower() == 'or': 1140 compare = 'odds-ratio' 1141 if method is None: 1142 method = method_default[compare] 1143 1144 method = method.lower() 1145 if method.startswith('agr'): 1146 method = 'agresti-caffo' 1147 1148 p1 = count1 / nobs1 1149 p2 = count2 / nobs2 1150 diff = p1 - p2 1151 addone = 1 if method == 'agresti-caffo' else 0 1152 1153 if compare == 'diff': 1154 if method in ['wald', 'agresti-caffo']: 1155 count1_, nobs1_ = count1 + addone, nobs1 + 2 * addone 1156 count2_, nobs2_ = count2 + addone, nobs2 + 2 * addone 1157 p1_ = count1_ / nobs1_ 1158 p2_ = count2_ / nobs2_ 1159 diff_ = p1_ - p2_ 1160 var = p1_ * (1 - p1_) / nobs1_ + p2_ * (1 - p2_) / nobs2_ 1161 z = stats.norm.isf(alpha / 2) 1162 d_wald = z * np.sqrt(var) 1163 low = diff_ - d_wald 1164 upp = diff_ + d_wald 1165 1166 elif method.startswith('newcomb'): 1167 low1, upp1 = proportion_confint(count1, nobs1, 1168 method='wilson', alpha=alpha) 1169 low2, upp2 = proportion_confint(count2, nobs2, 1170 method='wilson', alpha=alpha) 1171 d_low = np.sqrt((p1 - low1)**2 + (upp2 - p2)**2) 1172 d_upp = np.sqrt((p2 - low2)**2 + (upp1 - p1)**2) 1173 low = diff - d_low 1174 upp = diff + d_upp 1175 1176 elif method == "score": 1177 low, upp = _score_confint_inversion(count1, nobs1, count2, nobs2, 1178 compare=compare, alpha=alpha, 1179 correction=correction) 1180 1181 else: 1182 raise ValueError('method not recognized') 1183 1184 elif compare == 'ratio': 1185 # ratio = p1 / p2 1186 if method in ['log', 'log-adjusted']: 1187 addhalf = 0.5 if method == 'log-adjusted' else 0 1188 count1_, nobs1_ = count1 + addhalf, nobs1 + addhalf 1189 count2_, nobs2_ = count2 + addhalf, nobs2 + addhalf 1190 p1_ = count1_ / nobs1_ 1191 p2_ = count2_ / nobs2_ 1192 ratio_ = p1_ / p2_ 1193 var = (1 / count1_) - 1 / nobs1_ + 1 / count2_ - 1 / nobs2_ 1194 z = stats.norm.isf(alpha / 2) 1195 d_log = z * np.sqrt(var) 1196 low = np.exp(np.log(ratio_) - d_log) 1197 upp = np.exp(np.log(ratio_) + d_log) 1198 1199 elif method == 'score': 1200 res = _confint_riskratio_koopman(count1, nobs1, count2, nobs2, 1201 alpha=alpha, 1202 correction=correction) 1203 low, upp = res.confint 1204 1205 else: 1206 raise ValueError('method not recognized') 1207 1208 elif compare == 'odds-ratio': 1209 # odds_ratio = p1 / (1 - p1) / p2 * (1 - p2) 1210 if method in ['logit', 'logit-adjusted', 'logit-smoothed']: 1211 if method in ['logit-smoothed']: 1212 adjusted = _shrink_prob(count1, nobs1, count2, nobs2, 1213 shrink_factor=2, return_corr=False)[0] 1214 count1_, nobs1_, count2_, nobs2_ = adjusted 1215 1216 else: 1217 addhalf = 0.5 if method == 'logit-adjusted' else 0 1218 count1_, nobs1_ = count1 + addhalf, nobs1 + 2 * addhalf 1219 count2_, nobs2_ = count2 + addhalf, nobs2 + 2 * addhalf 1220 p1_ = count1_ / nobs1_ 1221 p2_ = count2_ / nobs2_ 1222 odds_ratio_ = p1_ / (1 - p1_) / p2_ * (1 - p2_) 1223 var = (1 / count1_ + 1 / (nobs1_ - count1_) + 1224 1 / count2_ + 1 / (nobs2_ - count2_)) 1225 z = stats.norm.isf(alpha / 2) 1226 d_log = z * np.sqrt(var) 1227 low = np.exp(np.log(odds_ratio_) - d_log) 1228 upp = np.exp(np.log(odds_ratio_) + d_log) 1229 1230 elif method == "score": 1231 low, upp = _score_confint_inversion(count1, nobs1, count2, nobs2, 1232 compare=compare, alpha=alpha, 1233 correction=correction) 1234 1235 else: 1236 raise ValueError('method not recognized') 1237 1238 else: 1239 raise ValueError('compare not recognized') 1240 1241 return low, upp 1242 1243 1244def _shrink_prob(count1, nobs1, count2, nobs2, shrink_factor=2, 1245 return_corr=True): 1246 """shrink observed counts towards independence 1247 1248 Helper function for 'logit-smoothed' inference for the odds-ratio of two 1249 independent proportions. 1250 1251 Parameters 1252 ---------- 1253 count1, nobs1 : float or int 1254 count and sample size for first sample 1255 count2, nobs2 : float or int 1256 count and sample size for the second sample 1257 shrink_factor : float 1258 This corresponds to the number of observations that are added in total 1259 proportional to the probabilities under independence. 1260 return_corr : bool 1261 If true, then only the correction term is returned 1262 If false, then the corrected counts, i.e. original counts plus 1263 correction term, are returned. 1264 1265 Returns 1266 ------- 1267 count1_corr, nobs1_corr, count2_corr, nobs2_corr : float 1268 correction or corrected counts 1269 prob_indep : 1270 TODO/Warning : this will change most likely 1271 probabilities under independence, only returned if return_corr is 1272 false. 1273 1274 """ 1275 nobs_col = np.array([count1 + count2, nobs1 - count1 + nobs2 - count2]) 1276 nobs_row = np.array([nobs1, nobs2]) 1277 nobs = nobs1 + nobs2 1278 prob_indep = (nobs_col * nobs_row[:, None]) / nobs**2 1279 corr = shrink_factor * prob_indep 1280 if return_corr: 1281 return (corr[0, 0], corr[0].sum(), corr[1, 0], corr[1].sum()) 1282 else: 1283 return (count1 + corr[0, 0], nobs1 + corr[0].sum(), 1284 count2 + corr[1, 0], nobs2 + corr[1].sum()), prob_indep 1285 1286 1287def score_test_proportions_2indep(count1, nobs1, count2, nobs2, value=None, 1288 compare='diff', alternative='two-sided', 1289 correction=True, return_results=True): 1290 """score_test for two independent proportions 1291 1292 This uses the constrained estimate of the proportions to compute 1293 the variance under the Null hypothesis. 1294 1295 Parameters 1296 ---------- 1297 count1, nobs1 : 1298 count and sample size for first sample 1299 count2, nobs2 : 1300 count and sample size for the second sample 1301 value : float 1302 diff, ratio or odds-ratio under the null hypothesis. If value is None, 1303 then equality of proportions under the Null is assumed, 1304 i.e. value=0 for 'diff' or value=1 for either rate or odds-ratio. 1305 compare : string in ['diff', 'ratio' 'odds-ratio'] 1306 If compare is diff, then the confidence interval is for diff = p1 - p2. 1307 If compare is ratio, then the confidence interval is for the risk ratio 1308 defined by ratio = p1 / p2. 1309 If compare is odds-ratio, then the confidence interval is for the 1310 odds-ratio defined by or = p1 / (1 - p1) / (p2 / (1 - p2) 1311 return_results : bool 1312 If true, then a results instance with extra information is returned, 1313 otherwise a tuple with statistic and pvalue is returned. 1314 1315 Returns 1316 ------- 1317 results : results instance or tuple 1318 If return_results is True, then a results instance with the 1319 information in attributes is returned. 1320 If return_results is False, then only ``statistic`` and ``pvalue`` 1321 are returned. 1322 1323 statistic : float 1324 test statistic asymptotically normal distributed N(0, 1) 1325 pvalue : float 1326 p-value based on normal distribution 1327 other attributes : 1328 additional information about the hypothesis test 1329 1330 Notes 1331 ----- 1332 Status: experimental, the type or extra information in the return might 1333 change. 1334 1335 """ 1336 1337 value_default = 0 if compare == 'diff' else 1 1338 if value is None: 1339 # TODO: odds ratio does not work if value=1 1340 value = value_default 1341 1342 nobs = nobs1 + nobs2 1343 count = count1 + count2 1344 p1 = count1 / nobs1 1345 p2 = count2 / nobs2 1346 if value == value_default: 1347 # use pooled estimator if equality test 1348 # shortcut, but required for odds ratio 1349 prop0 = prop1 = count / nobs 1350 # this uses index 0 from Miettinen Nurminned 1985 1351 count0, nobs0 = count2, nobs2 1352 p0 = p2 1353 1354 if compare == 'diff': 1355 diff = value # hypothesis value 1356 1357 if diff != 0: 1358 tmp3 = nobs 1359 tmp2 = (nobs1 + 2 * nobs0) * diff - nobs - count 1360 tmp1 = (count0 * diff - nobs - 2 * count0) * diff + count 1361 tmp0 = count0 * diff * (1 - diff) 1362 q = ((tmp2 / (3 * tmp3))**3 - tmp1 * tmp2 / (6 * tmp3**2) + 1363 tmp0 / (2 * tmp3)) 1364 p = np.sign(q) * np.sqrt((tmp2 / (3 * tmp3))**2 - 1365 tmp1 / (3 * tmp3)) 1366 a = (np.pi + np.arccos(q / p**3)) / 3 1367 1368 prop0 = 2 * p * np.cos(a) - tmp2 / (3 * tmp3) 1369 prop1 = prop0 + diff 1370 1371 correction = True 1372 var = prop1 * (1 - prop1) / nobs1 + prop0 * (1 - prop0) / nobs0 1373 if correction: 1374 var *= nobs / (nobs - 1) 1375 1376 diff_stat = (p1 - p0 - diff) 1377 1378 elif compare == 'ratio': 1379 # risk ratio 1380 ratio = value 1381 1382 if ratio != 1: 1383 a = nobs * ratio 1384 b = -(nobs1 * ratio + count1 + nobs2 + count0 * ratio) 1385 c = count 1386 prop0 = (-b - np.sqrt(b**2 - 4 * a * c)) / (2 * a) 1387 prop1 = prop0 * ratio 1388 1389 var = (prop1 * (1 - prop1) / nobs1 + 1390 ratio**2 * prop0 * (1 - prop0) / nobs0) 1391 if correction: 1392 var *= nobs / (nobs - 1) 1393 1394 # NCSS looks incorrect for var, but it is what should be reported 1395 # diff_stat = (p1 / p0 - ratio) # NCSS/PASS 1396 diff_stat = (p1 - ratio * p0) # Miettinen Nurminen 1397 1398 elif compare in ['or', 'odds-ratio']: 1399 # odds ratio 1400 oratio = value 1401 1402 if oratio != 1: 1403 # Note the constraint estimator does not handle odds-ratio = 1 1404 a = nobs0 * (oratio - 1) 1405 b = nobs1 * oratio + nobs0 - count * (oratio - 1) 1406 c = -count 1407 prop0 = (-b + np.sqrt(b**2 - 4 * a * c)) / (2 * a) 1408 prop1 = prop0 * oratio / (1 + prop0 * (oratio - 1)) 1409 1410 # try to avoid 0 and 1 proportions, 1411 # those raise Zero Division Runtime Warnings 1412 eps = 1e-10 1413 prop0 = np.clip(prop0, eps, 1 - eps) 1414 prop1 = np.clip(prop1, eps, 1 - eps) 1415 1416 var = (1 / (prop1 * (1 - prop1) * nobs1) + 1417 1 / (prop0 * (1 - prop0) * nobs0)) 1418 if correction: 1419 var *= nobs / (nobs - 1) 1420 1421 diff_stat = ((p1 - prop1) / (prop1 * (1 - prop1)) - 1422 (p0 - prop0) / (prop0 * (1 - prop0))) 1423 1424 statistic, pvalue = _zstat_generic2(diff_stat, np.sqrt(var), 1425 alternative=alternative) 1426 1427 if return_results: 1428 res = HolderTuple(statistic=statistic, 1429 pvalue=pvalue, 1430 compare=compare, 1431 method='score', 1432 variance=var, 1433 alternative=alternative, 1434 prop1_null=prop1, 1435 prop2_null=prop0, 1436 ) 1437 return res 1438 else: 1439 return statistic, pvalue 1440 1441 1442def test_proportions_2indep(count1, nobs1, count2, nobs2, value=None, 1443 method=None, compare='diff', 1444 alternative='two-sided', correction=True, 1445 return_results=True): 1446 """Hypothesis test for comparing two independent proportions 1447 1448 This assumes that we have two independent binomial samples. 1449 1450 The Null and alternative hypothesis are 1451 1452 for compare = 'diff' 1453 1454 - H0: prop1 - prop2 - value = 0 1455 - H1: prop1 - prop2 - value != 0 if alternative = 'two-sided' 1456 - H1: prop1 - prop2 - value > 0 if alternative = 'larger' 1457 - H1: prop1 - prop2 - value < 0 if alternative = 'smaller' 1458 1459 for compare = 'ratio' 1460 1461 - H0: prop1 / prop2 - value = 0 1462 - H1: prop1 / prop2 - value != 0 if alternative = 'two-sided' 1463 - H1: prop1 / prop2 - value > 0 if alternative = 'larger' 1464 - H1: prop1 / prop2 - value < 0 if alternative = 'smaller' 1465 1466 for compare = 'odds-ratio' 1467 1468 - H0: or - value = 0 1469 - H1: or - value != 0 if alternative = 'two-sided' 1470 - H1: or - value > 0 if alternative = 'larger' 1471 - H1: or - value < 0 if alternative = 'smaller' 1472 1473 where odds-ratio or = prop1 / (1 - prop1) / (prop2 / (1 - prop2)) 1474 1475 Parameters 1476 ---------- 1477 count1 : int 1478 Count for first sample. 1479 nobs1 : int 1480 Sample size for first sample. 1481 count2 : int 1482 Count for the second sample. 1483 nobs2 : int 1484 Sample size for the second sample. 1485 method : string 1486 Method for computing confidence interval. If method is None, then a 1487 default method is used. The default might change as more methods are 1488 added. 1489 1490 diff: 1491 1492 - 'wald', 1493 - 'agresti-caffo' 1494 - 'score' if correction is True, then this uses the degrees of freedom 1495 correction ``nobs / (nobs - 1)`` as in Miettinen Nurminen 1985 1496 1497 ratio: 1498 1499 - 'log': wald test using log transformation 1500 - 'log-adjusted': wald test using log transformation, 1501 adds 0.5 to counts 1502 - 'score': if correction is True, then this uses the degrees of freedom 1503 correction ``nobs / (nobs - 1)`` as in Miettinen Nurminen 1985 1504 1505 odds-ratio: 1506 1507 - 'logit': wald test using logit transformation 1508 - 'logit-adjusted': wald test using logit transformation, 1509 adds 0.5 to counts 1510 - 'logit-smoothed': wald test using logit transformation, biases 1511 cell counts towards independence by adding two observations in 1512 total. 1513 - 'score' if correction is True, then this uses the degrees of freedom 1514 correction ``nobs / (nobs - 1)`` as in Miettinen Nurminen 1985 1515 1516 compare : {'diff', 'ratio' 'odds-ratio'} 1517 If compare is `diff`, then the confidence interval is for 1518 diff = p1 - p2. 1519 If compare is `ratio`, then the confidence interval is for the 1520 risk ratio defined by ratio = p1 / p2. 1521 If compare is `odds-ratio`, then the confidence interval is for the 1522 odds-ratio defined by or = p1 / (1 - p1) / (p2 / (1 - p2) 1523 alternative : {'two-sided', 'smaller', 'larger'} 1524 alternative hypothesis, which can be two-sided or either one of the 1525 one-sided tests. 1526 correction : bool 1527 If correction is True (default), then the Miettinen and Nurminen 1528 small sample correction to the variance nobs / (nobs - 1) is used. 1529 Applies only if method='score'. 1530 return_results : bool 1531 If true, then a results instance with extra information is returned, 1532 otherwise a tuple with statistic and pvalue is returned. 1533 1534 Returns 1535 ------- 1536 results : results instance or tuple 1537 If return_results is True, then a results instance with the 1538 information in attributes is returned. 1539 If return_results is False, then only ``statistic`` and ``pvalue`` 1540 are returned. 1541 1542 statistic : float 1543 test statistic asymptotically normal distributed N(0, 1) 1544 pvalue : float 1545 p-value based on normal distribution 1546 other attributes : 1547 additional information about the hypothesis test 1548 1549 Notes 1550 ----- 1551 Status: experimental, API and defaults might still change. 1552 More ``methods`` will be added. 1553 1554 """ 1555 method_default = {'diff': 'agresti-caffo', 1556 'ratio': 'log-adjusted', 1557 'odds-ratio': 'logit-adjusted'} 1558 # normalize compare name 1559 if compare.lower() == 'or': 1560 compare = 'odds-ratio' 1561 if method is None: 1562 method = method_default[compare] 1563 1564 method = method.lower() 1565 if method.startswith('agr'): 1566 method = 'agresti-caffo' 1567 1568 if value is None: 1569 # TODO: odds ratio does not work if value=1 for score test 1570 value = 0 if compare == 'diff' else 1 1571 1572 p1 = count1 / nobs1 1573 p2 = count2 / nobs2 1574 diff = p1 - p2 1575 ratio = p1 / p2 1576 odds_ratio = p1 / (1 - p1) / p2 * (1 - p2) 1577 res = None 1578 1579 if compare == 'diff': 1580 if method in ['wald', 'agresti-caffo']: 1581 addone = 1 if method == 'agresti-caffo' else 0 1582 count1_, nobs1_ = count1 + addone, nobs1 + 2 * addone 1583 count2_, nobs2_ = count2 + addone, nobs2 + 2 * addone 1584 p1_ = count1_ / nobs1_ 1585 p2_ = count2_ / nobs2_ 1586 diff_stat = p1_ - p2_ - value 1587 var = p1_ * (1 - p1_) / nobs1_ + p2_ * (1 - p2_) / nobs2_ 1588 statistic = diff_stat / np.sqrt(var) 1589 distr = 'normal' 1590 1591 elif method.startswith('newcomb'): 1592 msg = 'newcomb not available for hypothesis test' 1593 raise NotImplementedError(msg) 1594 1595 elif method == 'score': 1596 # Note score part is the same call for all compare 1597 res = score_test_proportions_2indep(count1, nobs1, count2, nobs2, 1598 value=value, compare=compare, 1599 alternative=alternative, 1600 correction=correction, 1601 return_results=return_results) 1602 if return_results is False: 1603 statistic, pvalue = res[:2] 1604 distr = 'normal' 1605 # TODO/Note score_test_proportion_2samp returns statistic and 1606 # not diff_stat 1607 diff_stat = None 1608 else: 1609 raise ValueError('method not recognized') 1610 1611 elif compare == 'ratio': 1612 if method in ['log', 'log-adjusted']: 1613 addhalf = 0.5 if method == 'log-adjusted' else 0 1614 count1_, nobs1_ = count1 + addhalf, nobs1 + addhalf 1615 count2_, nobs2_ = count2 + addhalf, nobs2 + addhalf 1616 p1_ = count1_ / nobs1_ 1617 p2_ = count2_ / nobs2_ 1618 ratio_ = p1_ / p2_ 1619 var = (1 / count1_) - 1 / nobs1_ + 1 / count2_ - 1 / nobs2_ 1620 diff_stat = np.log(ratio_) - np.log(value) 1621 statistic = diff_stat / np.sqrt(var) 1622 distr = 'normal' 1623 1624 elif method == 'score': 1625 res = score_test_proportions_2indep(count1, nobs1, count2, nobs2, 1626 value=value, compare=compare, 1627 alternative=alternative, 1628 correction=correction, 1629 return_results=return_results) 1630 if return_results is False: 1631 statistic, pvalue = res[:2] 1632 distr = 'normal' 1633 diff_stat = None 1634 1635 else: 1636 raise ValueError('method not recognized') 1637 1638 elif compare == "odds-ratio": 1639 1640 if method in ['logit', 'logit-adjusted', 'logit-smoothed']: 1641 if method in ['logit-smoothed']: 1642 adjusted = _shrink_prob(count1, nobs1, count2, nobs2, 1643 shrink_factor=2, return_corr=False)[0] 1644 count1_, nobs1_, count2_, nobs2_ = adjusted 1645 1646 else: 1647 addhalf = 0.5 if method == 'logit-adjusted' else 0 1648 count1_, nobs1_ = count1 + addhalf, nobs1 + 2 * addhalf 1649 count2_, nobs2_ = count2 + addhalf, nobs2 + 2 * addhalf 1650 p1_ = count1_ / nobs1_ 1651 p2_ = count2_ / nobs2_ 1652 odds_ratio_ = p1_ / (1 - p1_) / p2_ * (1 - p2_) 1653 var = (1 / count1_ + 1 / (nobs1_ - count1_) + 1654 1 / count2_ + 1 / (nobs2_ - count2_)) 1655 1656 diff_stat = np.log(odds_ratio_) - np.log(value) 1657 statistic = diff_stat / np.sqrt(var) 1658 distr = 'normal' 1659 1660 elif method == 'score': 1661 res = score_test_proportions_2indep(count1, nobs1, count2, nobs2, 1662 value=value, compare=compare, 1663 alternative=alternative, 1664 correction=correction, 1665 return_results=return_results) 1666 if return_results is False: 1667 statistic, pvalue = res[:2] 1668 distr = 'normal' 1669 diff_stat = None 1670 else: 1671 raise ValueError('method "%s" not recognized' % method) 1672 1673 else: 1674 raise ValueError('compare "%s" not recognized' % compare) 1675 1676 if distr == 'normal' and diff_stat is not None: 1677 statistic, pvalue = _zstat_generic2(diff_stat, np.sqrt(var), 1678 alternative=alternative) 1679 1680 if return_results: 1681 if res is None: 1682 res = HolderTuple(statistic=statistic, 1683 pvalue=pvalue, 1684 compare=compare, 1685 method=method, 1686 diff=diff, 1687 ratio=ratio, 1688 odds_ratio=odds_ratio, 1689 variance=var, 1690 alternative=alternative, 1691 value=value, 1692 ) 1693 else: 1694 # we already have a return result from score test 1695 # add missing attributes 1696 res.diff = diff 1697 res.ratio = ratio 1698 res.odds_ratio = odds_ratio 1699 res.value = value 1700 return res 1701 else: 1702 return statistic, pvalue 1703 1704 1705def tost_proportions_2indep(count1, nobs1, count2, nobs2, low, upp, 1706 method=None, compare='diff', correction=True): 1707 """ 1708 Equivalence test based on two one-sided `test_proportions_2indep` 1709 1710 This assumes that we have two independent binomial samples. 1711 1712 The Null and alternative hypothesis for equivalence testing are 1713 1714 for compare = 'diff' 1715 1716 - H0: prop1 - prop2 <= low or upp <= prop1 - prop2 1717 - H1: low < prop1 - prop2 < upp 1718 1719 for compare = 'ratio' 1720 1721 - H0: prop1 / prop2 <= low or upp <= prop1 / prop2 1722 - H1: low < prop1 / prop2 < upp 1723 1724 1725 for compare = 'odds-ratio' 1726 1727 - H0: or <= low or upp <= or 1728 - H1: low < or < upp 1729 1730 where odds-ratio or = prop1 / (1 - prop1) / (prop2 / (1 - prop2)) 1731 1732 Parameters 1733 ---------- 1734 count1, nobs1 : 1735 count and sample size for first sample 1736 count2, nobs2 : 1737 count and sample size for the second sample 1738 low, upp : 1739 equivalence margin for diff, risk ratio or odds ratio 1740 method : string 1741 method for computing confidence interval. If method is None, then a 1742 default method is used. The default might change as more methods are 1743 added. 1744 1745 diff: 1746 - 'wald', 1747 - 'agresti-caffo' 1748 - 'score' if correction is True, then this uses the degrees of freedom 1749 correction ``nobs / (nobs - 1)`` as in Miettinen Nurminen 1985. 1750 1751 ratio: 1752 - 'log': wald test using log transformation 1753 - 'log-adjusted': wald test using log transformation, 1754 adds 0.5 to counts 1755 - 'score' if correction is True, then this uses the degrees of freedom 1756 correction ``nobs / (nobs - 1)`` as in Miettinen Nurminen 1985. 1757 1758 odds-ratio: 1759 - 'logit': wald test using logit transformation 1760 - 'logit-adjusted': : wald test using logit transformation, 1761 adds 0.5 to counts 1762 - 'logit-smoothed': : wald test using logit transformation, biases 1763 cell counts towards independence by adding two observations in 1764 total. 1765 - 'score' if correction is True, then this uses the degrees of freedom 1766 correction ``nobs / (nobs - 1)`` as in Miettinen Nurminen 1985 1767 1768 compare : string in ['diff', 'ratio' 'odds-ratio'] 1769 If compare is `diff`, then the confidence interval is for 1770 diff = p1 - p2. 1771 If compare is `ratio`, then the confidence interval is for the 1772 risk ratio defined by ratio = p1 / p2. 1773 If compare is `odds-ratio`, then the confidence interval is for the 1774 odds-ratio defined by or = p1 / (1 - p1) / (p2 / (1 - p2). 1775 correction : bool 1776 If correction is True (default), then the Miettinen and Nurminen 1777 small sample correction to the variance nobs / (nobs - 1) is used. 1778 Applies only if method='score'. 1779 1780 Returns 1781 ------- 1782 pvalue : float 1783 p-value is the max of the pvalues of the two one-sided tests 1784 t1 : test results 1785 results instance for one-sided hypothesis at the lower margin 1786 t1 : test results 1787 results instance for one-sided hypothesis at the upper margin 1788 1789 Notes 1790 ----- 1791 Status: experimental, API and defaults might still change. 1792 1793 """ 1794 1795 tt1 = test_proportions_2indep(count1, nobs1, count2, nobs2, value=low, 1796 method=method, compare=compare, 1797 alternative='larger', 1798 correction=correction, 1799 return_results=True) 1800 tt2 = test_proportions_2indep(count1, nobs1, count2, nobs2, value=upp, 1801 method=method, compare=compare, 1802 alternative='smaller', 1803 correction=correction, 1804 return_results=True) 1805 1806 idx_max = 0 if tt1.pvalue < tt2.pvalue else 1 1807 res = HolderTuple(statistic=[tt1.statistic, tt2.statistic][idx_max], 1808 pvalue=[tt1.pvalue, tt2.pvalue][idx_max], 1809 compare=compare, 1810 method=method, 1811 results_larger=tt1, 1812 results_smaller=tt2, 1813 title="Equivalence test for 2 independent proportions" 1814 ) 1815 1816 return res 1817 1818 1819def _std_2prop_power(diff, p2, ratio=1, alpha=0.05, value=0): 1820 """compute standard error under null and alternative for 2 proportions 1821 1822 helper function for power and sample size computation 1823 1824 """ 1825 if value != 0: 1826 msg = 'non-zero diff under null, value, is not yet implemented' 1827 raise NotImplementedError(msg) 1828 1829 nobs_ratio = ratio 1830 p1 = p2 + diff 1831 # The following contains currently redundant variables that will 1832 # be useful for different options for the null variance 1833 p_pooled = (p1 + p2 * ratio) / (1 + ratio) 1834 # probabilities for the variance for the null statistic 1835 p1_vnull, p2_vnull = p_pooled, p_pooled 1836 p2_alt = p2 1837 p1_alt = p2_alt + diff 1838 1839 std_null = _std_diff_prop(p1_vnull, p2_vnull) 1840 std_alt = _std_diff_prop(p1_alt, p2_alt, ratio=nobs_ratio) 1841 return p_pooled, std_null, std_alt 1842 1843 1844def power_proportions_2indep(diff, prop2, nobs1, ratio=1, alpha=0.05, 1845 value=0, alternative='two-sided', 1846 return_results=True): 1847 """power for ztest that two independent proportions are equal 1848 1849 This assumes that the variance is based on the pooled proportion 1850 under the null and the non-pooled variance under the alternative 1851 1852 Parameters 1853 ---------- 1854 diff : float 1855 difference between proportion 1 and 2 under the alternative 1856 prop2 : float 1857 proportion for the reference case, prop2, proportions for the 1858 first case will be computing using p2 and diff 1859 p1 = p2 + diff 1860 nobs1 : float or int 1861 number of observations in sample 1 1862 ratio : float 1863 sample size ratio, nobs2 = ratio * nobs1 1864 alpha : float in interval (0,1) 1865 Significance level, e.g. 0.05, is the probability of a type I 1866 error, that is wrong rejections if the Null Hypothesis is true. 1867 value : float 1868 currently only `value=0`, i.e. equality testing, is supported 1869 alternative : string, 'two-sided' (default), 'larger', 'smaller' 1870 Alternative hypothesis whether the power is calculated for a 1871 two-sided (default) or one sided test. The one-sided test can be 1872 either 'larger', 'smaller'. 1873 return_results : bool 1874 If true, then a results instance with extra information is returned, 1875 otherwise only the computed power is returned. 1876 1877 Returns 1878 ------- 1879 results : results instance or float 1880 If return_results is True, then a results instance with the 1881 information in attributes is returned. 1882 If return_results is False, then only the power is returned. 1883 1884 power : float 1885 Power of the test, e.g. 0.8, is one minus the probability of a 1886 type II error. Power is the probability that the test correctly 1887 rejects the Null Hypothesis if the Alternative Hypothesis is true. 1888 1889 Other attributes in results instance include : 1890 1891 p_pooled 1892 pooled proportion, used for std_null 1893 std_null 1894 standard error of difference under the null hypothesis (without 1895 sqrt(nobs)) 1896 std_alt 1897 standard error of difference under the alternative hypothesis 1898 (without sqrt(nobs)) 1899 """ 1900 # TODO: avoid possible circular import, check if needed 1901 from statsmodels.stats.power import normal_power_het 1902 1903 p_pooled, std_null, std_alt = _std_2prop_power(diff, prop2, ratio=1, 1904 alpha=0.05, value=0) 1905 1906 pow_ = normal_power_het(diff, nobs1, alpha, std_null=std_null, 1907 std_alternative=std_alt, 1908 alternative=alternative) 1909 1910 if return_results: 1911 res = Holder(power=pow_, 1912 p_pooled=p_pooled, 1913 std_null=std_null, 1914 std_alt=std_alt, 1915 nobs1=nobs1, 1916 nobs2=ratio * nobs1, 1917 nobs_ratio=ratio, 1918 alpha=alpha, 1919 ) 1920 return res 1921 else: 1922 return pow_ 1923 1924 1925def samplesize_proportions_2indep_onetail(diff, prop2, power, ratio=1, 1926 alpha=0.05, value=0, 1927 alternative='two-sided'): 1928 """required sample size assuming normal distribution based on one tail 1929 1930 This uses an explicit computation for the sample size that is required 1931 to achieve a given power corresponding to the appropriate tails of the 1932 normal distribution. This ignores the far tail in a two-sided test 1933 which is negligible in the common case when alternative and null are 1934 far apart. 1935 1936 Parameters 1937 ---------- 1938 diff : float 1939 Difference between proportion 1 and 2 under the alternative 1940 prop2 : float 1941 proportion for the reference case, prop2, proportions for the 1942 first case will be computing using p2 and diff 1943 p1 = p2 + diff 1944 power : float 1945 Power for which sample size is computed. 1946 ratio : float 1947 Sample size ratio, nobs2 = ratio * nobs1 1948 alpha : float in interval (0,1) 1949 Significance level, e.g. 0.05, is the probability of a type I 1950 error, that is wrong rejections if the Null Hypothesis is true. 1951 value : float 1952 Currently only `value=0`, i.e. equality testing, is supported 1953 alternative : string, 'two-sided' (default), 'larger', 'smaller' 1954 Alternative hypothesis whether the power is calculated for a 1955 two-sided (default) or one sided test. In the case of a one-sided 1956 alternative, it is assumed that the test is in the appropriate tail. 1957 1958 Returns 1959 ------- 1960 nobs1 : float 1961 Number of observations in sample 1. 1962 """ 1963 # TODO: avoid possible circular import, check if needed 1964 from statsmodels.stats.power import normal_sample_size_one_tail 1965 1966 if alternative in ['two-sided', '2s']: 1967 alpha = alpha / 2 1968 1969 _, std_null, std_alt = _std_2prop_power(diff, prop2, ratio=ratio, 1970 alpha=alpha, value=value) 1971 1972 nobs = normal_sample_size_one_tail(diff, power, alpha, std_null=std_null, 1973 std_alternative=std_alt) 1974 return nobs 1975 1976 1977def _score_confint_inversion(count1, nobs1, count2, nobs2, compare='diff', 1978 alpha=0.05, correction=True): 1979 """Compute score confidence interval by inverting score test 1980 1981 Parameters 1982 ---------- 1983 count1, nobs1 : 1984 Count and sample size for first sample. 1985 count2, nobs2 : 1986 Count and sample size for the second sample. 1987 compare : string in ['diff', 'ratio' 'odds-ratio'] 1988 If compare is `diff`, then the confidence interval is for 1989 diff = p1 - p2. 1990 If compare is `ratio`, then the confidence interval is for the 1991 risk ratio defined by ratio = p1 / p2. 1992 If compare is `odds-ratio`, then the confidence interval is for the 1993 odds-ratio defined by or = p1 / (1 - p1) / (p2 / (1 - p2). 1994 alpha : float in interval (0,1) 1995 Significance level, e.g. 0.05, is the probability of a type I 1996 error, that is wrong rejections if the Null Hypothesis is true. 1997 correction : bool 1998 If correction is True (default), then the Miettinen and Nurminen 1999 small sample correction to the variance nobs / (nobs - 1) is used. 2000 Applies only if method='score'. 2001 2002 Returns 2003 ------- 2004 low : float 2005 Lower confidence bound. 2006 upp : float 2007 Upper confidence bound. 2008 """ 2009 2010 def func(v): 2011 r = test_proportions_2indep(count1, nobs1, count2, nobs2, 2012 value=v, compare=compare, method='score', 2013 correction=correction, 2014 alternative="two-sided") 2015 return r.pvalue - alpha 2016 2017 rt0 = test_proportions_2indep(count1, nobs1, count2, nobs2, 2018 value=0, compare=compare, method='score', 2019 correction=correction, 2020 alternative="two-sided") 2021 2022 # use default method to get starting values 2023 # this will not work if score confint becomes default 2024 # maybe use "wald" as alias that works for all compare statistics 2025 use_method = {"diff": "wald", "ratio": "log", "odds-ratio": "logit"} 2026 rci0 = confint_proportions_2indep(count1, nobs1, count2, nobs2, 2027 method=use_method[compare], 2028 compare=compare, alpha=alpha) 2029 2030 # Note diff might be negative 2031 ub = rci0[1] + np.abs(rci0[1]) * 0.5 2032 lb = rci0[0] - np.abs(rci0[0]) * 0.25 2033 if compare == 'diff': 2034 param = rt0.diff 2035 # 1 might not be the correct upper bound because 2036 # rootfinding is for the `diff` and not for a probability. 2037 ub = min(ub, 0.99999) 2038 elif compare == 'ratio': 2039 param = rt0.ratio 2040 ub *= 2 # add more buffer 2041 if compare == 'odds-ratio': 2042 param = rt0.odds_ratio 2043 2044 # root finding for confint bounds 2045 upp = optimize.brentq(func, param, ub) 2046 low = optimize.brentq(func, lb, param) 2047 return low, upp 2048 2049 2050def _confint_riskratio_koopman(count1, nobs1, count2, nobs2, alpha=0.05, 2051 correction=True): 2052 """score confidence interval for ratio or proportions, Koopman/Nam 2053 2054 signature not consistent with other functions 2055 2056 When correction is True, then the small sample correction nobs / (nobs - 1) 2057 by Miettinen/Nurminen is used. 2058 """ 2059 # The names below follow Nam 2060 x0, x1, n0, n1 = count2, count1, nobs2, nobs1 2061 x = x0 + x1 2062 n = n0 + n1 2063 z = stats.norm.isf(alpha / 2)**2 2064 if correction: 2065 # Mietinnen/Nurminen small sample correction 2066 z *= n / (n - 1) 2067 # z = stats.chi2.isf(alpha, 1) 2068 # equ 6 in Nam 1995 2069 a1 = n0 * (n0 * n * x1 + n1 * (n0 + x1) * z) 2070 a2 = - n0 * (n0 * n1 * x + 2 * n * x0 * x1 + n1 * (n0 + x0 + 2 * x1) * z) 2071 a3 = 2 * n0 * n1 * x0 * x + n * x0 * x0 * x1 + n0 * n1 * x * z 2072 a4 = - n1 * x0 * x0 * x 2073 2074 p_roots_ = np.sort(np.roots([a1, a2, a3, a4])) 2075 p_roots = p_roots_[:2][::-1] 2076 2077 # equ 5 2078 ci = (1 - (n1 - x1) * (1 - p_roots) / (x0 + n1 - n * p_roots)) / p_roots 2079 2080 res = Holder() 2081 res.confint = ci 2082 res._p_roots = p_roots_ # for unit tests, can be dropped 2083 return res 2084 2085 2086def _confint_riskratio_paired_nam(table, alpha=0.05): 2087 """confidence interval for marginal risk ratio for matched pairs 2088 2089 need full table 2090 2091 success fail marginal 2092 success x11 x10 x1. 2093 fail x01 x00 x0. 2094 marginal x.1 x.0 n 2095 2096 The confidence interval is for the ratio p1 / p0 where 2097 p1 = x1. / n and 2098 p0 - x.1 / n 2099 Todo: rename p1 to pa and p2 to pb, so we have a, b for treatment and 2100 0, 1 for success/failure 2101 2102 current namings follow Nam 2009 2103 2104 status 2105 testing: 2106 compared to example in Nam 2009 2107 internal polynomial coefficients in calculation correspond at around 2108 4 decimals 2109 confidence interval agrees only at 2 decimals 2110 2111 """ 2112 x11, x10, x01, x00 = np.ravel(table) 2113 n = np.sum(table) # nobs 2114 p10, p01 = x10 / n, x01 / n 2115 p1 = (x11 + x10) / n 2116 p0 = (x11 + x01) / n 2117 q00 = 1 - x00 / n 2118 2119 z2 = stats.norm.isf(alpha / 2)**2 2120 # z = stats.chi2.isf(alpha, 1) 2121 # before equ 3 in Nam 2009 2122 2123 g1 = (n * p0 + z2 / 2) * p0 2124 g2 = - (2 * n * p1 * p0 + z2 * q00) 2125 g3 = (n * p1 + z2 / 2) * p1 2126 2127 a0 = g1**2 - (z2 * p0 / 2)**2 2128 a1 = 2 * g1 * g2 2129 a2 = g2**2 + 2 * g1 * g3 + z2**2 * (p1 * p0 - 2 * p10 * p01) / 2 2130 a3 = 2 * g2 * g3 2131 a4 = g3**2 - (z2 * p1 / 2)**2 2132 2133 p_roots = np.sort(np.roots([a0, a1, a2, a3, a4])) 2134 # p_roots = np.sort(np.roots([1, a1 / a0, a2 / a0, a3 / a0, a4 / a0])) 2135 2136 ci = [p_roots.min(), p_roots.max()] 2137 res = Holder() 2138 res.confint = ci 2139 res.p = p1, p0 2140 res._p_roots = p_roots # for unit tests, can be dropped 2141 return res 2142