1# -*- coding: utf-8 -*-
2"""Tests and Confidence Intervals for Binomial Proportions
3
4Created on Fri Mar 01 00:23:07 2013
5
6Author: Josef Perktold
7License: BSD-3
8"""
9
10from statsmodels.compat.python import lzip
11import numpy as np
12from scipy import stats, optimize
13from sys import float_info
14
15from statsmodels.stats.base import AllPairsResults
16from statsmodels.tools.sm_exceptions import HypothesisTestWarning
17from statsmodels.stats.weightstats import _zstat_generic2
18from statsmodels.stats.base import HolderTuple
19from statsmodels.tools.testing import Holder
20
21
22def proportion_confint(count, nobs, alpha=0.05, method='normal'):
23    '''confidence interval for a binomial proportion
24
25    Parameters
26    ----------
27    count : int or array_array_like
28        number of successes, can be pandas Series or DataFrame
29    nobs : int
30        total number of trials
31    alpha : float in (0, 1)
32        significance level, default 0.05
33    method : {'normal', 'agresti_coull', 'beta', 'wilson', 'binom_test'}
34        default: 'normal'
35        method to use for confidence interval,
36        currently available methods :
37
38         - `normal` : asymptotic normal approximation
39         - `agresti_coull` : Agresti-Coull interval
40         - `beta` : Clopper-Pearson interval based on Beta distribution
41         - `wilson` : Wilson Score interval
42         - `jeffreys` : Jeffreys Bayesian Interval
43         - `binom_test` : experimental, inversion of binom_test
44
45    Returns
46    -------
47    ci_low, ci_upp : float, ndarray, or pandas Series or DataFrame
48        lower and upper confidence level with coverage (approximately) 1-alpha.
49        When a pandas object is returned, then the index is taken from the
50        `count`.
51
52    Notes
53    -----
54    Beta, the Clopper-Pearson exact interval has coverage at least 1-alpha,
55    but is in general conservative. Most of the other methods have average
56    coverage equal to 1-alpha, but will have smaller coverage in some cases.
57
58    The 'beta' and 'jeffreys' interval are central, they use alpha/2 in each
59    tail, and alpha is not adjusted at the boundaries. In the extreme case
60    when `count` is zero or equal to `nobs`, then the coverage will be only
61    1 - alpha/2 in the case of 'beta'.
62
63    The confidence intervals are clipped to be in the [0, 1] interval in the
64    case of 'normal' and 'agresti_coull'.
65
66    Method "binom_test" directly inverts the binomial test in scipy.stats.
67    which has discrete steps.
68
69    TODO: binom_test intervals raise an exception in small samples if one
70       interval bound is close to zero or one.
71
72    References
73    ----------
74    https://en.wikipedia.org/wiki/Binomial_proportion_confidence_interval
75
76    Brown, Lawrence D.; Cai, T. Tony; DasGupta, Anirban (2001). "Interval
77        Estimation for a Binomial Proportion",
78        Statistical Science 16 (2): 101–133. doi:10.1214/ss/1009213286.
79
80    '''
81
82    pd_index = getattr(count, 'index', None)
83    if pd_index is not None and callable(pd_index):
84        # this rules out lists, lists have an index method
85        pd_index = None
86    count = np.asarray(count)
87    nobs = np.asarray(nobs)
88
89    q_ = count * 1. / nobs
90    alpha_2 = 0.5 * alpha
91
92    if method == 'normal':
93        std_ = np.sqrt(q_ * (1 - q_) / nobs)
94        dist = stats.norm.isf(alpha / 2.) * std_
95        ci_low = q_ - dist
96        ci_upp = q_ + dist
97
98    elif method == 'binom_test':
99        # inverting the binomial test
100        def func(qi):
101            return stats.binom_test(q_ * nobs, nobs, p=qi) - alpha
102        if count == 0:
103            ci_low = 0
104        else:
105            ci_low = optimize.brentq(func, float_info.min, q_)
106        if count == nobs:
107            ci_upp = 1
108        else:
109            ci_upp = optimize.brentq(func, q_, 1. - float_info.epsilon)
110
111    elif method == 'beta':
112        ci_low = stats.beta.ppf(alpha_2, count, nobs - count + 1)
113        ci_upp = stats.beta.isf(alpha_2, count + 1, nobs - count)
114
115        if np.ndim(ci_low) > 0:
116            ci_low[q_ == 0] = 0
117            ci_upp[q_ == 1] = 1
118        else:
119            ci_low = ci_low if (q_ != 0) else 0
120            ci_upp = ci_upp if (q_ != 1) else 1
121
122    elif method == 'agresti_coull':
123        crit = stats.norm.isf(alpha / 2.)
124        nobs_c = nobs + crit**2
125        q_c = (count + crit**2 / 2.) / nobs_c
126        std_c = np.sqrt(q_c * (1. - q_c) / nobs_c)
127        dist = crit * std_c
128        ci_low = q_c - dist
129        ci_upp = q_c + dist
130
131    elif method == 'wilson':
132        crit = stats.norm.isf(alpha / 2.)
133        crit2 = crit**2
134        denom = 1 + crit2 / nobs
135        center = (q_ + crit2 / (2 * nobs)) / denom
136        dist = crit * np.sqrt(q_ * (1. - q_) / nobs + crit2 / (4. * nobs**2))
137        dist /= denom
138        ci_low = center - dist
139        ci_upp = center + dist
140
141    # method adjusted to be more forgiving of misspellings or incorrect option name
142    elif method[:4] == 'jeff':
143        ci_low, ci_upp = stats.beta.interval(1 - alpha, count + 0.5,
144                                             nobs - count + 0.5)
145
146    else:
147        raise NotImplementedError('method "%s" is not available' % method)
148
149    if method in ['normal', 'agresti_coull']:
150        ci_low = np.clip(ci_low, 0, 1)
151        ci_upp = np.clip(ci_upp, 0, 1)
152    if pd_index is not None and np.ndim(ci_low) > 0:
153        import pandas as pd
154        if np.ndim(ci_low) == 1:
155            ci_low = pd.Series(ci_low, index=pd_index)
156            ci_upp = pd.Series(ci_upp, index=pd_index)
157        if np.ndim(ci_low) == 2:
158            ci_low = pd.DataFrame(ci_low, index=pd_index)
159            ci_upp = pd.DataFrame(ci_upp, index=pd_index)
160
161    return ci_low, ci_upp
162
163
164def multinomial_proportions_confint(counts, alpha=0.05, method='goodman'):
165    '''Confidence intervals for multinomial proportions.
166
167    Parameters
168    ----------
169    counts : array_like of int, 1-D
170        Number of observations in each category.
171    alpha : float in (0, 1), optional
172        Significance level, defaults to 0.05.
173    method : {'goodman', 'sison-glaz'}, optional
174        Method to use to compute the confidence intervals; available methods
175        are:
176
177         - `goodman`: based on a chi-squared approximation, valid if all
178           values in `counts` are greater or equal to 5 [2]_
179         - `sison-glaz`: less conservative than `goodman`, but only valid if
180           `counts` has 7 or more categories (``len(counts) >= 7``) [3]_
181
182    Returns
183    -------
184    confint : ndarray, 2-D
185        Array of [lower, upper] confidence levels for each category, such that
186        overall coverage is (approximately) `1-alpha`.
187
188    Raises
189    ------
190    ValueError
191        If `alpha` is not in `(0, 1)` (bounds excluded), or if the values in
192        `counts` are not all positive or null.
193    NotImplementedError
194        If `method` is not kown.
195    Exception
196        When ``method == 'sison-glaz'``, if for some reason `c` cannot be
197        computed; this signals a bug and should be reported.
198
199    Notes
200    -----
201    The `goodman` method [2]_ is based on approximating a statistic based on
202    the multinomial as a chi-squared random variable. The usual recommendation
203    is that this is valid if all the values in `counts` are greater than or
204    equal to 5. There is no condition on the number of categories for this
205    method.
206
207    The `sison-glaz` method [3]_ approximates the multinomial probabilities,
208    and evaluates that with a maximum-likelihood estimator. The first
209    approximation is an Edgeworth expansion that converges when the number of
210    categories goes to infinity, and the maximum-likelihood estimator converges
211    when the number of observations (``sum(counts)``) goes to infinity. In
212    their paper, Sison & Glaz demo their method with at least 7 categories, so
213    ``len(counts) >= 7`` with all values in `counts` at or above 5 can be used
214    as a rule of thumb for the validity of this method. This method is less
215    conservative than the `goodman` method (i.e. it will yield confidence
216    intervals closer to the desired significance level), but produces
217    confidence intervals of uniform width over all categories (except when the
218    intervals reach 0 or 1, in which case they are truncated), which makes it
219    most useful when proportions are of similar magnitude.
220
221    Aside from the original sources ([1]_, [2]_, and [3]_), the implementation
222    uses the formulas (though not the code) presented in [4]_ and [5]_.
223
224    References
225    ----------
226    .. [1] Levin, Bruce, "A representation for multinomial cumulative
227           distribution functions," The Annals of Statistics, Vol. 9, No. 5,
228           1981, pp. 1123-1126.
229
230    .. [2] Goodman, L.A., "On simultaneous confidence intervals for multinomial
231           proportions," Technometrics, Vol. 7, No. 2, 1965, pp. 247-254.
232
233    .. [3] Sison, Cristina P., and Joseph Glaz, "Simultaneous Confidence
234           Intervals and Sample Size Determination for Multinomial
235           Proportions," Journal of the American Statistical Association,
236           Vol. 90, No. 429, 1995, pp. 366-369.
237
238    .. [4] May, Warren L., and William D. Johnson, "A SAS® macro for
239           constructing simultaneous confidence intervals  for multinomial
240           proportions," Computer methods and programs in Biomedicine, Vol. 53,
241           No. 3, 1997, pp. 153-162.
242
243    .. [5] May, Warren L., and William D. Johnson, "Constructing two-sided
244           simultaneous confidence intervals for multinomial proportions for
245           small counts in a large number of cells," Journal of Statistical
246           Software, Vol. 5, No. 6, 2000, pp. 1-24.
247    '''
248    if alpha <= 0 or alpha >= 1:
249        raise ValueError('alpha must be in (0, 1), bounds excluded')
250    counts = np.array(counts, dtype=float)
251    if (counts < 0).any():
252        raise ValueError('counts must be >= 0')
253
254    n = counts.sum()
255    k = len(counts)
256    proportions = counts / n
257    if method == 'goodman':
258        chi2 = stats.chi2.ppf(1 - alpha / k, 1)
259        delta = chi2 ** 2 + (4 * n * proportions * chi2 * (1 - proportions))
260        region = ((2 * n * proportions + chi2 +
261                   np.array([- np.sqrt(delta), np.sqrt(delta)])) /
262                  (2 * (chi2 + n))).T
263    elif method[:5] == 'sison':  # We accept any name starting with 'sison'
264        # Define a few functions we'll use a lot.
265        def poisson_interval(interval, p):
266            """Compute P(b <= Z <= a) where Z ~ Poisson(p) and
267            `interval = (b, a)`."""
268            b, a = interval
269            prob = stats.poisson.cdf(a, p) - stats.poisson.cdf(b - 1, p)
270            return prob
271
272        def truncated_poisson_factorial_moment(interval, r, p):
273            """Compute mu_r, the r-th factorial moment of a poisson random
274            variable of parameter `p` truncated to `interval = (b, a)`."""
275            b, a = interval
276            return p ** r * (1 - ((poisson_interval((a - r + 1, a), p) -
277                                   poisson_interval((b - r, b - 1), p)) /
278                                  poisson_interval((b, a), p)))
279
280        def edgeworth(intervals):
281            """Compute the Edgeworth expansion term of Sison & Glaz's formula
282            (1) (approximated probability for multinomial proportions in a
283            given box)."""
284            # Compute means and central moments of the truncated poisson
285            # variables.
286            mu_r1, mu_r2, mu_r3, mu_r4 = [
287                np.array([truncated_poisson_factorial_moment(interval, r, p)
288                          for (interval, p) in zip(intervals, counts)])
289                for r in range(1, 5)
290            ]
291            mu = mu_r1
292            mu2 = mu_r2 + mu - mu ** 2
293            mu3 = mu_r3 + mu_r2 * (3 - 3 * mu) + mu - 3 * mu ** 2 + 2 * mu ** 3
294            mu4 = (mu_r4 + mu_r3 * (6 - 4 * mu) +
295                   mu_r2 * (7 - 12 * mu + 6 * mu ** 2) +
296                   mu - 4 * mu ** 2 + 6 * mu ** 3 - 3 * mu ** 4)
297
298            # Compute expansion factors, gamma_1 and gamma_2.
299            g1 = mu3.sum() / mu2.sum() ** 1.5
300            g2 = (mu4.sum() - 3 * (mu2 ** 2).sum()) / mu2.sum() ** 2
301
302            # Compute the expansion itself.
303            x = (n - mu.sum()) / np.sqrt(mu2.sum())
304            phi = np.exp(- x ** 2 / 2) / np.sqrt(2 * np.pi)
305            H3 = x ** 3 - 3 * x
306            H4 = x ** 4 - 6 * x ** 2 + 3
307            H6 = x ** 6 - 15 * x ** 4 + 45 * x ** 2 - 15
308            f = phi * (1 + g1 * H3 / 6 + g2 * H4 / 24 + g1 ** 2 * H6 / 72)
309            return f / np.sqrt(mu2.sum())
310
311
312        def approximated_multinomial_interval(intervals):
313            """Compute approximated probability for Multinomial(n, proportions)
314            to be in `intervals` (Sison & Glaz's formula (1))."""
315            return np.exp(
316                np.sum(np.log([poisson_interval(interval, p)
317                               for (interval, p) in zip(intervals, counts)])) +
318                np.log(edgeworth(intervals)) -
319                np.log(stats.poisson._pmf(n, n))
320            )
321
322        def nu(c):
323            """Compute interval coverage for a given `c` (Sison & Glaz's
324            formula (7))."""
325            return approximated_multinomial_interval(
326                [(np.maximum(count - c, 0), np.minimum(count + c, n))
327                 for count in counts])
328
329        # Find the value of `c` that will give us the confidence intervals
330        # (solving nu(c) <= 1 - alpha < nu(c + 1).
331        c = 1.0
332        nuc = nu(c)
333        nucp1 = nu(c + 1)
334        while not (nuc <= (1 - alpha) < nucp1):
335            if c > n:
336                raise Exception("Couldn't find a value for `c` that "
337                                "solves nu(c) <= 1 - alpha < nu(c + 1)")
338            c += 1
339            nuc = nucp1
340            nucp1 = nu(c + 1)
341
342        # Compute gamma and the corresponding confidence intervals.
343        g = (1 - alpha - nuc) / (nucp1 - nuc)
344        ci_lower = np.maximum(proportions - c / n, 0)
345        ci_upper = np.minimum(proportions + (c + 2 * g) / n, 1)
346        region = np.array([ci_lower, ci_upper]).T
347    else:
348        raise NotImplementedError('method "%s" is not available' % method)
349    return region
350
351
352def samplesize_confint_proportion(proportion, half_length, alpha=0.05,
353                                  method='normal'):
354    '''find sample size to get desired confidence interval length
355
356    Parameters
357    ----------
358    proportion : float in (0, 1)
359        proportion or quantile
360    half_length : float in (0, 1)
361        desired half length of the confidence interval
362    alpha : float in (0, 1)
363        significance level, default 0.05,
364        coverage of the two-sided interval is (approximately) ``1 - alpha``
365    method : str in ['normal']
366        method to use for confidence interval,
367        currently only normal approximation
368
369    Returns
370    -------
371    n : float
372        sample size to get the desired half length of the confidence interval
373
374    Notes
375    -----
376    this is mainly to store the formula.
377    possible application: number of replications in bootstrap samples
378
379    '''
380    q_ = proportion
381    if method == 'normal':
382        n = q_ * (1 - q_) / (half_length / stats.norm.isf(alpha / 2.))**2
383    else:
384        raise NotImplementedError('only "normal" is available')
385
386    return n
387
388
389def proportion_effectsize(prop1, prop2, method='normal'):
390    '''
391    Effect size for a test comparing two proportions
392
393    for use in power function
394
395    Parameters
396    ----------
397    prop1, prop2 : float or array_like
398        The proportion value(s).
399
400    Returns
401    -------
402    es : float or ndarray
403        effect size for (transformed) prop1 - prop2
404
405    Notes
406    -----
407    only method='normal' is implemented to match pwr.p2.test
408    see http://www.statmethods.net/stats/power.html
409
410    Effect size for `normal` is defined as ::
411
412        2 * (arcsin(sqrt(prop1)) - arcsin(sqrt(prop2)))
413
414    I think other conversions to normality can be used, but I need to check.
415
416    Examples
417    --------
418    >>> import statsmodels.api as sm
419    >>> sm.stats.proportion_effectsize(0.5, 0.4)
420    0.20135792079033088
421    >>> sm.stats.proportion_effectsize([0.3, 0.4, 0.5], 0.4)
422    array([-0.21015893,  0.        ,  0.20135792])
423
424    '''
425    if method != 'normal':
426        raise ValueError('only "normal" is implemented')
427
428    es = 2 * (np.arcsin(np.sqrt(prop1)) - np.arcsin(np.sqrt(prop2)))
429    return es
430
431
432def std_prop(prop, nobs):
433    '''standard error for the estimate of a proportion
434
435    This is just ``np.sqrt(p * (1. - p) / nobs)``
436
437    Parameters
438    ----------
439    prop : array_like
440        proportion
441    nobs : int, array_like
442        number of observations
443
444    Returns
445    -------
446    std : array_like
447        standard error for a proportion of nobs independent observations
448    '''
449    return np.sqrt(prop * (1. - prop) / nobs)
450
451
452def _std_diff_prop(p1, p2, ratio=1):
453    return np.sqrt(p1 * (1 - p1) + p2 * (1 - p2) / ratio)
454
455
456def _power_ztost(mean_low, var_low, mean_upp, var_upp, mean_alt, var_alt,
457                 alpha=0.05, discrete=True, dist='norm', nobs=None,
458                 continuity=0, critval_continuity=0):
459    '''Generic statistical power function for normal based equivalence test
460
461    This includes options to adjust the normal approximation and can use
462    the binomial to evaluate the probability of the rejection region
463
464    see power_ztost_prob for a description of the options
465    '''
466    # TODO: refactor structure, separate norm and binom better
467    if not isinstance(continuity, tuple):
468        continuity = (continuity, continuity)
469    crit = stats.norm.isf(alpha)
470    k_low = mean_low + np.sqrt(var_low) * crit
471    k_upp = mean_upp - np.sqrt(var_upp) * crit
472    if discrete or dist == 'binom':
473        k_low = np.ceil(k_low * nobs + 0.5 * critval_continuity)
474        k_upp = np.trunc(k_upp * nobs - 0.5 * critval_continuity)
475        if dist == 'norm':
476            #need proportion
477            k_low = (k_low) * 1. / nobs #-1 to match PASS
478            k_upp = k_upp * 1. / nobs
479#    else:
480#        if dist == 'binom':
481#            #need counts
482#            k_low *= nobs
483#            k_upp *= nobs
484    #print mean_low, np.sqrt(var_low), crit, var_low
485    #print mean_upp, np.sqrt(var_upp), crit, var_upp
486    if np.any(k_low > k_upp):   #vectorize
487        import warnings
488        warnings.warn("no overlap, power is zero", HypothesisTestWarning)
489    std_alt = np.sqrt(var_alt)
490    z_low = (k_low - mean_alt - continuity[0] * 0.5 / nobs) / std_alt
491    z_upp = (k_upp - mean_alt + continuity[1] * 0.5 / nobs) / std_alt
492    if dist == 'norm':
493        power = stats.norm.cdf(z_upp) - stats.norm.cdf(z_low)
494    elif dist == 'binom':
495        power = (stats.binom.cdf(k_upp, nobs, mean_alt) -
496                     stats.binom.cdf(k_low-1, nobs, mean_alt))
497    return power, (k_low, k_upp, z_low, z_upp)
498
499
500def binom_tost(count, nobs, low, upp):
501    '''exact TOST test for one proportion using binomial distribution
502
503    Parameters
504    ----------
505    count : {int, array_like}
506        the number of successes in nobs trials.
507    nobs : int
508        the number of trials or observations.
509    low, upp : floats
510        lower and upper limit of equivalence region
511
512    Returns
513    -------
514    pvalue : float
515        p-value of equivalence test
516    pval_low, pval_upp : floats
517        p-values of lower and upper one-sided tests
518
519    '''
520    # binom_test_stat only returns pval
521    tt1 = binom_test(count, nobs, alternative='larger', prop=low)
522    tt2 = binom_test(count, nobs, alternative='smaller', prop=upp)
523    return np.maximum(tt1, tt2), tt1, tt2,
524
525
526def binom_tost_reject_interval(low, upp, nobs, alpha=0.05):
527    '''rejection region for binomial TOST
528
529    The interval includes the end points,
530    `reject` if and only if `r_low <= x <= r_upp`.
531
532    The interval might be empty with `r_upp < r_low`.
533
534    Parameters
535    ----------
536    low, upp : floats
537        lower and upper limit of equivalence region
538    nobs : int
539        the number of trials or observations.
540
541    Returns
542    -------
543    x_low, x_upp : float
544        lower and upper bound of rejection region
545
546    '''
547    x_low = stats.binom.isf(alpha, nobs, low) + 1
548    x_upp = stats.binom.ppf(alpha, nobs, upp) - 1
549    return x_low, x_upp
550
551
552def binom_test_reject_interval(value, nobs, alpha=0.05, alternative='two-sided'):
553    '''rejection region for binomial test for one sample proportion
554
555    The interval includes the end points of the rejection region.
556
557    Parameters
558    ----------
559    value : float
560        proportion under the Null hypothesis
561    nobs : int
562        the number of trials or observations.
563
564
565    Returns
566    -------
567    x_low, x_upp : float
568        lower and upper bound of rejection region
569
570
571    '''
572    if alternative in ['2s', 'two-sided']:
573        alternative = '2s'  # normalize alternative name
574        alpha = alpha / 2
575
576    if alternative in ['2s', 'smaller']:
577        x_low = stats.binom.ppf(alpha, nobs, value) - 1
578    else:
579        x_low = 0
580    if alternative in ['2s', 'larger']:
581        x_upp = stats.binom.isf(alpha, nobs, value) + 1
582    else :
583        x_upp = nobs
584
585    return x_low, x_upp
586
587
588def binom_test(count, nobs, prop=0.5, alternative='two-sided'):
589    '''Perform a test that the probability of success is p.
590
591    This is an exact, two-sided test of the null hypothesis
592    that the probability of success in a Bernoulli experiment
593    is `p`.
594
595    Parameters
596    ----------
597    count : {int, array_like}
598        the number of successes in nobs trials.
599    nobs : int
600        the number of trials or observations.
601    prop : float, optional
602        The probability of success under the null hypothesis,
603        `0 <= prop <= 1`. The default value is `prop = 0.5`
604    alternative : str in ['two-sided', 'smaller', 'larger']
605        alternative hypothesis, which can be two-sided or either one of the
606        one-sided tests.
607
608    Returns
609    -------
610    p-value : float
611        The p-value of the hypothesis test
612
613    Notes
614    -----
615    This uses scipy.stats.binom_test for the two-sided alternative.
616
617    '''
618
619    if np.any(prop > 1.0) or np.any(prop < 0.0):
620        raise ValueError("p must be in range [0,1]")
621    if alternative in ['2s', 'two-sided']:
622        pval = stats.binom_test(count, n=nobs, p=prop)
623    elif alternative in ['l', 'larger']:
624        pval = stats.binom.sf(count-1, nobs, prop)
625    elif alternative in ['s', 'smaller']:
626        pval = stats.binom.cdf(count, nobs, prop)
627    else:
628        raise ValueError('alternative not recognized\n'
629                         'should be two-sided, larger or smaller')
630    return pval
631
632
633def power_binom_tost(low, upp, nobs, p_alt=None, alpha=0.05):
634    if p_alt is None:
635        p_alt = 0.5 * (low + upp)
636    x_low, x_upp = binom_tost_reject_interval(low, upp, nobs, alpha=alpha)
637    power = (stats.binom.cdf(x_upp, nobs, p_alt) -
638                     stats.binom.cdf(x_low-1, nobs, p_alt))
639    return power
640
641
642def power_ztost_prop(low, upp, nobs, p_alt, alpha=0.05, dist='norm',
643                     variance_prop=None, discrete=True, continuity=0,
644                     critval_continuity=0):
645    '''Power of proportions equivalence test based on normal distribution
646
647    Parameters
648    ----------
649    low, upp : floats
650        lower and upper limit of equivalence region
651    nobs : int
652        number of observations
653    p_alt : float in (0,1)
654        proportion under the alternative
655    alpha : float in (0,1)
656        significance level of the test
657    dist : str in ['norm', 'binom']
658        This defines the distribution to evaluate the power of the test. The
659        critical values of the TOST test are always based on the normal
660        approximation, but the distribution for the power can be either the
661        normal (default) or the binomial (exact) distribution.
662    variance_prop : None or float in (0,1)
663        If this is None, then the variances for the two one sided tests are
664        based on the proportions equal to the equivalence limits.
665        If variance_prop is given, then it is used to calculate the variance
666        for the TOST statistics. If this is based on an sample, then the
667        estimated proportion can be used.
668    discrete : bool
669        If true, then the critical values of the rejection region are converted
670        to integers. If dist is "binom", this is automatically assumed.
671        If discrete is false, then the TOST critical values are used as
672        floating point numbers, and the power is calculated based on the
673        rejection region that is not discretized.
674    continuity : bool or float
675        adjust the rejection region for the normal power probability. This has
676        and effect only if ``dist='norm'``
677    critval_continuity : bool or float
678        If this is non-zero, then the critical values of the tost rejection
679        region are adjusted before converting to integers. This affects both
680        distributions, ``dist='norm'`` and ``dist='binom'``.
681
682    Returns
683    -------
684    power : float
685        statistical power of the equivalence test.
686    (k_low, k_upp, z_low, z_upp) : tuple of floats
687        critical limits in intermediate steps
688        temporary return, will be changed
689
690    Notes
691    -----
692    In small samples the power for the ``discrete`` version, has a sawtooth
693    pattern as a function of the number of observations. As a consequence,
694    small changes in the number of observations or in the normal approximation
695    can have a large effect on the power.
696
697    ``continuity`` and ``critval_continuity`` are added to match some results
698    of PASS, and are mainly to investigate the sensitivity of the ztost power
699    to small changes in the rejection region. From my interpretation of the
700    equations in the SAS manual, both are zero in SAS.
701
702    works vectorized
703
704    **verification:**
705
706    The ``dist='binom'`` results match PASS,
707    The ``dist='norm'`` results look reasonable, but no benchmark is available.
708
709    References
710    ----------
711    SAS Manual: Chapter 68: The Power Procedure, Computational Resources
712    PASS Chapter 110: Equivalence Tests for One Proportion.
713
714    '''
715    mean_low = low
716    var_low = std_prop(low, nobs)**2
717    mean_upp = upp
718    var_upp = std_prop(upp, nobs)**2
719    mean_alt = p_alt
720    var_alt = std_prop(p_alt, nobs)**2
721    if variance_prop is not None:
722        var_low = var_upp = std_prop(variance_prop, nobs)**2
723    power = _power_ztost(mean_low, var_low, mean_upp, var_upp, mean_alt, var_alt,
724                 alpha=alpha, discrete=discrete, dist=dist, nobs=nobs,
725                 continuity=continuity, critval_continuity=critval_continuity)
726    return np.maximum(power[0], 0), power[1:]
727
728
729def _table_proportion(count, nobs):
730    '''create a k by 2 contingency table for proportion
731
732    helper function for proportions_chisquare
733
734    Parameters
735    ----------
736    count : {int, array_like}
737        the number of successes in nobs trials.
738    nobs : int
739        the number of trials or observations.
740
741    Returns
742    -------
743    table : ndarray
744        (k, 2) contingency table
745
746    Notes
747    -----
748    recent scipy has more elaborate contingency table functions
749
750    '''
751    count = np.asarray(count)
752    dt = np.promote_types(count.dtype, np.float64)
753    count = np.asarray(count, dtype=dt)
754    table = np.column_stack((count, nobs - count))
755    expected = table.sum(0) * table.sum(1)[:, None] * 1. / table.sum()
756    n_rows = table.shape[0]
757    return table, expected, n_rows
758
759
760def proportions_ztest(count, nobs, value=None, alternative='two-sided',
761                      prop_var=False):
762    """
763    Test for proportions based on normal (z) test
764
765    Parameters
766    ----------
767    count : {int, array_like}
768        the number of successes in nobs trials. If this is array_like, then
769        the assumption is that this represents the number of successes for
770        each independent sample
771    nobs : {int, array_like}
772        the number of trials or observations, with the same length as
773        count.
774    value : float, array_like or None, optional
775        This is the value of the null hypothesis equal to the proportion in the
776        case of a one sample test. In the case of a two-sample test, the
777        null hypothesis is that prop[0] - prop[1] = value, where prop is the
778        proportion in the two samples. If not provided value = 0 and the null
779        is prop[0] = prop[1]
780    alternative : str in ['two-sided', 'smaller', 'larger']
781        The alternative hypothesis can be either two-sided or one of the one-
782        sided tests, smaller means that the alternative hypothesis is
783        ``prop < value`` and larger means ``prop > value``. In the two sample
784        test, smaller means that the alternative hypothesis is ``p1 < p2`` and
785        larger means ``p1 > p2`` where ``p1`` is the proportion of the first
786        sample and ``p2`` of the second one.
787    prop_var : False or float in (0, 1)
788        If prop_var is false, then the variance of the proportion estimate is
789        calculated based on the sample proportion. Alternatively, a proportion
790        can be specified to calculate this variance. Common use case is to
791        use the proportion under the Null hypothesis to specify the variance
792        of the proportion estimate.
793
794    Returns
795    -------
796    zstat : float
797        test statistic for the z-test
798    p-value : float
799        p-value for the z-test
800
801    Examples
802    --------
803    >>> count = 5
804    >>> nobs = 83
805    >>> value = .05
806    >>> stat, pval = proportions_ztest(count, nobs, value)
807    >>> print('{0:0.3f}'.format(pval))
808    0.695
809
810    >>> import numpy as np
811    >>> from statsmodels.stats.proportion import proportions_ztest
812    >>> count = np.array([5, 12])
813    >>> nobs = np.array([83, 99])
814    >>> stat, pval = proportions_ztest(count, nobs)
815    >>> print('{0:0.3f}'.format(pval))
816    0.159
817
818    Notes
819    -----
820    This uses a simple normal test for proportions. It should be the same as
821    running the mean z-test on the data encoded 1 for event and 0 for no event
822    so that the sum corresponds to the count.
823
824    In the one and two sample cases with two-sided alternative, this test
825    produces the same p-value as ``proportions_chisquare``, since the
826    chisquare is the distribution of the square of a standard normal
827    distribution.
828    """
829    # TODO: verify that this really holds
830    # TODO: add continuity correction or other improvements for small samples
831    # TODO: change options similar to propotion_ztost ?
832
833    count = np.asarray(count)
834    nobs = np.asarray(nobs)
835
836    if nobs.size == 1:
837        nobs = nobs * np.ones_like(count)
838
839    prop = count * 1. / nobs
840    k_sample = np.size(prop)
841    if value is None:
842        if k_sample == 1:
843            raise ValueError('value must be provided for a 1-sample test')
844        value = 0
845    if k_sample == 1:
846        diff = prop - value
847    elif k_sample == 2:
848        diff = prop[0] - prop[1] - value
849    else:
850        msg = 'more than two samples are not implemented yet'
851        raise NotImplementedError(msg)
852
853    p_pooled = np.sum(count) * 1. / np.sum(nobs)
854
855    nobs_fact = np.sum(1. / nobs)
856    if prop_var:
857        p_pooled = prop_var
858    var_ = p_pooled * (1 - p_pooled) * nobs_fact
859    std_diff = np.sqrt(var_)
860    from statsmodels.stats.weightstats import _zstat_generic2
861    return _zstat_generic2(diff, std_diff, alternative)
862
863
864def proportions_ztost(count, nobs, low, upp, prop_var='sample'):
865    '''Equivalence test based on normal distribution
866
867    Parameters
868    ----------
869    count : {int, array_like}
870        the number of successes in nobs trials. If this is array_like, then
871        the assumption is that this represents the number of successes for
872        each independent sample
873    nobs : int
874        the number of trials or observations, with the same length as
875        count.
876    low, upp : float
877        equivalence interval low < prop1 - prop2 < upp
878    prop_var : str or float in (0, 1)
879        prop_var determines which proportion is used for the calculation
880        of the standard deviation of the proportion estimate
881        The available options for string are 'sample' (default), 'null' and
882        'limits'. If prop_var is a float, then it is used directly.
883
884    Returns
885    -------
886    pvalue : float
887        pvalue of the non-equivalence test
888    t1, pv1 : tuple of floats
889        test statistic and pvalue for lower threshold test
890    t2, pv2 : tuple of floats
891        test statistic and pvalue for upper threshold test
892
893    Notes
894    -----
895    checked only for 1 sample case
896
897    '''
898    if prop_var == 'limits':
899        prop_var_low = low
900        prop_var_upp = upp
901    elif prop_var == 'sample':
902        prop_var_low = prop_var_upp = False  #ztest uses sample
903    elif prop_var == 'null':
904        prop_var_low = prop_var_upp = 0.5 * (low + upp)
905    elif np.isreal(prop_var):
906        prop_var_low = prop_var_upp = prop_var
907
908    tt1 = proportions_ztest(count, nobs, alternative='larger',
909                            prop_var=prop_var_low, value=low)
910    tt2 = proportions_ztest(count, nobs, alternative='smaller',
911                            prop_var=prop_var_upp, value=upp)
912    return np.maximum(tt1[1], tt2[1]), tt1, tt2,
913
914
915def proportions_chisquare(count, nobs, value=None):
916    '''test for proportions based on chisquare test
917
918    Parameters
919    ----------
920    count : {int, array_like}
921        the number of successes in nobs trials. If this is array_like, then
922        the assumption is that this represents the number of successes for
923        each independent sample
924    nobs : int
925        the number of trials or observations, with the same length as
926        count.
927    value : None or float or array_like
928
929    Returns
930    -------
931    chi2stat : float
932        test statistic for the chisquare test
933    p-value : float
934        p-value for the chisquare test
935    (table, expected)
936        table is a (k, 2) contingency table, ``expected`` is the corresponding
937        table of counts that are expected under independence with given
938        margins
939
940    Notes
941    -----
942    Recent version of scipy.stats have a chisquare test for independence in
943    contingency tables.
944
945    This function provides a similar interface to chisquare tests as
946    ``prop.test`` in R, however without the option for Yates continuity
947    correction.
948
949    count can be the count for the number of events for a single proportion,
950    or the counts for several independent proportions. If value is given, then
951    all proportions are jointly tested against this value. If value is not
952    given and count and nobs are not scalar, then the null hypothesis is
953    that all samples have the same proportion.
954
955    '''
956    nobs = np.atleast_1d(nobs)
957    table, expected, n_rows = _table_proportion(count, nobs)
958    if value is not None:
959        expected = np.column_stack((nobs * value, nobs * (1 - value)))
960        ddof = n_rows - 1
961    else:
962        ddof = n_rows
963
964    #print table, expected
965    chi2stat, pval = stats.chisquare(table.ravel(), expected.ravel(),
966                                     ddof=ddof)
967    return chi2stat, pval, (table, expected)
968
969
970def proportions_chisquare_allpairs(count, nobs, multitest_method='hs'):
971    '''chisquare test of proportions for all pairs of k samples
972
973    Performs a chisquare test for proportions for all pairwise comparisons.
974    The alternative is two-sided
975
976    Parameters
977    ----------
978    count : {int, array_like}
979        the number of successes in nobs trials.
980    nobs : int
981        the number of trials or observations.
982    prop : float, optional
983        The probability of success under the null hypothesis,
984        `0 <= prop <= 1`. The default value is `prop = 0.5`
985    multitest_method : str
986        This chooses the method for the multiple testing p-value correction,
987        that is used as default in the results.
988        It can be any method that is available in  ``multipletesting``.
989        The default is Holm-Sidak 'hs'.
990
991    Returns
992    -------
993    result : AllPairsResults instance
994        The returned results instance has several statistics, such as p-values,
995        attached, and additional methods for using a non-default
996        ``multitest_method``.
997
998    Notes
999    -----
1000    Yates continuity correction is not available.
1001    '''
1002    #all_pairs = lmap(list, lzip(*np.triu_indices(4, 1)))
1003    all_pairs = lzip(*np.triu_indices(len(count), 1))
1004    pvals = [proportions_chisquare(count[list(pair)], nobs[list(pair)])[1]
1005               for pair in all_pairs]
1006    return AllPairsResults(pvals, all_pairs, multitest_method=multitest_method)
1007
1008
1009def proportions_chisquare_pairscontrol(count, nobs, value=None,
1010                               multitest_method='hs', alternative='two-sided'):
1011    '''chisquare test of proportions for pairs of k samples compared to control
1012
1013    Performs a chisquare test for proportions for pairwise comparisons with a
1014    control (Dunnet's test). The control is assumed to be the first element
1015    of ``count`` and ``nobs``. The alternative is two-sided, larger or
1016    smaller.
1017
1018    Parameters
1019    ----------
1020    count : {int, array_like}
1021        the number of successes in nobs trials.
1022    nobs : int
1023        the number of trials or observations.
1024    prop : float, optional
1025        The probability of success under the null hypothesis,
1026        `0 <= prop <= 1`. The default value is `prop = 0.5`
1027    multitest_method : str
1028        This chooses the method for the multiple testing p-value correction,
1029        that is used as default in the results.
1030        It can be any method that is available in  ``multipletesting``.
1031        The default is Holm-Sidak 'hs'.
1032    alternative : str in ['two-sided', 'smaller', 'larger']
1033        alternative hypothesis, which can be two-sided or either one of the
1034        one-sided tests.
1035
1036    Returns
1037    -------
1038    result : AllPairsResults instance
1039        The returned results instance has several statistics, such as p-values,
1040        attached, and additional methods for using a non-default
1041        ``multitest_method``.
1042
1043
1044    Notes
1045    -----
1046    Yates continuity correction is not available.
1047
1048    ``value`` and ``alternative`` options are not yet implemented.
1049
1050    '''
1051    if (value is not None) or (alternative not in ['two-sided', '2s']):
1052        raise NotImplementedError
1053    #all_pairs = lmap(list, lzip(*np.triu_indices(4, 1)))
1054    all_pairs = [(0, k) for k in range(1, len(count))]
1055    pvals = [proportions_chisquare(count[list(pair)], nobs[list(pair)],
1056                                   #alternative=alternative)[1]
1057                                   )[1]
1058               for pair in all_pairs]
1059    return AllPairsResults(pvals, all_pairs, multitest_method=multitest_method)
1060
1061
1062def confint_proportions_2indep(count1, nobs1, count2, nobs2, method=None,
1063                               compare='diff', alpha=0.05, correction=True):
1064    """Confidence intervals for comparing two independent proportions
1065
1066    This assumes that we have two independent binomial samples.
1067
1068    Parameters
1069    ----------
1070    count1, nobs1 : float
1071        Count and sample size for first sample.
1072    count2, nobs2 : float
1073        Count and sample size for the second sample.
1074    method : str
1075        Method for computing confidence interval. If method is None, then a
1076        default method is used. The default might change as more methods are
1077        added.
1078
1079        diff:
1080         - 'wald',
1081         - 'agresti-caffo'
1082         - 'newcomb' (default)
1083         - 'score'
1084
1085        ratio:
1086         - 'log'
1087         - 'log-adjusted' (default)
1088         - 'score'
1089
1090        odds-ratio:
1091         - 'logit'
1092         - 'logit-adjusted' (default)
1093         - 'score'
1094
1095    compare : string in ['diff', 'ratio' 'odds-ratio']
1096        If compare is diff, then the confidence interval is for diff = p1 - p2.
1097        If compare is ratio, then the confidence interval is for the risk ratio
1098        defined by ratio = p1 / p2.
1099        If compare is odds-ratio, then the confidence interval is for the
1100        odds-ratio defined by or = p1 / (1 - p1) / (p2 / (1 - p2).
1101    alpha : float
1102        Significance level for the confidence interval, default is 0.05.
1103        The nominal coverage probability is 1 - alpha.
1104
1105    Returns
1106    -------
1107    low, upp
1108
1109    Notes
1110    -----
1111    Status: experimental, API and defaults might still change.
1112        more ``methods`` will be added.
1113
1114    References
1115    ----------
1116    .. [1] Fagerland, Morten W., Stian Lydersen, and Petter Laake. 2015.
1117       “Recommended Confidence Intervals for Two Independent Binomial
1118       Proportions.” Statistical Methods in Medical Research 24 (2): 224–54.
1119       https://doi.org/10.1177/0962280211415469.
1120    .. [2] Koopman, P. A. R. 1984. “Confidence Intervals for the Ratio of Two
1121       Binomial Proportions.” Biometrics 40 (2): 513–17.
1122       https://doi.org/10.2307/2531405.
1123    .. [3] Miettinen, Olli, and Markku Nurminen. "Comparative analysis of two
1124       rates." Statistics in medicine 4, no. 2 (1985): 213-226.
1125    .. [4] Newcombe, Robert G. 1998. “Interval Estimation for the Difference
1126       between Independent Proportions: Comparison of Eleven Methods.”
1127       Statistics in Medicine 17 (8): 873–90.
1128       https://doi.org/10.1002/(SICI)1097-0258(19980430)17:8<873::AID-
1129       SIM779>3.0.CO;2-I.
1130    .. [5] Newcombe, Robert G., and Markku M. Nurminen. 2011. “In Defence of
1131       Score Intervals for Proportions and Their Differences.” Communications
1132       in Statistics - Theory and Methods 40 (7): 1271–82.
1133       https://doi.org/10.1080/03610920903576580.
1134    """
1135    method_default = {'diff': 'newcomb',
1136                      'ratio': 'log-adjusted',
1137                      'odds-ratio': 'logit-adjusted'}
1138    # normalize compare name
1139    if compare.lower() == 'or':
1140        compare = 'odds-ratio'
1141    if method is None:
1142        method = method_default[compare]
1143
1144    method = method.lower()
1145    if method.startswith('agr'):
1146        method = 'agresti-caffo'
1147
1148    p1 = count1 / nobs1
1149    p2 = count2 / nobs2
1150    diff = p1 - p2
1151    addone = 1 if method == 'agresti-caffo' else 0
1152
1153    if compare == 'diff':
1154        if method in ['wald', 'agresti-caffo']:
1155            count1_, nobs1_ = count1 + addone, nobs1 + 2 * addone
1156            count2_, nobs2_ = count2 + addone, nobs2 + 2 * addone
1157            p1_ = count1_ / nobs1_
1158            p2_ = count2_ / nobs2_
1159            diff_ = p1_ - p2_
1160            var = p1_ * (1 - p1_) / nobs1_ + p2_ * (1 - p2_) / nobs2_
1161            z = stats.norm.isf(alpha / 2)
1162            d_wald = z * np.sqrt(var)
1163            low = diff_ - d_wald
1164            upp = diff_ + d_wald
1165
1166        elif method.startswith('newcomb'):
1167            low1, upp1 = proportion_confint(count1, nobs1,
1168                                            method='wilson', alpha=alpha)
1169            low2, upp2 = proportion_confint(count2, nobs2,
1170                                            method='wilson', alpha=alpha)
1171            d_low = np.sqrt((p1 - low1)**2 + (upp2 - p2)**2)
1172            d_upp = np.sqrt((p2 - low2)**2 + (upp1 - p1)**2)
1173            low = diff - d_low
1174            upp = diff + d_upp
1175
1176        elif method == "score":
1177            low, upp = _score_confint_inversion(count1, nobs1, count2, nobs2,
1178                                                compare=compare, alpha=alpha,
1179                                                correction=correction)
1180
1181        else:
1182            raise ValueError('method not recognized')
1183
1184    elif compare == 'ratio':
1185        # ratio = p1 / p2
1186        if method in ['log', 'log-adjusted']:
1187            addhalf = 0.5 if method == 'log-adjusted' else 0
1188            count1_, nobs1_ = count1 + addhalf, nobs1 + addhalf
1189            count2_, nobs2_ = count2 + addhalf, nobs2 + addhalf
1190            p1_ = count1_ / nobs1_
1191            p2_ = count2_ / nobs2_
1192            ratio_ = p1_ / p2_
1193            var = (1 / count1_) - 1 / nobs1_ + 1 / count2_ - 1 / nobs2_
1194            z = stats.norm.isf(alpha / 2)
1195            d_log = z * np.sqrt(var)
1196            low = np.exp(np.log(ratio_) - d_log)
1197            upp = np.exp(np.log(ratio_) + d_log)
1198
1199        elif method == 'score':
1200            res = _confint_riskratio_koopman(count1, nobs1, count2, nobs2,
1201                                             alpha=alpha,
1202                                             correction=correction)
1203            low, upp = res.confint
1204
1205        else:
1206            raise ValueError('method not recognized')
1207
1208    elif compare == 'odds-ratio':
1209        # odds_ratio = p1 / (1 - p1) / p2 * (1 - p2)
1210        if method in ['logit', 'logit-adjusted', 'logit-smoothed']:
1211            if method in ['logit-smoothed']:
1212                adjusted = _shrink_prob(count1, nobs1, count2, nobs2,
1213                                        shrink_factor=2, return_corr=False)[0]
1214                count1_, nobs1_, count2_, nobs2_ = adjusted
1215
1216            else:
1217                addhalf = 0.5 if method == 'logit-adjusted' else 0
1218                count1_, nobs1_ = count1 + addhalf, nobs1 + 2 * addhalf
1219                count2_, nobs2_ = count2 + addhalf, nobs2 + 2 * addhalf
1220            p1_ = count1_ / nobs1_
1221            p2_ = count2_ / nobs2_
1222            odds_ratio_ = p1_ / (1 - p1_) / p2_ * (1 - p2_)
1223            var = (1 / count1_ + 1 / (nobs1_ - count1_) +
1224                   1 / count2_ + 1 / (nobs2_ - count2_))
1225            z = stats.norm.isf(alpha / 2)
1226            d_log = z * np.sqrt(var)
1227            low = np.exp(np.log(odds_ratio_) - d_log)
1228            upp = np.exp(np.log(odds_ratio_) + d_log)
1229
1230        elif method == "score":
1231            low, upp = _score_confint_inversion(count1, nobs1, count2, nobs2,
1232                                                compare=compare, alpha=alpha,
1233                                                correction=correction)
1234
1235        else:
1236            raise ValueError('method not recognized')
1237
1238    else:
1239        raise ValueError('compare not recognized')
1240
1241    return low, upp
1242
1243
1244def _shrink_prob(count1, nobs1, count2, nobs2, shrink_factor=2,
1245                 return_corr=True):
1246    """shrink observed counts towards independence
1247
1248    Helper function for 'logit-smoothed' inference for the odds-ratio of two
1249    independent proportions.
1250
1251    Parameters
1252    ----------
1253    count1, nobs1 : float or int
1254        count and sample size for first sample
1255    count2, nobs2 : float or int
1256        count and sample size for the second sample
1257    shrink_factor : float
1258        This corresponds to the number of observations that are added in total
1259        proportional to the probabilities under independence.
1260    return_corr : bool
1261        If true, then only the correction term is returned
1262        If false, then the corrected counts, i.e. original counts plus
1263        correction term, are returned.
1264
1265    Returns
1266    -------
1267    count1_corr, nobs1_corr, count2_corr, nobs2_corr : float
1268        correction or corrected counts
1269    prob_indep :
1270        TODO/Warning : this will change most likely
1271        probabilities under independence, only returned if return_corr is
1272        false.
1273
1274    """
1275    nobs_col = np.array([count1 + count2, nobs1 - count1 + nobs2 - count2])
1276    nobs_row = np.array([nobs1, nobs2])
1277    nobs = nobs1 + nobs2
1278    prob_indep = (nobs_col * nobs_row[:, None]) / nobs**2
1279    corr = shrink_factor * prob_indep
1280    if return_corr:
1281        return (corr[0, 0], corr[0].sum(), corr[1, 0], corr[1].sum())
1282    else:
1283        return (count1 + corr[0, 0], nobs1 + corr[0].sum(),
1284                count2 + corr[1, 0], nobs2 + corr[1].sum()), prob_indep
1285
1286
1287def score_test_proportions_2indep(count1, nobs1, count2, nobs2, value=None,
1288                                  compare='diff', alternative='two-sided',
1289                                  correction=True, return_results=True):
1290    """score_test for two independent proportions
1291
1292    This uses the constrained estimate of the proportions to compute
1293    the variance under the Null hypothesis.
1294
1295    Parameters
1296    ----------
1297    count1, nobs1 :
1298        count and sample size for first sample
1299    count2, nobs2 :
1300        count and sample size for the second sample
1301    value : float
1302        diff, ratio or odds-ratio under the null hypothesis. If value is None,
1303        then equality of proportions under the Null is assumed,
1304        i.e. value=0 for 'diff' or value=1 for either rate or odds-ratio.
1305    compare : string in ['diff', 'ratio' 'odds-ratio']
1306        If compare is diff, then the confidence interval is for diff = p1 - p2.
1307        If compare is ratio, then the confidence interval is for the risk ratio
1308        defined by ratio = p1 / p2.
1309        If compare is odds-ratio, then the confidence interval is for the
1310        odds-ratio defined by or = p1 / (1 - p1) / (p2 / (1 - p2)
1311    return_results : bool
1312        If true, then a results instance with extra information is returned,
1313        otherwise a tuple with statistic and pvalue is returned.
1314
1315    Returns
1316    -------
1317    results : results instance or tuple
1318        If return_results is True, then a results instance with the
1319        information in attributes is returned.
1320        If return_results is False, then only ``statistic`` and ``pvalue``
1321        are returned.
1322
1323        statistic : float
1324            test statistic asymptotically normal distributed N(0, 1)
1325        pvalue : float
1326            p-value based on normal distribution
1327        other attributes :
1328            additional information about the hypothesis test
1329
1330    Notes
1331    -----
1332    Status: experimental, the type or extra information in the return might
1333    change.
1334
1335    """
1336
1337    value_default = 0 if compare == 'diff' else 1
1338    if value is None:
1339        # TODO: odds ratio does not work if value=1
1340        value = value_default
1341
1342    nobs = nobs1 + nobs2
1343    count = count1 + count2
1344    p1 = count1 / nobs1
1345    p2 = count2 / nobs2
1346    if value == value_default:
1347        # use pooled estimator if equality test
1348        # shortcut, but required for odds ratio
1349        prop0 = prop1 = count / nobs
1350    # this uses index 0 from Miettinen Nurminned 1985
1351    count0, nobs0 = count2, nobs2
1352    p0 = p2
1353
1354    if compare == 'diff':
1355        diff = value  # hypothesis value
1356
1357        if diff != 0:
1358            tmp3 = nobs
1359            tmp2 = (nobs1 + 2 * nobs0) * diff - nobs - count
1360            tmp1 = (count0 * diff - nobs - 2 * count0) * diff + count
1361            tmp0 = count0 * diff * (1 - diff)
1362            q = ((tmp2 / (3 * tmp3))**3 - tmp1 * tmp2 / (6 * tmp3**2) +
1363                 tmp0 / (2 * tmp3))
1364            p = np.sign(q) * np.sqrt((tmp2 / (3 * tmp3))**2 -
1365                                     tmp1 / (3 * tmp3))
1366            a = (np.pi + np.arccos(q / p**3)) / 3
1367
1368            prop0 = 2 * p * np.cos(a) - tmp2 / (3 * tmp3)
1369            prop1 = prop0 + diff
1370
1371        correction = True
1372        var = prop1 * (1 - prop1) / nobs1 + prop0 * (1 - prop0) / nobs0
1373        if correction:
1374            var *= nobs / (nobs - 1)
1375
1376        diff_stat = (p1 - p0 - diff)
1377
1378    elif compare == 'ratio':
1379        # risk ratio
1380        ratio = value
1381
1382        if ratio != 1:
1383            a = nobs * ratio
1384            b = -(nobs1 * ratio + count1 + nobs2 + count0 * ratio)
1385            c = count
1386            prop0 = (-b - np.sqrt(b**2 - 4 * a * c)) / (2 * a)
1387            prop1 = prop0 * ratio
1388
1389        var = (prop1 * (1 - prop1) / nobs1 +
1390               ratio**2 * prop0 * (1 - prop0) / nobs0)
1391        if correction:
1392            var *= nobs / (nobs - 1)
1393
1394        # NCSS looks incorrect for var, but it is what should be reported
1395        # diff_stat = (p1 / p0 - ratio)   # NCSS/PASS
1396        diff_stat = (p1 - ratio * p0)  # Miettinen Nurminen
1397
1398    elif compare in ['or', 'odds-ratio']:
1399        # odds ratio
1400        oratio = value
1401
1402        if oratio != 1:
1403            # Note the constraint estimator does not handle odds-ratio = 1
1404            a = nobs0 * (oratio - 1)
1405            b = nobs1 * oratio + nobs0 - count * (oratio - 1)
1406            c = -count
1407            prop0 = (-b + np.sqrt(b**2 - 4 * a * c)) / (2 * a)
1408            prop1 = prop0 * oratio / (1 + prop0 * (oratio - 1))
1409
1410        # try to avoid 0 and 1 proportions,
1411        # those raise Zero Division Runtime Warnings
1412        eps = 1e-10
1413        prop0 = np.clip(prop0, eps, 1 - eps)
1414        prop1 = np.clip(prop1, eps, 1 - eps)
1415
1416        var = (1 / (prop1 * (1 - prop1) * nobs1) +
1417               1 / (prop0 * (1 - prop0) * nobs0))
1418        if correction:
1419            var *= nobs / (nobs - 1)
1420
1421        diff_stat = ((p1 - prop1) / (prop1 * (1 - prop1)) -
1422                     (p0 - prop0) / (prop0 * (1 - prop0)))
1423
1424    statistic, pvalue = _zstat_generic2(diff_stat, np.sqrt(var),
1425                                        alternative=alternative)
1426
1427    if return_results:
1428        res = HolderTuple(statistic=statistic,
1429                          pvalue=pvalue,
1430                          compare=compare,
1431                          method='score',
1432                          variance=var,
1433                          alternative=alternative,
1434                          prop1_null=prop1,
1435                          prop2_null=prop0,
1436                          )
1437        return res
1438    else:
1439        return statistic, pvalue
1440
1441
1442def test_proportions_2indep(count1, nobs1, count2, nobs2, value=None,
1443                            method=None, compare='diff',
1444                            alternative='two-sided', correction=True,
1445                            return_results=True):
1446    """Hypothesis test for comparing two independent proportions
1447
1448    This assumes that we have two independent binomial samples.
1449
1450    The Null and alternative hypothesis are
1451
1452    for compare = 'diff'
1453
1454    - H0: prop1 - prop2 - value = 0
1455    - H1: prop1 - prop2 - value != 0  if alternative = 'two-sided'
1456    - H1: prop1 - prop2 - value > 0   if alternative = 'larger'
1457    - H1: prop1 - prop2 - value < 0   if alternative = 'smaller'
1458
1459    for compare = 'ratio'
1460
1461    - H0: prop1 / prop2 - value = 0
1462    - H1: prop1 / prop2 - value != 0  if alternative = 'two-sided'
1463    - H1: prop1 / prop2 - value > 0   if alternative = 'larger'
1464    - H1: prop1 / prop2 - value < 0   if alternative = 'smaller'
1465
1466    for compare = 'odds-ratio'
1467
1468    - H0: or - value = 0
1469    - H1: or - value != 0  if alternative = 'two-sided'
1470    - H1: or - value > 0   if alternative = 'larger'
1471    - H1: or - value < 0   if alternative = 'smaller'
1472
1473    where odds-ratio or = prop1 / (1 - prop1) / (prop2 / (1 - prop2))
1474
1475    Parameters
1476    ----------
1477    count1 : int
1478        Count for first sample.
1479    nobs1 : int
1480        Sample size for first sample.
1481    count2 : int
1482        Count for the second sample.
1483    nobs2 : int
1484        Sample size for the second sample.
1485    method : string
1486        Method for computing confidence interval. If method is None, then a
1487        default method is used. The default might change as more methods are
1488        added.
1489
1490        diff:
1491
1492        - 'wald',
1493        - 'agresti-caffo'
1494        - 'score' if correction is True, then this uses the degrees of freedom
1495           correction ``nobs / (nobs - 1)`` as in Miettinen Nurminen 1985
1496
1497        ratio:
1498
1499        - 'log': wald test using log transformation
1500        - 'log-adjusted': wald test using log transformation,
1501           adds 0.5 to counts
1502        - 'score': if correction is True, then this uses the degrees of freedom
1503           correction ``nobs / (nobs - 1)`` as in Miettinen Nurminen 1985
1504
1505        odds-ratio:
1506
1507        - 'logit': wald test using logit transformation
1508        - 'logit-adjusted': wald test using logit transformation,
1509           adds 0.5 to counts
1510        - 'logit-smoothed': wald test using logit transformation, biases
1511           cell counts towards independence by adding two observations in
1512           total.
1513        - 'score' if correction is True, then this uses the degrees of freedom
1514           correction ``nobs / (nobs - 1)`` as in Miettinen Nurminen 1985
1515
1516    compare : {'diff', 'ratio' 'odds-ratio'}
1517        If compare is `diff`, then the confidence interval is for
1518        diff = p1 - p2.
1519        If compare is `ratio`, then the confidence interval is for the
1520        risk ratio defined by ratio = p1 / p2.
1521        If compare is `odds-ratio`, then the confidence interval is for the
1522        odds-ratio defined by or = p1 / (1 - p1) / (p2 / (1 - p2)
1523    alternative : {'two-sided', 'smaller', 'larger'}
1524        alternative hypothesis, which can be two-sided or either one of the
1525        one-sided tests.
1526    correction : bool
1527        If correction is True (default), then the Miettinen and Nurminen
1528        small sample correction to the variance nobs / (nobs - 1) is used.
1529        Applies only if method='score'.
1530    return_results : bool
1531        If true, then a results instance with extra information is returned,
1532        otherwise a tuple with statistic and pvalue is returned.
1533
1534    Returns
1535    -------
1536    results : results instance or tuple
1537        If return_results is True, then a results instance with the
1538        information in attributes is returned.
1539        If return_results is False, then only ``statistic`` and ``pvalue``
1540        are returned.
1541
1542        statistic : float
1543            test statistic asymptotically normal distributed N(0, 1)
1544        pvalue : float
1545            p-value based on normal distribution
1546        other attributes :
1547            additional information about the hypothesis test
1548
1549    Notes
1550    -----
1551    Status: experimental, API and defaults might still change.
1552        More ``methods`` will be added.
1553
1554    """
1555    method_default = {'diff': 'agresti-caffo',
1556                      'ratio': 'log-adjusted',
1557                      'odds-ratio': 'logit-adjusted'}
1558    # normalize compare name
1559    if compare.lower() == 'or':
1560        compare = 'odds-ratio'
1561    if method is None:
1562        method = method_default[compare]
1563
1564    method = method.lower()
1565    if method.startswith('agr'):
1566        method = 'agresti-caffo'
1567
1568    if value is None:
1569        # TODO: odds ratio does not work if value=1 for score test
1570        value = 0 if compare == 'diff' else 1
1571
1572    p1 = count1 / nobs1
1573    p2 = count2 / nobs2
1574    diff = p1 - p2
1575    ratio = p1 / p2
1576    odds_ratio = p1 / (1 - p1) / p2 * (1 - p2)
1577    res = None
1578
1579    if compare == 'diff':
1580        if method in ['wald', 'agresti-caffo']:
1581            addone = 1 if method == 'agresti-caffo' else 0
1582            count1_, nobs1_ = count1 + addone, nobs1 + 2 * addone
1583            count2_, nobs2_ = count2 + addone, nobs2 + 2 * addone
1584            p1_ = count1_ / nobs1_
1585            p2_ = count2_ / nobs2_
1586            diff_stat = p1_ - p2_ - value
1587            var = p1_ * (1 - p1_) / nobs1_ + p2_ * (1 - p2_) / nobs2_
1588            statistic = diff_stat / np.sqrt(var)
1589            distr = 'normal'
1590
1591        elif method.startswith('newcomb'):
1592            msg = 'newcomb not available for hypothesis test'
1593            raise NotImplementedError(msg)
1594
1595        elif method == 'score':
1596            # Note score part is the same call for all compare
1597            res = score_test_proportions_2indep(count1, nobs1, count2, nobs2,
1598                                                value=value, compare=compare,
1599                                                alternative=alternative,
1600                                                correction=correction,
1601                                                return_results=return_results)
1602            if return_results is False:
1603                statistic, pvalue = res[:2]
1604            distr = 'normal'
1605            # TODO/Note score_test_proportion_2samp returns statistic  and
1606            #     not diff_stat
1607            diff_stat = None
1608        else:
1609            raise ValueError('method not recognized')
1610
1611    elif compare == 'ratio':
1612        if method in ['log', 'log-adjusted']:
1613            addhalf = 0.5 if method == 'log-adjusted' else 0
1614            count1_, nobs1_ = count1 + addhalf, nobs1 + addhalf
1615            count2_, nobs2_ = count2 + addhalf, nobs2 + addhalf
1616            p1_ = count1_ / nobs1_
1617            p2_ = count2_ / nobs2_
1618            ratio_ = p1_ / p2_
1619            var = (1 / count1_) - 1 / nobs1_ + 1 / count2_ - 1 / nobs2_
1620            diff_stat = np.log(ratio_) - np.log(value)
1621            statistic = diff_stat / np.sqrt(var)
1622            distr = 'normal'
1623
1624        elif method == 'score':
1625            res = score_test_proportions_2indep(count1, nobs1, count2, nobs2,
1626                                                value=value, compare=compare,
1627                                                alternative=alternative,
1628                                                correction=correction,
1629                                                return_results=return_results)
1630            if return_results is False:
1631                statistic, pvalue = res[:2]
1632            distr = 'normal'
1633            diff_stat = None
1634
1635        else:
1636            raise ValueError('method not recognized')
1637
1638    elif compare == "odds-ratio":
1639
1640        if method in ['logit', 'logit-adjusted', 'logit-smoothed']:
1641            if method in ['logit-smoothed']:
1642                adjusted = _shrink_prob(count1, nobs1, count2, nobs2,
1643                                        shrink_factor=2, return_corr=False)[0]
1644                count1_, nobs1_, count2_, nobs2_ = adjusted
1645
1646            else:
1647                addhalf = 0.5 if method == 'logit-adjusted' else 0
1648                count1_, nobs1_ = count1 + addhalf, nobs1 + 2 * addhalf
1649                count2_, nobs2_ = count2 + addhalf, nobs2 + 2 * addhalf
1650            p1_ = count1_ / nobs1_
1651            p2_ = count2_ / nobs2_
1652            odds_ratio_ = p1_ / (1 - p1_) / p2_ * (1 - p2_)
1653            var = (1 / count1_ + 1 / (nobs1_ - count1_) +
1654                   1 / count2_ + 1 / (nobs2_ - count2_))
1655
1656            diff_stat = np.log(odds_ratio_) - np.log(value)
1657            statistic = diff_stat / np.sqrt(var)
1658            distr = 'normal'
1659
1660        elif method == 'score':
1661            res = score_test_proportions_2indep(count1, nobs1, count2, nobs2,
1662                                                value=value, compare=compare,
1663                                                alternative=alternative,
1664                                                correction=correction,
1665                                                return_results=return_results)
1666            if return_results is False:
1667                statistic, pvalue = res[:2]
1668            distr = 'normal'
1669            diff_stat = None
1670        else:
1671            raise ValueError('method "%s" not recognized' % method)
1672
1673    else:
1674        raise ValueError('compare "%s" not recognized' % compare)
1675
1676    if distr == 'normal' and diff_stat is not None:
1677        statistic, pvalue = _zstat_generic2(diff_stat, np.sqrt(var),
1678                                            alternative=alternative)
1679
1680    if return_results:
1681        if res is None:
1682            res = HolderTuple(statistic=statistic,
1683                              pvalue=pvalue,
1684                              compare=compare,
1685                              method=method,
1686                              diff=diff,
1687                              ratio=ratio,
1688                              odds_ratio=odds_ratio,
1689                              variance=var,
1690                              alternative=alternative,
1691                              value=value,
1692                              )
1693        else:
1694            # we already have a return result from score test
1695            # add missing attributes
1696            res.diff = diff
1697            res.ratio = ratio
1698            res.odds_ratio = odds_ratio
1699            res.value = value
1700        return res
1701    else:
1702        return statistic, pvalue
1703
1704
1705def tost_proportions_2indep(count1, nobs1, count2, nobs2, low, upp,
1706                            method=None, compare='diff', correction=True):
1707    """
1708    Equivalence test based on two one-sided `test_proportions_2indep`
1709
1710    This assumes that we have two independent binomial samples.
1711
1712    The Null and alternative hypothesis for equivalence testing are
1713
1714    for compare = 'diff'
1715
1716    - H0: prop1 - prop2 <= low or upp <= prop1 - prop2
1717    - H1: low < prop1 - prop2 < upp
1718
1719    for compare = 'ratio'
1720
1721    - H0: prop1 / prop2 <= low or upp <= prop1 / prop2
1722    - H1: low < prop1 / prop2 < upp
1723
1724
1725    for compare = 'odds-ratio'
1726
1727    - H0: or <= low or upp <= or
1728    - H1: low < or < upp
1729
1730    where odds-ratio or = prop1 / (1 - prop1) / (prop2 / (1 - prop2))
1731
1732    Parameters
1733    ----------
1734    count1, nobs1 :
1735        count and sample size for first sample
1736    count2, nobs2 :
1737        count and sample size for the second sample
1738    low, upp :
1739        equivalence margin for diff, risk ratio or odds ratio
1740    method : string
1741        method for computing confidence interval. If method is None, then a
1742        default method is used. The default might change as more methods are
1743        added.
1744
1745        diff:
1746         - 'wald',
1747         - 'agresti-caffo'
1748         - 'score' if correction is True, then this uses the degrees of freedom
1749           correction ``nobs / (nobs - 1)`` as in Miettinen Nurminen 1985.
1750
1751        ratio:
1752         - 'log': wald test using log transformation
1753         - 'log-adjusted': wald test using log transformation,
1754            adds 0.5 to counts
1755         - 'score' if correction is True, then this uses the degrees of freedom
1756           correction ``nobs / (nobs - 1)`` as in Miettinen Nurminen 1985.
1757
1758        odds-ratio:
1759         - 'logit': wald test using logit transformation
1760         - 'logit-adjusted': : wald test using logit transformation,
1761            adds 0.5 to counts
1762         - 'logit-smoothed': : wald test using logit transformation, biases
1763            cell counts towards independence by adding two observations in
1764            total.
1765         - 'score' if correction is True, then this uses the degrees of freedom
1766            correction ``nobs / (nobs - 1)`` as in Miettinen Nurminen 1985
1767
1768    compare : string in ['diff', 'ratio' 'odds-ratio']
1769        If compare is `diff`, then the confidence interval is for
1770        diff = p1 - p2.
1771        If compare is `ratio`, then the confidence interval is for the
1772        risk ratio defined by ratio = p1 / p2.
1773        If compare is `odds-ratio`, then the confidence interval is for the
1774        odds-ratio defined by or = p1 / (1 - p1) / (p2 / (1 - p2).
1775    correction : bool
1776        If correction is True (default), then the Miettinen and Nurminen
1777        small sample correction to the variance nobs / (nobs - 1) is used.
1778        Applies only if method='score'.
1779
1780    Returns
1781    -------
1782    pvalue : float
1783        p-value is the max of the pvalues of the two one-sided tests
1784    t1 : test results
1785        results instance for one-sided hypothesis at the lower margin
1786    t1 : test results
1787        results instance for one-sided hypothesis at the upper margin
1788
1789    Notes
1790    -----
1791    Status: experimental, API and defaults might still change.
1792
1793    """
1794
1795    tt1 = test_proportions_2indep(count1, nobs1, count2, nobs2, value=low,
1796                                  method=method, compare=compare,
1797                                  alternative='larger',
1798                                  correction=correction,
1799                                  return_results=True)
1800    tt2 = test_proportions_2indep(count1, nobs1, count2, nobs2, value=upp,
1801                                  method=method, compare=compare,
1802                                  alternative='smaller',
1803                                  correction=correction,
1804                                  return_results=True)
1805
1806    idx_max = 0 if tt1.pvalue < tt2.pvalue else 1
1807    res = HolderTuple(statistic=[tt1.statistic, tt2.statistic][idx_max],
1808                      pvalue=[tt1.pvalue, tt2.pvalue][idx_max],
1809                      compare=compare,
1810                      method=method,
1811                      results_larger=tt1,
1812                      results_smaller=tt2,
1813                      title="Equivalence test for 2 independent proportions"
1814                      )
1815
1816    return res
1817
1818
1819def _std_2prop_power(diff, p2, ratio=1, alpha=0.05, value=0):
1820    """compute standard error under null and alternative for 2 proportions
1821
1822    helper function for power and sample size computation
1823
1824    """
1825    if value != 0:
1826        msg = 'non-zero diff under null, value, is not yet implemented'
1827        raise NotImplementedError(msg)
1828
1829    nobs_ratio = ratio
1830    p1 = p2 + diff
1831    # The following contains currently redundant variables that will
1832    # be useful for different options for the null variance
1833    p_pooled = (p1 + p2 * ratio) / (1 + ratio)
1834    # probabilities for the variance for the null statistic
1835    p1_vnull, p2_vnull = p_pooled, p_pooled
1836    p2_alt = p2
1837    p1_alt = p2_alt + diff
1838
1839    std_null = _std_diff_prop(p1_vnull, p2_vnull)
1840    std_alt = _std_diff_prop(p1_alt, p2_alt, ratio=nobs_ratio)
1841    return p_pooled, std_null, std_alt
1842
1843
1844def power_proportions_2indep(diff, prop2, nobs1, ratio=1, alpha=0.05,
1845                             value=0, alternative='two-sided',
1846                             return_results=True):
1847    """power for ztest that two independent proportions are equal
1848
1849    This assumes that the variance is based on the pooled proportion
1850    under the null and the non-pooled variance under the alternative
1851
1852    Parameters
1853    ----------
1854    diff : float
1855        difference between proportion 1 and 2 under the alternative
1856    prop2 : float
1857        proportion for the reference case, prop2, proportions for the
1858        first case will be computing using p2 and diff
1859        p1 = p2 + diff
1860    nobs1 : float or int
1861        number of observations in sample 1
1862    ratio : float
1863        sample size ratio, nobs2 = ratio * nobs1
1864    alpha : float in interval (0,1)
1865        Significance level, e.g. 0.05, is the probability of a type I
1866        error, that is wrong rejections if the Null Hypothesis is true.
1867    value : float
1868        currently only `value=0`, i.e. equality testing, is supported
1869    alternative : string, 'two-sided' (default), 'larger', 'smaller'
1870        Alternative hypothesis whether the power is calculated for a
1871        two-sided (default) or one sided test. The one-sided test can be
1872        either 'larger', 'smaller'.
1873    return_results : bool
1874        If true, then a results instance with extra information is returned,
1875        otherwise only the computed power is returned.
1876
1877    Returns
1878    -------
1879    results : results instance or float
1880        If return_results is True, then a results instance with the
1881        information in attributes is returned.
1882        If return_results is False, then only the power is returned.
1883
1884        power : float
1885            Power of the test, e.g. 0.8, is one minus the probability of a
1886            type II error. Power is the probability that the test correctly
1887            rejects the Null Hypothesis if the Alternative Hypothesis is true.
1888
1889        Other attributes in results instance include :
1890
1891        p_pooled
1892            pooled proportion, used for std_null
1893        std_null
1894            standard error of difference under the null hypothesis (without
1895            sqrt(nobs))
1896        std_alt
1897            standard error of difference under the alternative hypothesis
1898            (without sqrt(nobs))
1899    """
1900    # TODO: avoid possible circular import, check if needed
1901    from statsmodels.stats.power import normal_power_het
1902
1903    p_pooled, std_null, std_alt = _std_2prop_power(diff, prop2, ratio=1,
1904                                                   alpha=0.05, value=0)
1905
1906    pow_ = normal_power_het(diff, nobs1, alpha, std_null=std_null,
1907                            std_alternative=std_alt,
1908                            alternative=alternative)
1909
1910    if return_results:
1911        res = Holder(power=pow_,
1912                     p_pooled=p_pooled,
1913                     std_null=std_null,
1914                     std_alt=std_alt,
1915                     nobs1=nobs1,
1916                     nobs2=ratio * nobs1,
1917                     nobs_ratio=ratio,
1918                     alpha=alpha,
1919                     )
1920        return res
1921    else:
1922        return pow_
1923
1924
1925def samplesize_proportions_2indep_onetail(diff, prop2, power, ratio=1,
1926                                          alpha=0.05, value=0,
1927                                          alternative='two-sided'):
1928    """required sample size assuming normal distribution based on one tail
1929
1930    This uses an explicit computation for the sample size that is required
1931    to achieve a given power corresponding to the appropriate tails of the
1932    normal distribution. This ignores the far tail in a two-sided test
1933    which is negligible in the common case when alternative and null are
1934    far apart.
1935
1936    Parameters
1937    ----------
1938    diff : float
1939        Difference between proportion 1 and 2 under the alternative
1940    prop2 : float
1941        proportion for the reference case, prop2, proportions for the
1942        first case will be computing using p2 and diff
1943        p1 = p2 + diff
1944    power : float
1945        Power for which sample size is computed.
1946    ratio : float
1947        Sample size ratio, nobs2 = ratio * nobs1
1948    alpha : float in interval (0,1)
1949        Significance level, e.g. 0.05, is the probability of a type I
1950        error, that is wrong rejections if the Null Hypothesis is true.
1951    value : float
1952        Currently only `value=0`, i.e. equality testing, is supported
1953    alternative : string, 'two-sided' (default), 'larger', 'smaller'
1954        Alternative hypothesis whether the power is calculated for a
1955        two-sided (default) or one sided test. In the case of a one-sided
1956        alternative, it is assumed that the test is in the appropriate tail.
1957
1958    Returns
1959    -------
1960    nobs1 : float
1961        Number of observations in sample 1.
1962    """
1963    # TODO: avoid possible circular import, check if needed
1964    from statsmodels.stats.power import normal_sample_size_one_tail
1965
1966    if alternative in ['two-sided', '2s']:
1967        alpha = alpha / 2
1968
1969    _, std_null, std_alt = _std_2prop_power(diff, prop2, ratio=ratio,
1970                                            alpha=alpha, value=value)
1971
1972    nobs = normal_sample_size_one_tail(diff, power, alpha, std_null=std_null,
1973                                       std_alternative=std_alt)
1974    return nobs
1975
1976
1977def _score_confint_inversion(count1, nobs1, count2, nobs2, compare='diff',
1978                             alpha=0.05, correction=True):
1979    """Compute score confidence interval by inverting score test
1980
1981    Parameters
1982    ----------
1983    count1, nobs1 :
1984        Count and sample size for first sample.
1985    count2, nobs2 :
1986        Count and sample size for the second sample.
1987    compare : string in ['diff', 'ratio' 'odds-ratio']
1988        If compare is `diff`, then the confidence interval is for
1989        diff = p1 - p2.
1990        If compare is `ratio`, then the confidence interval is for the
1991        risk ratio defined by ratio = p1 / p2.
1992        If compare is `odds-ratio`, then the confidence interval is for the
1993        odds-ratio defined by or = p1 / (1 - p1) / (p2 / (1 - p2).
1994    alpha : float in interval (0,1)
1995        Significance level, e.g. 0.05, is the probability of a type I
1996        error, that is wrong rejections if the Null Hypothesis is true.
1997    correction : bool
1998        If correction is True (default), then the Miettinen and Nurminen
1999        small sample correction to the variance nobs / (nobs - 1) is used.
2000        Applies only if method='score'.
2001
2002    Returns
2003    -------
2004    low : float
2005        Lower confidence bound.
2006    upp : float
2007        Upper confidence bound.
2008    """
2009
2010    def func(v):
2011        r = test_proportions_2indep(count1, nobs1, count2, nobs2,
2012                                    value=v, compare=compare, method='score',
2013                                    correction=correction,
2014                                    alternative="two-sided")
2015        return r.pvalue - alpha
2016
2017    rt0 = test_proportions_2indep(count1, nobs1, count2, nobs2,
2018                                  value=0, compare=compare, method='score',
2019                                  correction=correction,
2020                                  alternative="two-sided")
2021
2022    # use default method to get starting values
2023    # this will not work if score confint becomes default
2024    # maybe use "wald" as alias that works for all compare statistics
2025    use_method = {"diff": "wald", "ratio": "log", "odds-ratio": "logit"}
2026    rci0 = confint_proportions_2indep(count1, nobs1, count2, nobs2,
2027                                      method=use_method[compare],
2028                                      compare=compare, alpha=alpha)
2029
2030    # Note diff might be negative
2031    ub = rci0[1] + np.abs(rci0[1]) * 0.5
2032    lb = rci0[0] - np.abs(rci0[0]) * 0.25
2033    if compare == 'diff':
2034        param = rt0.diff
2035        # 1 might not be the correct upper bound because
2036        #     rootfinding is for the `diff` and not for a probability.
2037        ub = min(ub, 0.99999)
2038    elif compare == 'ratio':
2039        param = rt0.ratio
2040        ub *= 2  # add more buffer
2041    if compare == 'odds-ratio':
2042        param = rt0.odds_ratio
2043
2044    # root finding for confint bounds
2045    upp = optimize.brentq(func, param, ub)
2046    low = optimize.brentq(func, lb, param)
2047    return low, upp
2048
2049
2050def _confint_riskratio_koopman(count1, nobs1, count2, nobs2, alpha=0.05,
2051                               correction=True):
2052    """score confidence interval for ratio or proportions, Koopman/Nam
2053
2054    signature not consistent with other functions
2055
2056    When correction is True, then the small sample correction nobs / (nobs - 1)
2057    by Miettinen/Nurminen is used.
2058    """
2059    # The names below follow Nam
2060    x0, x1, n0, n1 = count2, count1, nobs2, nobs1
2061    x = x0 + x1
2062    n = n0 + n1
2063    z = stats.norm.isf(alpha / 2)**2
2064    if correction:
2065        # Mietinnen/Nurminen small sample correction
2066        z *= n / (n - 1)
2067    # z = stats.chi2.isf(alpha, 1)
2068    # equ 6 in Nam 1995
2069    a1 = n0 * (n0 * n * x1 + n1 * (n0 + x1) * z)
2070    a2 = - n0 * (n0 * n1 * x + 2 * n * x0 * x1 + n1 * (n0 + x0 + 2 * x1) * z)
2071    a3 = 2 * n0 * n1 * x0 * x + n * x0 * x0 * x1 + n0 * n1 * x * z
2072    a4 = - n1 * x0 * x0 * x
2073
2074    p_roots_ = np.sort(np.roots([a1, a2, a3, a4]))
2075    p_roots = p_roots_[:2][::-1]
2076
2077    # equ 5
2078    ci = (1 - (n1 - x1) * (1 - p_roots) / (x0 + n1 - n * p_roots)) / p_roots
2079
2080    res = Holder()
2081    res.confint = ci
2082    res._p_roots = p_roots_  # for unit tests, can be dropped
2083    return res
2084
2085
2086def _confint_riskratio_paired_nam(table, alpha=0.05):
2087    """confidence interval for marginal risk ratio for matched pairs
2088
2089    need full table
2090
2091             success fail  marginal
2092    success    x11    x10  x1.
2093    fail       x01    x00  x0.
2094    marginal   x.1    x.0   n
2095
2096    The confidence interval is for the ratio p1 / p0 where
2097    p1 = x1. / n and
2098    p0 - x.1 / n
2099    Todo: rename p1 to pa and p2 to pb, so we have a, b for treatment and
2100    0, 1 for success/failure
2101
2102    current namings follow Nam 2009
2103
2104    status
2105    testing:
2106    compared to example in Nam 2009
2107    internal polynomial coefficients in calculation correspond at around
2108        4 decimals
2109    confidence interval agrees only at 2 decimals
2110
2111    """
2112    x11, x10, x01, x00 = np.ravel(table)
2113    n = np.sum(table)  # nobs
2114    p10, p01 = x10 / n, x01 / n
2115    p1 = (x11 + x10) / n
2116    p0 = (x11 + x01) / n
2117    q00 = 1 - x00 / n
2118
2119    z2 = stats.norm.isf(alpha / 2)**2
2120    # z = stats.chi2.isf(alpha, 1)
2121    # before equ 3 in Nam 2009
2122
2123    g1 = (n * p0 + z2 / 2) * p0
2124    g2 = - (2 * n * p1 * p0 + z2 * q00)
2125    g3 = (n * p1 + z2 / 2) * p1
2126
2127    a0 = g1**2 - (z2 * p0 / 2)**2
2128    a1 = 2 * g1 * g2
2129    a2 = g2**2 + 2 * g1 * g3 + z2**2 * (p1 * p0 - 2 * p10 * p01) / 2
2130    a3 = 2 * g2 * g3
2131    a4 = g3**2 - (z2 * p1 / 2)**2
2132
2133    p_roots = np.sort(np.roots([a0, a1, a2, a3, a4]))
2134    # p_roots = np.sort(np.roots([1, a1 / a0, a2 / a0, a3 / a0, a4 / a0]))
2135
2136    ci = [p_roots.min(), p_roots.max()]
2137    res = Holder()
2138    res.confint = ci
2139    res.p = p1, p0
2140    res._p_roots = p_roots  # for unit tests, can be dropped
2141    return res
2142