1'''Multiple Testing and P-Value Correction
2
3
4Author: Josef Perktold
5License: BSD-3
6
7'''
8
9
10import numpy as np
11
12from statsmodels.stats._knockoff import RegressionFDR
13
14__all__ = ['fdrcorrection', 'fdrcorrection_twostage', 'local_fdr',
15           'multipletests', 'NullDistribution', 'RegressionFDR']
16
17# ==============================================
18#
19# Part 1: Multiple Tests and P-Value Correction
20#
21# ==============================================
22
23
24def _ecdf(x):
25    '''no frills empirical cdf used in fdrcorrection
26    '''
27    nobs = len(x)
28    return np.arange(1,nobs+1)/float(nobs)
29
30multitest_methods_names = {'b': 'Bonferroni',
31                           's': 'Sidak',
32                           'h': 'Holm',
33                           'hs': 'Holm-Sidak',
34                           'sh': 'Simes-Hochberg',
35                           'ho': 'Hommel',
36                           'fdr_bh': 'FDR Benjamini-Hochberg',
37                           'fdr_by': 'FDR Benjamini-Yekutieli',
38                           'fdr_tsbh': 'FDR 2-stage Benjamini-Hochberg',
39                           'fdr_tsbky': 'FDR 2-stage Benjamini-Krieger-Yekutieli',
40                           'fdr_gbs': 'FDR adaptive Gavrilov-Benjamini-Sarkar'
41                           }
42
43_alias_list = [['b', 'bonf', 'bonferroni'],
44               ['s', 'sidak'],
45               ['h', 'holm'],
46               ['hs', 'holm-sidak'],
47               ['sh', 'simes-hochberg'],
48               ['ho', 'hommel'],
49               ['fdr_bh', 'fdr_i', 'fdr_p', 'fdri', 'fdrp'],
50               ['fdr_by', 'fdr_n', 'fdr_c', 'fdrn', 'fdrcorr'],
51               ['fdr_tsbh', 'fdr_2sbh'],
52               ['fdr_tsbky', 'fdr_2sbky', 'fdr_twostage'],
53               ['fdr_gbs']
54               ]
55
56
57multitest_alias = {}
58for m in _alias_list:
59    multitest_alias[m[0]] = m[0]
60    for a in m[1:]:
61        multitest_alias[a] = m[0]
62
63def multipletests(pvals, alpha=0.05, method='hs', is_sorted=False,
64                  returnsorted=False):
65    """
66    Test results and p-value correction for multiple tests
67
68    Parameters
69    ----------
70    pvals : array_like, 1-d
71        uncorrected p-values.   Must be 1-dimensional.
72    alpha : float
73        FWER, family-wise error rate, e.g. 0.1
74    method : str
75        Method used for testing and adjustment of pvalues. Can be either the
76        full name or initial letters. Available methods are:
77
78        - `bonferroni` : one-step correction
79        - `sidak` : one-step correction
80        - `holm-sidak` : step down method using Sidak adjustments
81        - `holm` : step-down method using Bonferroni adjustments
82        - `simes-hochberg` : step-up method  (independent)
83        - `hommel` : closed method based on Simes tests (non-negative)
84        - `fdr_bh` : Benjamini/Hochberg  (non-negative)
85        - `fdr_by` : Benjamini/Yekutieli (negative)
86        - `fdr_tsbh` : two stage fdr correction (non-negative)
87        - `fdr_tsbky` : two stage fdr correction (non-negative)
88
89    is_sorted : bool
90        If False (default), the p_values will be sorted, but the corrected
91        pvalues are in the original order. If True, then it assumed that the
92        pvalues are already sorted in ascending order.
93    returnsorted : bool
94         not tested, return sorted p-values instead of original sequence
95
96    Returns
97    -------
98    reject : ndarray, boolean
99        true for hypothesis that can be rejected for given alpha
100    pvals_corrected : ndarray
101        p-values corrected for multiple tests
102    alphacSidak : float
103        corrected alpha for Sidak method
104    alphacBonf : float
105        corrected alpha for Bonferroni method
106
107    Notes
108    -----
109    There may be API changes for this function in the future.
110
111    Except for 'fdr_twostage', the p-value correction is independent of the
112    alpha specified as argument. In these cases the corrected p-values
113    can also be compared with a different alpha. In the case of 'fdr_twostage',
114    the corrected p-values are specific to the given alpha, see
115    ``fdrcorrection_twostage``.
116
117    The 'fdr_gbs' procedure is not verified against another package, p-values
118    are derived from scratch and are not derived in the reference. In Monte
119    Carlo experiments the method worked correctly and maintained the false
120    discovery rate.
121
122    All procedures that are included, control FWER or FDR in the independent
123    case, and most are robust in the positively correlated case.
124
125    `fdr_gbs`: high power, fdr control for independent case and only small
126    violation in positively correlated case
127
128    **Timing**:
129
130    Most of the time with large arrays is spent in `argsort`. When
131    we want to calculate the p-value for several methods, then it is more
132    efficient to presort the pvalues, and put the results back into the
133    original order outside of the function.
134
135    Method='hommel' is very slow for large arrays, since it requires the
136    evaluation of n partitions, where n is the number of p-values.
137    """
138    import gc
139    pvals = np.asarray(pvals)
140    alphaf = alpha  # Notation ?
141
142    if not is_sorted:
143        sortind = np.argsort(pvals)
144        pvals = np.take(pvals, sortind)
145
146    ntests = len(pvals)
147    alphacSidak = 1 - np.power((1. - alphaf), 1./ntests)
148    alphacBonf = alphaf / float(ntests)
149    if method.lower() in ['b', 'bonf', 'bonferroni']:
150        reject = pvals <= alphacBonf
151        pvals_corrected = pvals * float(ntests)
152
153    elif method.lower() in ['s', 'sidak']:
154        reject = pvals <= alphacSidak
155        pvals_corrected = -np.expm1(ntests * np.log1p(-pvals))
156
157    elif method.lower() in ['hs', 'holm-sidak']:
158        alphacSidak_all = 1 - np.power((1. - alphaf),
159                                       1./np.arange(ntests, 0, -1))
160        notreject = pvals > alphacSidak_all
161        del alphacSidak_all
162
163        nr_index = np.nonzero(notreject)[0]
164        if nr_index.size == 0:
165            # nonreject is empty, all rejected
166            notrejectmin = len(pvals)
167        else:
168            notrejectmin = np.min(nr_index)
169        notreject[notrejectmin:] = True
170        reject = ~notreject
171        del notreject
172
173        # It's eqivalent to 1 - np.power((1. - pvals),
174        #                           np.arange(ntests, 0, -1))
175        # but prevents the issue of the floating point precision
176        pvals_corrected_raw = -np.expm1(np.arange(ntests, 0, -1) *
177                                        np.log1p(-pvals))
178        pvals_corrected = np.maximum.accumulate(pvals_corrected_raw)
179        del pvals_corrected_raw
180
181    elif method.lower() in ['h', 'holm']:
182        notreject = pvals > alphaf / np.arange(ntests, 0, -1)
183        nr_index = np.nonzero(notreject)[0]
184        if nr_index.size == 0:
185            # nonreject is empty, all rejected
186            notrejectmin = len(pvals)
187        else:
188            notrejectmin = np.min(nr_index)
189        notreject[notrejectmin:] = True
190        reject = ~notreject
191        pvals_corrected_raw = pvals * np.arange(ntests, 0, -1)
192        pvals_corrected = np.maximum.accumulate(pvals_corrected_raw)
193        del pvals_corrected_raw
194        gc.collect()
195
196    elif method.lower() in ['sh', 'simes-hochberg']:
197        alphash = alphaf / np.arange(ntests, 0, -1)
198        reject = pvals <= alphash
199        rejind = np.nonzero(reject)
200        if rejind[0].size > 0:
201            rejectmax = np.max(np.nonzero(reject))
202            reject[:rejectmax] = True
203        pvals_corrected_raw = np.arange(ntests, 0, -1) * pvals
204        pvals_corrected = np.minimum.accumulate(pvals_corrected_raw[::-1])[::-1]
205        del pvals_corrected_raw
206
207    elif method.lower() in ['ho', 'hommel']:
208        # we need a copy because we overwrite it in a loop
209        a = pvals.copy()
210        for m in range(ntests, 1, -1):
211            cim = np.min(m * pvals[-m:] / np.arange(1,m+1.))
212            a[-m:] = np.maximum(a[-m:], cim)
213            a[:-m] = np.maximum(a[:-m], np.minimum(m * pvals[:-m], cim))
214        pvals_corrected = a
215        reject = a <= alphaf
216
217    elif method.lower() in ['fdr_bh', 'fdr_i', 'fdr_p', 'fdri', 'fdrp']:
218        # delegate, call with sorted pvals
219        reject, pvals_corrected = fdrcorrection(pvals, alpha=alpha,
220                                                 method='indep',
221                                                 is_sorted=True)
222    elif method.lower() in ['fdr_by', 'fdr_n', 'fdr_c', 'fdrn', 'fdrcorr']:
223        # delegate, call with sorted pvals
224        reject, pvals_corrected = fdrcorrection(pvals, alpha=alpha,
225                                                 method='n',
226                                                 is_sorted=True)
227    elif method.lower() in ['fdr_tsbky', 'fdr_2sbky', 'fdr_twostage']:
228        # delegate, call with sorted pvals
229        reject, pvals_corrected = fdrcorrection_twostage(pvals, alpha=alpha,
230                                                         method='bky',
231                                                         is_sorted=True)[:2]
232    elif method.lower() in ['fdr_tsbh', 'fdr_2sbh']:
233        # delegate, call with sorted pvals
234        reject, pvals_corrected = fdrcorrection_twostage(pvals, alpha=alpha,
235                                                         method='bh',
236                                                         is_sorted=True)[:2]
237
238    elif method.lower() in ['fdr_gbs']:
239        #adaptive stepdown in Gavrilov, Benjamini, Sarkar, Annals of Statistics 2009
240##        notreject = pvals > alphaf / np.arange(ntests, 0, -1) #alphacSidak
241##        notrejectmin = np.min(np.nonzero(notreject))
242##        notreject[notrejectmin:] = True
243##        reject = ~notreject
244
245        ii = np.arange(1, ntests + 1)
246        q = (ntests + 1. - ii)/ii * pvals / (1. - pvals)
247        pvals_corrected_raw = np.maximum.accumulate(q) #up requirementd
248
249        pvals_corrected = np.minimum.accumulate(pvals_corrected_raw[::-1])[::-1]
250        del pvals_corrected_raw
251        reject = pvals_corrected <= alpha
252
253    else:
254        raise ValueError('method not recognized')
255
256    if pvals_corrected is not None: #not necessary anymore
257        pvals_corrected[pvals_corrected>1] = 1
258    if is_sorted or returnsorted:
259        return reject, pvals_corrected, alphacSidak, alphacBonf
260    else:
261        pvals_corrected_ = np.empty_like(pvals_corrected)
262        pvals_corrected_[sortind] = pvals_corrected
263        del pvals_corrected
264        reject_ = np.empty_like(reject)
265        reject_[sortind] = reject
266        return reject_, pvals_corrected_, alphacSidak, alphacBonf
267
268
269def fdrcorrection(pvals, alpha=0.05, method='indep', is_sorted=False):
270    '''
271    pvalue correction for false discovery rate.
272
273    This covers Benjamini/Hochberg for independent or positively correlated and
274    Benjamini/Yekutieli for general or negatively correlated tests.
275
276    Parameters
277    ----------
278    pvals : array_like, 1d
279        Set of p-values of the individual tests.
280    alpha : float, optional
281        Family-wise error rate. Defaults to ``0.05``.
282    method : {'i', 'indep', 'p', 'poscorr', 'n', 'negcorr'}, optional
283        Which method to use for FDR correction.
284        ``{'i', 'indep', 'p', 'poscorr'}`` all refer to ``fdr_bh``
285        (Benjamini/Hochberg for independent or positively
286        correlated tests). ``{'n', 'negcorr'}`` both refer to ``fdr_by``
287        (Benjamini/Yekutieli for general or negatively correlated tests).
288        Defaults to ``'indep'``.
289    is_sorted : bool, optional
290        If False (default), the p_values will be sorted, but the corrected
291        pvalues are in the original order. If True, then it assumed that the
292        pvalues are already sorted in ascending order.
293
294    Returns
295    -------
296    rejected : ndarray, bool
297        True if a hypothesis is rejected, False if not
298    pvalue-corrected : ndarray
299        pvalues adjusted for multiple hypothesis testing to limit FDR
300
301    Notes
302    -----
303    If there is prior information on the fraction of true hypothesis, then alpha
304    should be set to ``alpha * m/m_0`` where m is the number of tests,
305    given by the p-values, and m_0 is an estimate of the true hypothesis.
306    (see Benjamini, Krieger and Yekuteli)
307
308    The two-step method of Benjamini, Krieger and Yekutiel that estimates the number
309    of false hypotheses will be available (soon).
310
311    Both methods exposed via this function (Benjamini/Hochberg, Benjamini/Yekutieli)
312    are also available in the function ``multipletests``, as ``method="fdr_bh"`` and
313    ``method="fdr_by"``, respectively.
314
315    See also
316    --------
317    multipletests
318
319    '''
320    pvals = np.asarray(pvals)
321    assert pvals.ndim == 1, "pvals must be 1-dimensional, that is of shape (n,)"
322
323    if not is_sorted:
324        pvals_sortind = np.argsort(pvals)
325        pvals_sorted = np.take(pvals, pvals_sortind)
326    else:
327        pvals_sorted = pvals  # alias
328
329    if method in ['i', 'indep', 'p', 'poscorr']:
330        ecdffactor = _ecdf(pvals_sorted)
331    elif method in ['n', 'negcorr']:
332        cm = np.sum(1./np.arange(1, len(pvals_sorted)+1))   #corrected this
333        ecdffactor = _ecdf(pvals_sorted) / cm
334##    elif method in ['n', 'negcorr']:
335##        cm = np.sum(np.arange(len(pvals)))
336##        ecdffactor = ecdf(pvals_sorted)/cm
337    else:
338        raise ValueError('only indep and negcorr implemented')
339    reject = pvals_sorted <= ecdffactor*alpha
340    if reject.any():
341        rejectmax = max(np.nonzero(reject)[0])
342        reject[:rejectmax] = True
343
344    pvals_corrected_raw = pvals_sorted / ecdffactor
345    pvals_corrected = np.minimum.accumulate(pvals_corrected_raw[::-1])[::-1]
346    del pvals_corrected_raw
347    pvals_corrected[pvals_corrected>1] = 1
348    if not is_sorted:
349        pvals_corrected_ = np.empty_like(pvals_corrected)
350        pvals_corrected_[pvals_sortind] = pvals_corrected
351        del pvals_corrected
352        reject_ = np.empty_like(reject)
353        reject_[pvals_sortind] = reject
354        return reject_, pvals_corrected_
355    else:
356        return reject, pvals_corrected
357
358
359def fdrcorrection_twostage(pvals, alpha=0.05, method='bky', iter=False,
360                           is_sorted=False):
361    '''(iterated) two stage linear step-up procedure with estimation of number of true
362    hypotheses
363
364    Benjamini, Krieger and Yekuteli, procedure in Definition 6
365
366    Parameters
367    ----------
368    pvals : array_like
369        set of p-values of the individual tests.
370    alpha : float
371        error rate
372    method : {'bky', 'bh')
373        see Notes for details
374
375        * 'bky' - implements the procedure in Definition 6 of Benjamini, Krieger
376           and Yekuteli 2006
377        * 'bh' - the two stage method of Benjamini and Hochberg
378
379    iter : bool
380
381    Returns
382    -------
383    rejected : ndarray, bool
384        True if a hypothesis is rejected, False if not
385    pvalue-corrected : ndarray
386        pvalues adjusted for multiple hypotheses testing to limit FDR
387    m0 : int
388        ntest - rej, estimated number of true hypotheses
389    alpha_stages : list of floats
390        A list of alphas that have been used at each stage
391
392    Notes
393    -----
394    The returned corrected p-values are specific to the given alpha, they
395    cannot be used for a different alpha.
396
397    The returned corrected p-values are from the last stage of the fdr_bh
398    linear step-up procedure (fdrcorrection0 with method='indep') corrected
399    for the estimated fraction of true hypotheses.
400    This means that the rejection decision can be obtained with
401    ``pval_corrected <= alpha``, where ``alpha`` is the original significance
402    level.
403    (Note: This has changed from earlier versions (<0.5.0) of statsmodels.)
404
405    BKY described several other multi-stage methods, which would be easy to implement.
406    However, in their simulation the simple two-stage method (with iter=False) was the
407    most robust to the presence of positive correlation
408
409    TODO: What should be returned?
410
411    '''
412    pvals = np.asarray(pvals)
413
414    if not is_sorted:
415        pvals_sortind = np.argsort(pvals)
416        pvals = np.take(pvals, pvals_sortind)
417
418    ntests = len(pvals)
419    if method == 'bky':
420        fact = (1.+alpha)
421        alpha_prime = alpha / fact
422    elif method == 'bh':
423        fact = 1.
424        alpha_prime = alpha
425    else:
426        raise ValueError("only 'bky' and 'bh' are available as method")
427
428    alpha_stages = [alpha_prime]
429    rej, pvalscorr = fdrcorrection(pvals, alpha=alpha_prime, method='indep',
430                                   is_sorted=True)
431    r1 = rej.sum()
432    if (r1 == 0) or (r1 == ntests):
433        return rej, pvalscorr * fact, ntests - r1, alpha_stages
434    ri_old = r1
435
436    while True:
437        ntests0 = 1.0 * ntests - ri_old
438        alpha_star = alpha_prime * ntests / ntests0
439        alpha_stages.append(alpha_star)
440        #print ntests0, alpha_star
441        rej, pvalscorr = fdrcorrection(pvals, alpha=alpha_star, method='indep',
442                                       is_sorted=True)
443        ri = rej.sum()
444        if (not iter) or ri == ri_old:
445            break
446        elif ri < ri_old:
447            # prevent cycles and endless loops
448            raise RuntimeError(" oops - should not be here")
449        ri_old = ri
450
451    # make adjustment to pvalscorr to reflect estimated number of Non-Null cases
452    # decision is then pvalscorr < alpha  (or <=)
453    pvalscorr *= ntests0 * 1.0 /  ntests
454    if method == 'bky':
455        pvalscorr *= (1. + alpha)
456
457    if not is_sorted:
458        pvalscorr_ = np.empty_like(pvalscorr)
459        pvalscorr_[pvals_sortind] = pvalscorr
460        del pvalscorr
461        reject = np.empty_like(rej)
462        reject[pvals_sortind] = rej
463        return reject, pvalscorr_, ntests - ri, alpha_stages
464    else:
465        return rej, pvalscorr, ntests - ri, alpha_stages
466
467
468def local_fdr(zscores, null_proportion=1.0, null_pdf=None, deg=7,
469              nbins=30, alpha=0):
470    """
471    Calculate local FDR values for a list of Z-scores.
472
473    Parameters
474    ----------
475    zscores : array_like
476        A vector of Z-scores
477    null_proportion : float
478        The assumed proportion of true null hypotheses
479    null_pdf : function mapping reals to positive reals
480        The density of null Z-scores; if None, use standard normal
481    deg : int
482        The maximum exponent in the polynomial expansion of the
483        density of non-null Z-scores
484    nbins : int
485        The number of bins for estimating the marginal density
486        of Z-scores.
487    alpha : float
488        Use Poisson ridge regression with parameter alpha to estimate
489        the density of non-null Z-scores.
490
491    Returns
492    -------
493    fdr : array_like
494        A vector of FDR values
495
496    References
497    ----------
498    B Efron (2008).  Microarrays, Empirical Bayes, and the Two-Groups
499    Model.  Statistical Science 23:1, 1-22.
500
501    Examples
502    --------
503    Basic use (the null Z-scores are taken to be standard normal):
504
505    >>> from statsmodels.stats.multitest import local_fdr
506    >>> import numpy as np
507    >>> zscores = np.random.randn(30)
508    >>> fdr = local_fdr(zscores)
509
510    Use a Gaussian null distribution estimated from the data:
511
512    >>> null = EmpiricalNull(zscores)
513    >>> fdr = local_fdr(zscores, null_pdf=null.pdf)
514    """
515
516    from statsmodels.genmod.generalized_linear_model import GLM
517    from statsmodels.genmod.generalized_linear_model import families
518    from statsmodels.regression.linear_model import OLS
519
520    # Bins for Poisson modeling of the marginal Z-score density
521    minz = min(zscores)
522    maxz = max(zscores)
523    bins = np.linspace(minz, maxz, nbins)
524
525    # Bin counts
526    zhist = np.histogram(zscores, bins)[0]
527
528    # Bin centers
529    zbins = (bins[:-1] + bins[1:]) / 2
530
531    # The design matrix at bin centers
532    dmat = np.vander(zbins, deg + 1)
533
534    # Rescale the design matrix
535    sd = dmat.std(0)
536    ii = sd >1e-8
537    dmat[:, ii] /= sd[ii]
538
539    start = OLS(np.log(1 + zhist), dmat).fit().params
540
541    # Poisson regression
542    if alpha > 0:
543        md = GLM(zhist, dmat, family=families.Poisson()).fit_regularized(L1_wt=0, alpha=alpha, start_params=start)
544    else:
545        md = GLM(zhist, dmat, family=families.Poisson()).fit(start_params=start)
546
547    # The design matrix for all Z-scores
548    dmat_full = np.vander(zscores, deg + 1)
549    dmat_full[:, ii] /= sd[ii]
550
551    # The height of the estimated marginal density of Z-scores,
552    # evaluated at every observed Z-score.
553    fz = md.predict(dmat_full) / (len(zscores) * (bins[1] - bins[0]))
554
555    # The null density.
556    if null_pdf is None:
557        f0 = np.exp(-0.5 * zscores**2) / np.sqrt(2 * np.pi)
558    else:
559        f0 = null_pdf(zscores)
560
561    # The local FDR values
562    fdr = null_proportion * f0 / fz
563
564    fdr = np.clip(fdr, 0, 1)
565
566    return fdr
567
568
569class NullDistribution(object):
570    """
571    Estimate a Gaussian distribution for the null Z-scores.
572
573    The observed Z-scores consist of both null and non-null values.
574    The fitted distribution of null Z-scores is Gaussian, but may have
575    non-zero mean and/or non-unit scale.
576
577    Parameters
578    ----------
579    zscores : array_like
580        The observed Z-scores.
581    null_lb : float
582        Z-scores between `null_lb` and `null_ub` are all considered to be
583        true null hypotheses.
584    null_ub : float
585        See `null_lb`.
586    estimate_mean : bool
587        If True, estimate the mean of the distribution.  If False, the
588        mean is fixed at zero.
589    estimate_scale : bool
590        If True, estimate the scale of the distribution.  If False, the
591        scale parameter is fixed at 1.
592    estimate_null_proportion : bool
593        If True, estimate the proportion of true null hypotheses (i.e.
594        the proportion of z-scores with expected value zero).  If False,
595        this parameter is fixed at 1.
596
597    Attributes
598    ----------
599    mean : float
600        The estimated mean of the empirical null distribution
601    sd : float
602        The estimated standard deviation of the empirical null distribution
603    null_proportion : float
604        The estimated proportion of true null hypotheses among all hypotheses
605
606    References
607    ----------
608    B Efron (2008).  Microarrays, Empirical Bayes, and the Two-Groups
609    Model.  Statistical Science 23:1, 1-22.
610
611    Notes
612    -----
613    See also:
614
615    http://nipy.org/nipy/labs/enn.html#nipy.algorithms.statistics.empirical_pvalue.NormalEmpiricalNull.fdr
616    """
617
618    def __init__(self, zscores, null_lb=-1, null_ub=1, estimate_mean=True,
619                 estimate_scale=True, estimate_null_proportion=False):
620
621        # Extract the null z-scores
622        ii = np.flatnonzero((zscores >= null_lb) & (zscores <= null_ub))
623        if len(ii) == 0:
624            raise RuntimeError("No Z-scores fall between null_lb and null_ub")
625        zscores0 = zscores[ii]
626
627        # Number of Z-scores, and null Z-scores
628        n_zs, n_zs0 = len(zscores), len(zscores0)
629
630        # Unpack and transform the parameters to the natural scale, hold
631        # parameters fixed as specified.
632        def xform(params):
633
634            mean = 0.
635            sd = 1.
636            prob = 1.
637
638            ii = 0
639            if estimate_mean:
640                mean = params[ii]
641                ii += 1
642            if estimate_scale:
643                sd = np.exp(params[ii])
644                ii += 1
645            if estimate_null_proportion:
646                prob = 1 / (1 + np.exp(-params[ii]))
647
648            return mean, sd, prob
649
650
651        from scipy.stats.distributions import norm
652
653
654        def fun(params):
655            """
656            Negative log-likelihood of z-scores.
657
658            The function has three arguments, packed into a vector:
659
660            mean : location parameter
661            logscale : log of the scale parameter
662            logitprop : logit of the proportion of true nulls
663
664            The implementation follows section 4 from Efron 2008.
665            """
666
667            d, s, p = xform(params)
668
669            # Mass within the central region
670            central_mass = (norm.cdf((null_ub - d) / s) -
671                            norm.cdf((null_lb - d) / s))
672
673            # Probability that a Z-score is null and is in the central region
674            cp = p * central_mass
675
676            # Binomial term
677            rval = n_zs0 * np.log(cp) + (n_zs - n_zs0) * np.log(1 - cp)
678
679            # Truncated Gaussian term for null Z-scores
680            zv = (zscores0 - d) / s
681            rval += np.sum(-zv**2 / 2) - n_zs0 * np.log(s)
682            rval -= n_zs0 * np.log(central_mass)
683
684            return -rval
685
686
687        # Estimate the parameters
688        from scipy.optimize import minimize
689        # starting values are mean = 0, scale = 1, p0 ~ 1
690        mz = minimize(fun, np.r_[0., 0, 3], method="Nelder-Mead")
691        mean, sd, prob = xform(mz['x'])
692
693        self.mean = mean
694        self.sd = sd
695        self.null_proportion = prob
696
697
698    # The fitted null density function
699    def pdf(self, zscores):
700        """
701        Evaluates the fitted empirical null Z-score density.
702
703        Parameters
704        ----------
705        zscores : scalar or array_like
706            The point or points at which the density is to be
707            evaluated.
708
709        Returns
710        -------
711        The empirical null Z-score density evaluated at the given
712        points.
713        """
714
715        zval = (zscores - self.mean) / self.sd
716        return np.exp(-0.5*zval**2 - np.log(self.sd) - 0.5*np.log(2*np.pi))
717