# -*- coding: utf-8 -*-
"""
Created on Thu Apr  2 14:34:25 2020

Author: Josef Perktold
License: BSD-3

"""

import warnings

import numpy as np
import pandas as pd
from scipy import stats

from statsmodels.stats.base import HolderTuple
class CombineResults(object):
    """Results from combined estimate of means or effect sizes.

    This currently includes intermediate results that might be removed
    in the future.
    """

    def __init__(self, **kwds):
        self.__dict__.update(kwds)
        self._ini_keys = list(kwds.keys())

        self.df_resid = self.k - 1

        # TODO: move to property ?
        self.sd_eff_w_fe_hksj = np.sqrt(self.var_hksj_fe)
        self.sd_eff_w_re_hksj = np.sqrt(self.var_hksj_re)

        # heterogeneity (explained variance) measures H2 and I2
        self.h2 = self.q / (self.k - 1)
        self.i2 = 1 - 1 / self.h2

        # memoize sample confidence intervals, keyed by (alpha, use_t)
        self.cache_ci = {}
    def conf_int_samples(self, alpha=0.05, use_t=None, nobs=None,
                         ci_func=None):
        """confidence intervals for the effect size estimate of samples

        Additional information needs to be provided for confidence intervals
        that are not based on the normal distribution using the available
        variance. This is likely to change in the future.

        Parameters
        ----------
        alpha : float in (0, 1)
            Significance level for confidence interval. Nominal coverage is
            ``1 - alpha``.
        use_t : None or bool
            If use_t is None, then the attribute `use_t` determines whether
            normal or t-distribution is used for confidence intervals.
            Specifying use_t overrides the attribute.
            If use_t is false, then confidence intervals are based on the
            normal distribution. If it is true, then the t-distribution is
            used.
        nobs : None or float
            Number of observations used for degrees of freedom computation.
            Only used if use_t is true.
        ci_func : None or callable
            User provided function to compute confidence intervals.
            This is not used yet and will allow using non-standard confidence
            intervals.

        Returns
        -------
        ci_eff : tuple of ndarrays
            Tuple (ci_low, ci_upp) with confidence interval computed for each
            sample.

        Notes
        -----
        CombineResults currently only has information from the combine_effects
        function, which does not provide details about individual samples.
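
        Examples
        --------
        A minimal sketch with illustrative numbers only, assuming the results
        instance was created by ``combine_effects``.

        >>> import numpy as np
        >>> from statsmodels.stats.meta_analysis import combine_effects
        >>> eff = np.array([0.12, 0.23, -0.05, 0.08])
        >>> var_eff = np.array([0.02, 0.03, 0.05, 0.01])
        >>> res = combine_effects(eff, var_eff, method_re="dl")
        >>> ci_low, ci_upp = res.conf_int_samples(alpha=0.05)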
        """
        # This is a bit messy; the results do not contain enough information
        # to compute conf_int for distributions other than the normal.
        # TODO: maybe there is a better solution
        if (alpha, use_t) in self.cache_ci:
            return self.cache_ci[(alpha, use_t)]

        if use_t is None:
            use_t = self.use_t

        if ci_func is not None:
            kwds = {"use_t": use_t} if use_t is not None else {}
            ci_eff = ci_func(alpha=alpha, **kwds)
            self.ci_sample_distr = "ci_func"
        else:
            if use_t is False:
                crit = stats.norm.isf(alpha / 2)
                self.ci_sample_distr = "normal"
            else:
                if nobs is not None:
                    df_resid = nobs - 1
                    crit = stats.t.isf(alpha / 2, df_resid)
                    self.ci_sample_distr = "t"
                else:
                    msg = ("`use_t=True` requires `nobs` for each sample "
                           "or `ci_func`. Using normal distribution for "
                           "confidence interval of individual samples.")
                    warnings.warn(msg)
                    crit = stats.norm.isf(alpha / 2)
                    self.ci_sample_distr = "normal"

            ci_low = self.eff - crit * self.sd_eff
            ci_upp = self.eff + crit * self.sd_eff
            ci_eff = (ci_low, ci_upp)

        self.cache_ci[(alpha, use_t)] = ci_eff
        return ci_eff

    def conf_int(self, alpha=0.05, use_t=None):
        """confidence interval for the overall mean estimate

        Parameters
        ----------
        alpha : float in (0, 1)
            Significance level for confidence interval. Nominal coverage is
            ``1 - alpha``.
        use_t : None or bool
            If use_t is None, then the attribute `use_t` determines whether
            normal or t-distribution is used for confidence intervals.
            Specifying use_t overrides the attribute.
            If use_t is false, then confidence intervals are based on the
            normal distribution. If it is true, then the t-distribution is
            used.

        Returns
        -------
        ci_eff_fe : tuple of floats
            Confidence interval for mean effect size based on the fixed
            effects model with scale equal to 1.
        ci_eff_re : tuple of floats
            Confidence interval for mean effect size based on the random
            effects model with scale equal to 1.
        ci_eff_fe_wls : tuple of floats
            Confidence interval for mean effect size based on the fixed
            effects model with estimated scale corresponding to WLS,
            i.e. HKSJ.
        ci_eff_re_wls : tuple of floats
            Confidence interval for mean effect size based on the random
            effects model with estimated scale corresponding to WLS,
            i.e. HKSJ.
            If the random effects method is fully iterated, i.e. Paule-Mandel,
            then the estimated scale is 1.

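        Examples
        --------
        A minimal sketch with illustrative numbers; the four intervals
        correspond to fixed and random effects estimates with fixed and
        estimated (WLS, HKSJ) scale.

        >>> import numpy as np
        >>> from statsmodels.stats.meta_analysis import combine_effects
        >>> eff = np.array([0.12, 0.23, -0.05, 0.08])
        >>> var_eff = np.array([0.02, 0.03, 0.05, 0.01])
        >>> res = combine_effects(eff, var_eff, method_re="dl")
        >>> ci_fe, ci_re, ci_fe_wls, ci_re_wls = res.conf_int(alpha=0.05)
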
        """
        if use_t is None:
            use_t = self.use_t

        if use_t is False:
            crit = stats.norm.isf(alpha / 2)
        else:
            crit = stats.t.isf(alpha / 2, self.df_resid)

        sgn = np.asarray([-1, 1])
        m_fe = self.mean_effect_fe
        m_re = self.mean_effect_re
        ci_eff_fe = m_fe + sgn * crit * self.sd_eff_w_fe
        ci_eff_re = m_re + sgn * crit * self.sd_eff_w_re

        # intervals with estimated scale, i.e. WLS or HKSJ standard errors
        ci_eff_fe_wls = m_fe + sgn * crit * np.sqrt(self.var_hksj_fe)
        ci_eff_re_wls = m_re + sgn * crit * np.sqrt(self.var_hksj_re)

        return ci_eff_fe, ci_eff_re, ci_eff_fe_wls, ci_eff_re_wls

    def test_homogeneity(self):
        """Test whether the means of all samples are the same

        This currently has no options; the test uses the chisquare
        distribution. The default might change to depend on `use_t`.

        Returns
        -------
        res : HolderTuple instance
            The results include the following attributes:

            - statistic : float
                Test statistic, ``q`` in meta-analysis, this is the
                pearson_chi2 statistic for the fixed effects model.
            - pvalue : float
                P-value based on chisquare distribution.
            - df : float
                Degrees of freedom, equal to number of studies or samples
                minus 1.
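
        Examples
        --------
        A minimal sketch with illustrative numbers; under homogeneity the
        statistic is chisquare distributed with ``k - 1`` degrees of freedom.

        >>> import numpy as np
        >>> from statsmodels.stats.meta_analysis import combine_effects
        >>> eff = np.array([0.12, 0.23, -0.05, 0.08])
        >>> var_eff = np.array([0.02, 0.03, 0.05, 0.01])
        >>> res = combine_effects(eff, var_eff, method_re="dl")
        >>> ht = res.test_homogeneity()
        >>> stat, pvalue = ht.statistic, ht.pvalue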
        """
        pvalue = stats.chi2.sf(self.q, self.k - 1)
        res = HolderTuple(statistic=self.q,
                          pvalue=pvalue,
                          df=self.k - 1,
                          distr="chi2")
        return res

    def summary_array(self, alpha=0.05, use_t=None):
        """Create array with sample statistics and mean estimates

        Parameters
        ----------
        alpha : float in (0, 1)
            Significance level for confidence interval. Nominal coverage is
            ``1 - alpha``.
        use_t : None or bool
            If use_t is None, then the attribute `use_t` determines whether
            normal or t-distribution is used for confidence intervals.
            Specifying use_t overrides the attribute.
            If use_t is false, then confidence intervals are based on the
            normal distribution. If it is true, then the t-distribution is
            used.

        Returns
        -------
        res : ndarray
            Array with columns
            ["eff", "sd_eff", "ci_low", "ci_upp", "w_fe", "w_re"].
            Rows include statistics for samples and estimates of overall mean.
        column_names : list of str
            The names for the columns, used when creating summary DataFrame.
        """

        ci_low, ci_upp = self.conf_int_samples(alpha=alpha, use_t=use_t)
        res = np.column_stack([self.eff, self.sd_eff,
                               ci_low, ci_upp,
                               self.weights_rel_fe, self.weights_rel_re])

        # append the four overall mean estimates with their standard errors
        ci = self.conf_int(alpha=alpha, use_t=use_t)
        res_fe = [[self.mean_effect_fe, self.sd_eff_w_fe,
                   ci[0][0], ci[0][1], 1, np.nan]]
        res_re = [[self.mean_effect_re, self.sd_eff_w_re,
                   ci[1][0], ci[1][1], np.nan, 1]]
        res_fe_wls = [[self.mean_effect_fe, self.sd_eff_w_fe_hksj,
                       ci[2][0], ci[2][1], 1, np.nan]]
        res_re_wls = [[self.mean_effect_re, self.sd_eff_w_re_hksj,
                       ci[3][0], ci[3][1], np.nan, 1]]

        res = np.concatenate([res, res_fe, res_re, res_fe_wls, res_re_wls],
                             axis=0)
        column_names = ["eff", "sd_eff", "ci_low", "ci_upp", "w_fe", "w_re"]
        return res, column_names

    def summary_frame(self, alpha=0.05, use_t=None):
        """Create DataFrame with sample statistics and mean estimates

        Parameters
        ----------
        alpha : float in (0, 1)
            Significance level for confidence interval. Nominal coverage is
            ``1 - alpha``.
        use_t : None or bool
            If use_t is None, then the attribute `use_t` determines whether
            normal or t-distribution is used for confidence intervals.
            Specifying use_t overrides the attribute.
            If use_t is false, then confidence intervals are based on the
            normal distribution. If it is true, then the t-distribution is
            used.

        Returns
        -------
        res : DataFrame
            pandas DataFrame instance with columns
            ["eff", "sd_eff", "ci_low", "ci_upp", "w_fe", "w_re"].
            Rows include statistics for samples and estimates of overall mean.

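        Examples
        --------
        A minimal sketch with illustrative numbers; the last four rows of the
        DataFrame contain the combined estimates.

        >>> import numpy as np
        >>> from statsmodels.stats.meta_analysis import combine_effects
        >>> eff = np.array([0.12, 0.23, -0.05, 0.08])
        >>> var_eff = np.array([0.02, 0.03, 0.05, 0.01])
        >>> res = combine_effects(eff, var_eff, method_re="dl")
        >>> frame = res.summary_frame()
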
        """
        if use_t is None:
            use_t = self.use_t
        labels = (list(self.row_names) +
                  ["fixed effect", "random effect",
                   "fixed effect wls", "random effect wls"])
        res, col_names = self.summary_array(alpha=alpha, use_t=use_t)
        results = pd.DataFrame(res, index=labels, columns=col_names)
        return results

    def plot_forest(self, alpha=0.05, use_t=None, use_exp=False,
                    ax=None, **kwds):
        """Forest plot with means and confidence intervals

        Parameters
        ----------
        alpha : float in (0, 1)
            Significance level for confidence interval. Nominal coverage is
            ``1 - alpha``.
        use_t : None or bool
            If use_t is None, then the attribute `use_t` determines whether
            normal or t-distribution is used for confidence intervals.
            Specifying use_t overrides the attribute.
            If use_t is false, then confidence intervals are based on the
            normal distribution. If it is true, then the t-distribution is
            used.
        use_exp : bool
            If `use_exp` is True, then the effect size and confidence limits
            will be exponentiated. This transforms log-odds-ratio into
            odds-ratio, and similarly for risk-ratio.
        ax : AxesSubplot, optional
            If given, this axes is used for the plot instead of a new figure
            being created.
        kwds : optional keyword arguments
            Keywords are forwarded to the dot_plot function that creates the
            plot.

        Returns
        -------
        fig : Matplotlib figure instance

        See Also
        --------
        dot_plot

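        Examples
        --------
        A minimal sketch with illustrative numbers; this requires matplotlib
        to be installed.

        >>> import numpy as np
        >>> from statsmodels.stats.meta_analysis import combine_effects
        >>> eff = np.array([0.12, 0.23, -0.05, 0.08])
        >>> var_eff = np.array([0.02, 0.03, 0.05, 0.01])
        >>> res = combine_effects(eff, var_eff, method_re="dl")
        >>> fig = res.plot_forest(alpha=0.05)
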
        """
        from statsmodels.graphics.dotplots import dot_plot
        res_df = self.summary_frame(alpha=alpha, use_t=use_t)
        if use_exp:
            res_df = np.exp(res_df[["eff", "ci_low", "ci_upp"]])
        # half-widths of the confidence intervals for dot_plot
        hw = np.abs(res_df[["ci_low", "ci_upp"]] - res_df[["eff"]].values)
        fig = dot_plot(points=res_df["eff"], intervals=hw,
                       lines=res_df.index, line_order=res_df.index,
                       ax=ax, **kwds)
        return fig


def effectsize_smd(mean1, sd1, nobs1, mean2, sd2, nobs2):
    """effect sizes for mean difference for use in meta-analysis

    mean1, sd1, nobs1 are for the treatment groups.
    mean2, sd2, nobs2 are for the control groups.

    Effect sizes are computed for the mean difference ``mean1 - mean2``
    standardized by an estimate of the within variance.

    This does not have options yet.
    It uses the standardized mean difference with bias correction as effect
    size.

    This currently does not use np.asarray; all computations are possible in
    pandas.

    Parameters
    ----------
    mean1 : array
        mean of first sample, the treatment groups
    sd1 : array
        standard deviation of residuals in treatment groups, within
    nobs1 : array
        number of observations in treatment groups
    mean2, sd2, nobs2 : arrays
        mean, standard deviation and number of observations of control groups

    Returns
    -------
    smd_bc : array
        bias corrected estimate of standardized mean difference
    var_smdbc : array
        estimate of variance of smd_bc

    Notes
    -----
    Status: API will still change. This is currently intended for support of
    meta-analysis.

    References
    ----------
    Borenstein, Michael. 2009. Introduction to Meta-Analysis.
        Chichester: Wiley.

    Chen, Ding-Geng, and Karl E. Peace. 2013. Applied Meta-Analysis with R.
        Chapman & Hall/CRC Biostatistics Series.
        Boca Raton: CRC Press/Taylor & Francis Group.

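    Examples
    --------
    A minimal sketch with illustrative numbers for two studies comparing a
    treatment and a control group.

    >>> import numpy as np
    >>> from statsmodels.stats.meta_analysis import effectsize_smd
    >>> mean1 = np.array([7.0, 6.5])
    >>> sd1 = np.array([1.2, 1.1])
    >>> nobs1 = np.array([30, 40])
    >>> mean2 = np.array([6.4, 6.2])
    >>> sd2 = np.array([1.3, 1.0])
    >>> nobs2 = np.array([30, 42])
    >>> smd_bc, var_smdbc = effectsize_smd(mean1, sd1, nobs1,
    ...                                    mean2, sd2, nobs2)
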
    """
    # TODO: not used yet, design and options ?
    # var_diff_uneq = sd1**2 / nobs1 + sd2**2 / nobs2
    # pooled within-group variance of the two samples
    var_diff = (sd1**2 * (nobs1 - 1) +
                sd2**2 * (nobs2 - 1)) / (nobs1 + nobs2 - 2)
    sd_diff = np.sqrt(var_diff)
    nobs = nobs1 + nobs2
    # small sample bias correction for the SMD, i.e. Hedges' g
    bias_correction = 1 - 3 / (4 * nobs - 9)
    smd = (mean1 - mean2) / sd_diff
    smd_bc = bias_correction * smd
    var_smdbc = nobs / nobs1 / nobs2 + smd_bc**2 / 2 / (nobs - 3.94)
    return smd_bc, var_smdbc


def effectsize_2proportions(count1, nobs1, count2, nobs2, statistic="diff",
                            zero_correction=None, zero_kwds=None):
    """Effect sizes for two sample binomial proportions

    Parameters
    ----------
    count1, nobs1, count2, nobs2 : array_like
        data for two samples
    statistic : {"diff", "odds-ratio", "risk-ratio", "arcsine"}
        statistic for the comparison of two proportions.
        Effect sizes for "odds-ratio" and "risk-ratio" are in logarithm.
    zero_correction : {None, float, "tac", "clip"}
        Some statistics are not finite when zero counts are in the data.
        The options to remove zeros are:

        * float : if zero_correction is a single float, then it will be added
          to all counts (cells) if the sample has any zeros.
        * "tac" : treatment arm continuity correction, see Ruecker et al.
          2009, section 3.2.
        * "clip" : clip proportions without adding a value to all cells.
          The clip bounds can be set with zero_kwds["clip_bounds"].

    zero_kwds : dict
        additional options to handle zero counts.
        "clip_bounds" tuple, default (1e-6, 1 - 1e-6) if
        zero_correction="clip". Other options are not yet implemented.

    Returns
    -------
    effect size : array
        Effect size for each sample.
    var_es : array
        Estimate of variance of the effect size.

    See Also
    --------
    statsmodels.stats.contingency_tables

    Notes
    -----
    Status: API is experimental. Options for zero handling are incomplete.

    The names for the ``statistic`` keyword can be shortened to "rd", "rr",
    "or" and "as".

    The statistics are defined as:

     - risk difference = p1 - p2
     - log risk ratio = log(p1 / p2)
     - log odds ratio = log(p1 / (1 - p1) * (1 - p2) / p2)
     - arcsine-sqrt = arcsin(sqrt(p1)) - arcsin(sqrt(p2))

    where p1 and p2 are the estimated proportions in sample 1 (treatment) and
    sample 2 (control).

    Log-odds-ratio and log-risk-ratio can be transformed back to odds-ratio
    and risk-ratio using the ``exp`` function.

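    Examples
    --------
    A minimal sketch with illustrative counts; the log risk-ratio and its
    variance can be passed on to ``combine_effects``.

    >>> import numpy as np
    >>> from statsmodels.stats.meta_analysis import effectsize_2proportions
    >>> count1, nobs1 = np.array([12, 8]), np.array([40, 35])
    >>> count2, nobs2 = np.array([7, 9]), np.array([40, 36])
    >>> eff, var_eff = effectsize_2proportions(count1, nobs1, count2, nobs2,
    ...                                        statistic="rr")
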
    """
    if zero_kwds is None:
        zero_kwds = {}

    if zero_correction is None:
        cc1 = cc2 = 0
    elif zero_correction == "tac":
        # treatment arm continuity correction Ruecker et al. 2009, section 3.2
        nobs_t = nobs1 + nobs2
        cc1 = nobs2 / nobs_t
        cc2 = nobs1 / nobs_t
    elif zero_correction == "clip":
        clip_bounds = zero_kwds.get("clip_bounds", (1e-6, 1 - 1e-6))
        cc1 = cc2 = 0
    elif zero_correction:
        # TODO: check is float_like
        cc1 = cc2 = zero_correction
    else:
        msg = "zero_correction not recognized or supported"
        raise NotImplementedError(msg)

    # apply the correction only to samples that have zero cells,
    # consistent with the adjustment of the denominators n1 and n2
    zero_mask1 = (count1 == 0) | (count1 == nobs1)
    zero_mask2 = (count2 == 0) | (count2 == nobs2)
    zmask = np.logical_or(zero_mask1, zero_mask2)
    n1 = nobs1 + (cc1 + cc2) * zmask
    n2 = nobs2 + (cc1 + cc2) * zmask
    p1 = (count1 + cc1 * zmask) / n1
    p2 = (count2 + cc2 * zmask) / n2

    if zero_correction == "clip":
        p1 = np.clip(p1, *clip_bounds)
        p2 = np.clip(p2, *clip_bounds)

    if statistic in ["diff", "rd"]:
        rd = p1 - p2
        rd_var = p1 * (1 - p1) / n1 + p2 * (1 - p2) / n2
        eff = rd
        var_eff = rd_var
    elif statistic in ["risk-ratio", "rr"]:
        # rr = p1 / p2
        log_rr = np.log(p1) - np.log(p2)
        log_rr_var = (1 - p1) / p1 / n1 + (1 - p2) / p2 / n2
        eff = log_rr
        var_eff = log_rr_var
    elif statistic in ["odds-ratio", "or"]:
        # or_ = p1 / (1 - p1) * (1 - p2) / p2
        log_or = np.log(p1) - np.log(1 - p1) - np.log(p2) + np.log(1 - p2)
        log_or_var = 1 / (p1 * (1 - p1) * n1) + 1 / (p2 * (1 - p2) * n2)
        eff = log_or
        var_eff = log_or_var
    elif statistic in ["arcsine", "arcsin", "as"]:
        as_ = np.arcsin(np.sqrt(p1)) - np.arcsin(np.sqrt(p2))
        as_var = (1 / n1 + 1 / n2) / 4
        eff = as_
        var_eff = as_var
    else:
        msg = 'statistic not recognized, use one of "rd", "rr", "or", "as"'
        raise NotImplementedError(msg)

    return eff, var_eff


def combine_effects(effect, variance, method_re="iterated", row_names=None,
                    use_t=False, alpha=0.05, **kwds):
    """combine effect sizes of samples using meta-analysis

    This currently does not use np.asarray; all computations are possible in
    pandas.

    Parameters
    ----------
    effect : array
        mean of effect size measure for all samples
    variance : array
        variance of mean or effect size measure for all samples
    method_re : {"iterated", "chi2"}
        method that is used to compute the between random effects variance.
        "iterated" or "pm" uses the Paule and Mandel method to iteratively
        estimate the random effects variance. Options for the iteration can
        be provided in the ``kwds``.
        "chi2" or "dl" uses the DerSimonian and Laird one-step estimator.
    row_names : list of strings (optional)
        names for samples or studies, will be included in results summary and
        table.
    use_t : bool
        If use_t is true, then the t-distribution is used for inference,
        otherwise the normal distribution.
    alpha : float in (0, 1)
        significance level, default is 0.05, for the confidence intervals

    Returns
    -------
    results : CombineResults
        Contains estimation results and intermediate statistics, and includes
        a method to return a summary table.
        Statistics from intermediate calculations might be removed at a later
        time.

    Notes
    -----
    Status: Basic functionality is verified, mainly compared to the R metafor
    package. However, the API might still change.

    This computes both fixed effects and random effects estimates. The
    random effects results depend on the method to estimate the RE variance.

    Scale estimate:
    In fixed effects models and in random effects models without fully
    iterated random effects variance, the model will in general not account
    for all residual variance. Traditional meta-analysis uses a fixed
    scale equal to 1, which might not produce test statistics and
    confidence intervals with the correct size. Estimating the scale to
    account for residual variance often improves the small sample properties
    of inference and confidence intervals.
    This adjustment to the standard errors is often referred to as the HKSJ
    method, attributed to Hartung and Knapp and to Sidik and Jonkman.
    However, it is equivalent to estimating the scale in WLS.
    The results instance includes both fixed scale and estimated scale
    versions of standard errors and confidence intervals.

    References
    ----------
    Borenstein, Michael. 2009. Introduction to Meta-Analysis.
        Chichester: Wiley.

    Chen, Ding-Geng, and Karl E. Peace. 2013. Applied Meta-Analysis with R.
        Chapman & Hall/CRC Biostatistics Series.
        Boca Raton: CRC Press/Taylor & Francis Group.

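    Examples
    --------
    A minimal sketch with illustrative numbers; the effect sizes and
    variances could come from ``effectsize_smd`` or
    ``effectsize_2proportions``.

    >>> import numpy as np
    >>> from statsmodels.stats.meta_analysis import combine_effects
    >>> eff = np.array([0.12, 0.23, -0.05, 0.08])
    >>> var_eff = np.array([0.02, 0.03, 0.05, 0.01])
    >>> res = combine_effects(eff, var_eff, method_re="iterated",
    ...                       row_names=["s1", "s2", "s3", "s4"])
    >>> frame = res.summary_frame()
    >>> ci_fe, ci_re, ci_fe_wls, ci_re_wls = res.conf_int()
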
    """

    k = len(effect)
    if row_names is None:
        row_names = list(range(k))
    crit = stats.norm.isf(alpha / 2)

    # alias for initial version
    eff = effect
    var_eff = variance
    sd_eff = np.sqrt(var_eff)

    # fixed effects computation

    weights_fe = 1 / var_eff  # no bias correction ?
    w_total_fe = weights_fe.sum(0)
    weights_rel_fe = weights_fe / w_total_fe

    eff_w_fe = weights_rel_fe * eff
    mean_effect_fe = eff_w_fe.sum()
    var_eff_w_fe = 1 / w_total_fe
    sd_eff_w_fe = np.sqrt(var_eff_w_fe)

    # random effects computation

    # Cochran's Q, the chisquare homogeneity statistic of the FE model
    q = (weights_fe * eff**2).sum(0)
    q -= (weights_fe * eff).sum()**2 / w_total_fe
    df = k - 1

    if method_re.lower() in ["iterated", "pm"]:
        tau2, _ = _fit_tau_iterative(eff, var_eff, **kwds)
    elif method_re.lower() in ["chi2", "dl"]:
        # DerSimonian-Laird one-step estimate of the RE variance
        c = w_total_fe - (weights_fe**2).sum() / w_total_fe
        tau2 = (q - df) / c
    else:
        raise ValueError('method_re should be "iterated" or "chi2"')

    weights_re = 1 / (var_eff + tau2)  # no bias correction ?
    w_total_re = weights_re.sum(0)
    weights_rel_re = weights_re / weights_re.sum(0)

    eff_w_re = weights_rel_re * eff
    mean_effect_re = eff_w_re.sum()
    var_eff_w_re = 1 / w_total_re
    sd_eff_w_re = np.sqrt(var_eff_w_re)

    # HKSJ scale estimates, i.e. the WLS scale used to adjust standard errors
    scale_hksj_re = (weights_re * (eff - mean_effect_re)**2).sum() / df
    scale_hksj_fe = (weights_fe * (eff - mean_effect_fe)**2).sum() / df
    var_hksj_re = (weights_rel_re * (eff - mean_effect_re)**2).sum() / df
    var_hksj_fe = (weights_rel_fe * (eff - mean_effect_fe)**2).sum() / df

    # store all local variables, including intermediate results
    res = CombineResults(**locals())
    return res


def _fit_tau_iterative(eff, var_eff, tau2_start=0, atol=1e-5, maxiter=50):
    """Paule-Mandel iterative estimate of between random effect variance

    The implementation follows DerSimonian and Kacker 2007 Appendix 8;
    see also Kacker 2004.

    Parameters
    ----------
    eff : ndarray
        effect sizes
    var_eff : ndarray
        variance of effect sizes
    tau2_start : float
        starting value for iteration
    atol : float, default: 1e-5
        convergence tolerance for absolute value of estimating equation
    maxiter : int
        maximum number of iterations

    Returns
    -------
    tau2 : float
        estimate of random effects variance tau squared
    converged : bool
        True if iteration has converged.

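    Notes
    -----
    A short summary of the iteration as implemented below; this restates the
    code rather than adding a reference. With weights
    ``w_i = 1 / (var_eff_i + tau2)`` and weighted mean
    ``m = sum(w_i * eff_i) / sum(w_i)``, the Paule-Mandel estimate solves the
    estimating equation

        Q_w(tau2) = sum(w_i * (eff_i - m)**2) = k - 1

    Each iteration updates tau2 by
    ``delta = (Q_w - (k - 1)) / sum(w_i**2 * (eff_i - m)**2)``. If the
    estimating equation is negative at the current tau2, the estimate is set
    to zero.
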
    """
    tau2 = tau2_start
    k = eff.shape[0]
    converged = False
    for i in range(maxiter):
        w = 1 / (var_eff + tau2)
        m = w.dot(eff) / w.sum(0)
        resid_sq = (eff - m)**2
        q_w = w.dot(resid_sq)
        # estimating equation
        ee = q_w - (k - 1)
        if ee < 0:
            # negative estimating equation implies boundary solution tau2 = 0
            tau2 = 0
            converged = False
            break
        if np.allclose(ee, 0, atol=atol):
            converged = True
            break
        # update tau2
        delta = ee / (w**2).dot(resid_sq)
        tau2 += delta

    return tau2, converged


def _fit_tau_mm(eff, var_eff, weights):
    """one-step method of moments estimate of between random effect variance

    The implementation follows Kacker 2004 and DerSimonian and Kacker 2007
    eq. 6.

    Parameters
    ----------
    eff : ndarray
        effect sizes
    var_eff : ndarray
        variance of effect sizes
    weights : ndarray
        weights for estimating overall weighted mean

    Returns
    -------
    tau2 : float
        estimate of random effects variance tau squared

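    Notes
    -----
    A short summary of the computation below; this restates the code rather
    than adding a reference. With weighted mean
    ``m = sum(w_i * eff_i) / w_t``, ``w_t = sum(w_i)`` and
    ``q_w = sum(w_i * (eff_i - m)**2)``, the expected value of ``q_w`` at
    tau2 = 0 is

        expect = sum(w_i * var_eff_i) - sum(w_i**2 * var_eff_i) / w_t

    and each unit of tau2 adds ``denom = w_t - sum(w_i**2) / w_t`` to that
    expectation, so equating ``q_w`` to its expected value and solving gives

        tau2 = (q_w - expect) / denom
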
    """
    w = weights

    m = w.dot(eff) / w.sum(0)
    resid_sq = (eff - m)**2
    q_w = w.dot(resid_sq)
    w_t = w.sum()
    # expected value of q_w when tau2 is zero
    expect = w.dot(var_eff) - (w**2).dot(var_eff) / w_t
    denom = w_t - (w**2).sum() / w_t
    # moment estimate from estimating equation
    tau2 = (q_w - expect) / denom

    return tau2


def _fit_tau_iter_mm(eff, var_eff, tau2_start=0, atol=1e-5, maxiter=50):
    """iterated method of moments estimate of between random effect variance

    This repeatedly estimates tau2, updating the weights in each iteration;
    see the two-step estimators in DerSimonian and Kacker 2007.

    Parameters
    ----------
    eff : ndarray
        effect sizes
    var_eff : ndarray
        variance of effect sizes
    tau2_start : float
        starting value for iteration
    atol : float, default: 1e-5
        convergence tolerance for change in tau2 estimate between iterations
    maxiter : int
        maximum number of iterations

    Returns
    -------
    tau2 : float
        estimate of random effects variance tau squared
    converged : bool
        True if iteration has converged.

    """
    tau2 = tau2_start
    converged = False
    for _ in range(maxiter):
        # random effects weights at the current tau2 estimate
        w = 1 / (var_eff + tau2)

        tau2_new = _fit_tau_mm(eff, var_eff, w)
        # truncate at zero, tau2 is a variance
        tau2_new = max(0, tau2_new)

        delta = tau2_new - tau2
        if np.allclose(delta, 0, atol=atol):
            converged = True
            break

        tau2 = tau2_new

    return tau2, converged