1# -*- coding: utf-8 -*- 2""" 3Created on Thu Apr 2 14:34:25 2020 4 5Author: Josef Perktold 6License: BSD-3 7 8""" 9 10import numpy as np 11import pandas as pd 12from scipy import stats 13 14from statsmodels.stats.base import HolderTuple 15 16 17class CombineResults(object): 18 """Results from combined estimate of means or effect sizes 19 20 This currently includes intermediate results that might be removed 21 """ 22 23 def __init__(self, **kwds): 24 self.__dict__.update(kwds) 25 self._ini_keys = list(kwds.keys()) 26 27 self.df_resid = self.k - 1 28 29 # TODO: move to property ? 30 self.sd_eff_w_fe_hksj = np.sqrt(self.var_hksj_fe) 31 self.sd_eff_w_re_hksj = np.sqrt(self.var_hksj_re) 32 33 # explained variance measures 34 self.h2 = self.q / (self.k - 1) 35 self.i2 = 1 - 1 / self.h2 36 37 # memoize ci_samples 38 self.cache_ci = {} 39 40 def conf_int_samples(self, alpha=0.05, use_t=None, nobs=None, 41 ci_func=None): 42 """confidence intervals for the effect size estimate of samples 43 44 Additional information needs to be provided for confidence intervals 45 that are not based on normal distribution using available variance. 46 This is likely to change in future. 47 48 Parameters 49 ---------- 50 alpha : float in (0, 1) 51 Significance level for confidence interval. Nominal coverage is 52 ``1 - alpha``. 53 use_t : None or bool 54 If use_t is None, then the attribute `use_t` determines whether 55 normal or t-distribution is used for confidence intervals. 56 Specifying use_t overrides the attribute. 57 If use_t is false, then confidence intervals are based on the 58 normal distribution. If it is true, then the t-distribution is 59 used. 60 nobs : None or float 61 Number of observations used for degrees of freedom computation. 62 Only used if use_t is true. 63 ci_func : None or callable 64 User provided function to compute confidence intervals. 65 This is not used yet and will allow using non-standard confidence 66 intervals. 67 68 Returns 69 ------- 70 ci_eff : tuple of ndarrays 71 Tuple (ci_low, ci_upp) with confidence interval computed for each 72 sample. 73 74 Notes 75 ----- 76 CombineResults currently only has information from the combine_effects 77 function, which does not provide details about individual samples. 78 """ 79 # this is a bit messy, we don't have enough information about 80 # computing conf_int already in results for other than normal 81 # TODO: maybe there is a better 82 if (alpha, use_t) in self.cache_ci: 83 return self.cache_ci[(alpha, use_t)] 84 85 if use_t is None: 86 use_t = self.use_t 87 88 if ci_func is not None: 89 kwds = {"use_t": use_t} if use_t is not None else {} 90 ci_eff = ci_func(alpha=alpha, **kwds) 91 self.ci_sample_distr = "ci_func" 92 else: 93 if use_t is False: 94 crit = stats.norm.isf(alpha / 2) 95 self.ci_sample_distr = "normal" 96 else: 97 if nobs is not None: 98 df_resid = nobs - 1 99 crit = stats.t.isf(alpha / 2, df_resid) 100 self.ci_sample_distr = "t" 101 else: 102 msg = ("`use_t=True` requires `nobs` for each sample " 103 "or `ci_func`. Using normal distribution for " 104 "confidence interval of individual samples.") 105 import warnings 106 warnings.warn(msg) 107 crit = stats.norm.isf(alpha / 2) 108 self.ci_sample_distr = "normal" 109 110 # sgn = np.asarray([-1, 1]) 111 # ci_eff = self.eff + sgn * crit * self.sd_eff 112 ci_low = self.eff - crit * self.sd_eff 113 ci_upp = self.eff + crit * self.sd_eff 114 ci_eff = (ci_low, ci_upp) 115 116 # if (alpha, use_t) not in self.cache_ci: # not needed 117 self.cache_ci[(alpha, use_t)] = ci_eff 118 return ci_eff 119 120 def conf_int(self, alpha=0.05, use_t=None): 121 """confidence interval for the overall mean estimate 122 123 Parameters 124 ---------- 125 alpha : float in (0, 1) 126 Significance level for confidence interval. Nominal coverage is 127 ``1 - alpha``. 128 use_t : None or bool 129 If use_t is None, then the attribute `use_t` determines whether 130 normal or t-distribution is used for confidence intervals. 131 Specifying use_t overrides the attribute. 132 If use_t is false, then confidence intervals are based on the 133 normal distribution. If it is true, then the t-distribution is 134 used. 135 136 Returns 137 ------- 138 ci_eff_fe : tuple of floats 139 Confidence interval for mean effects size based on fixed effects 140 model with scale=1. 141 ci_eff_re : tuple of floats 142 Confidence interval for mean effects size based on random effects 143 model with scale=1 144 ci_eff_fe_wls : tuple of floats 145 Confidence interval for mean effects size based on fixed effects 146 model with estimated scale corresponding to WLS, ie. HKSJ. 147 ci_eff_re_wls : tuple of floats 148 Confidence interval for mean effects size based on random effects 149 model with estimated scale corresponding to WLS, ie. HKSJ. 150 If random effects method is fully iterated, i.e. Paule-Mandel, then 151 the estimated scale is 1. 152 153 """ 154 if use_t is None: 155 use_t = self.use_t 156 157 if use_t is False: 158 crit = stats.norm.isf(alpha / 2) 159 else: 160 crit = stats.t.isf(alpha / 2, self.df_resid) 161 162 sgn = np.asarray([-1, 1]) 163 m_fe = self.mean_effect_fe 164 m_re = self.mean_effect_re 165 ci_eff_fe = m_fe + sgn * crit * self.sd_eff_w_fe 166 ci_eff_re = m_re + sgn * crit * self.sd_eff_w_re 167 168 ci_eff_fe_wls = m_fe + sgn * crit * np.sqrt(self.var_hksj_fe) 169 ci_eff_re_wls = m_re + sgn * crit * np.sqrt(self.var_hksj_re) 170 171 return ci_eff_fe, ci_eff_re, ci_eff_fe_wls, ci_eff_re_wls 172 173 def test_homogeneity(self): 174 """Test whether the means of all samples are the same 175 176 currently no options, test uses chisquare distribution 177 default might change depending on `use_t` 178 179 Returns 180 ------- 181 res : HolderTuple instance 182 The results include the following attributes: 183 184 - statistic : float 185 Test statistic, ``q`` in meta-analysis, this is the 186 pearson_chi2 statistic for the fixed effects model. 187 - pvalue : float 188 P-value based on chisquare distribution. 189 - df : float 190 Degrees of freedom, equal to number of studies or samples 191 minus 1. 192 """ 193 pvalue = stats.chi2.sf(self.q, self.k - 1) 194 res = HolderTuple(statistic=self.q, 195 pvalue=pvalue, 196 df=self.k - 1, 197 distr="chi2") 198 return res 199 200 def summary_array(self, alpha=0.05, use_t=None): 201 """Create array with sample statistics and mean estimates 202 203 Parameters 204 ---------- 205 alpha : float in (0, 1) 206 Significance level for confidence interval. Nominal coverage is 207 ``1 - alpha``. 208 use_t : None or bool 209 If use_t is None, then the attribute `use_t` determines whether 210 normal or t-distribution is used for confidence intervals. 211 Specifying use_t overrides the attribute. 212 If use_t is false, then confidence intervals are based on the 213 normal distribution. If it is true, then the t-distribution is 214 used. 215 216 Returns 217 ------- 218 res : ndarray 219 Array with columns 220 ['eff', "sd_eff", "ci_low", "ci_upp", "w_fe","w_re"]. 221 Rows include statistics for samples and estimates of overall mean. 222 column_names : list of str 223 The names for the columns, used when creating summary DataFrame. 224 """ 225 226 ci_low, ci_upp = self.conf_int_samples(alpha=alpha, use_t=use_t) 227 res = np.column_stack([self.eff, self.sd_eff, 228 ci_low, ci_upp, 229 self.weights_rel_fe, self.weights_rel_re]) 230 231 ci = self.conf_int(alpha=alpha, use_t=use_t) 232 res_fe = [[self.mean_effect_fe, self.sd_eff_w_fe, 233 ci[0][0], ci[0][1], 1, np.nan]] 234 res_re = [[self.mean_effect_re, self.sd_eff_w_re, 235 ci[1][0], ci[1][1], np.nan, 1]] 236 res_fe_wls = [[self.mean_effect_fe, self.sd_eff_w_fe_hksj, 237 ci[2][0], ci[2][1], 1, np.nan]] 238 res_re_wls = [[self.mean_effect_re, self.sd_eff_w_re_hksj, 239 ci[3][0], ci[3][1], np.nan, 1]] 240 241 res = np.concatenate([res, res_fe, res_re, res_fe_wls, res_re_wls], 242 axis=0) 243 column_names = ['eff', "sd_eff", "ci_low", "ci_upp", "w_fe", "w_re"] 244 return res, column_names 245 246 def summary_frame(self, alpha=0.05, use_t=None): 247 """Create DataFrame with sample statistics and mean estimates 248 249 Parameters 250 ---------- 251 alpha : float in (0, 1) 252 Significance level for confidence interval. Nominal coverage is 253 ``1 - alpha``. 254 use_t : None or bool 255 If use_t is None, then the attribute `use_t` determines whether 256 normal or t-distribution is used for confidence intervals. 257 Specifying use_t overrides the attribute. 258 If use_t is false, then confidence intervals are based on the 259 normal distribution. If it is true, then the t-distribution is 260 used. 261 262 Returns 263 ------- 264 res : DataFrame 265 pandas DataFrame instance with columns 266 ['eff', "sd_eff", "ci_low", "ci_upp", "w_fe","w_re"]. 267 Rows include statistics for samples and estimates of overall mean. 268 269 """ 270 if use_t is None: 271 use_t = self.use_t 272 labels = (list(self.row_names) + 273 ["fixed effect", "random effect", 274 "fixed effect wls", "random effect wls"]) 275 res, col_names = self.summary_array(alpha=alpha, use_t=use_t) 276 results = pd.DataFrame(res, index=labels, columns=col_names) 277 return results 278 279 def plot_forest(self, alpha=0.05, use_t=None, use_exp=False, 280 ax=None, **kwds): 281 """Forest plot with means and confidence intervals 282 283 Parameters 284 ---------- 285 ax : None or matplotlib axis instance 286 If ax is provided, then the plot will be added to it. 287 alpha : float in (0, 1) 288 Significance level for confidence interval. Nominal coverage is 289 ``1 - alpha``. 290 use_t : None or bool 291 If use_t is None, then the attribute `use_t` determines whether 292 normal or t-distribution is used for confidence intervals. 293 Specifying use_t overrides the attribute. 294 If use_t is false, then confidence intervals are based on the 295 normal distribution. If it is true, then the t-distribution is 296 used. 297 use_exp : bool 298 If `use_exp` is True, then the effect size and confidence limits 299 will be exponentiated. This transform log-odds-ration into 300 odds-ratio, and similarly for risk-ratio. 301 ax : AxesSubplot, optional 302 If given, this axes is used to plot in instead of a new figure 303 being created. 304 kwds : optional keyword arguments 305 Keywords are forwarded to the dot_plot function that creates the 306 plot. 307 308 Returns 309 ------- 310 fig : Matplotlib figure instance 311 312 See Also 313 -------- 314 dot_plot 315 316 """ 317 from statsmodels.graphics.dotplots import dot_plot 318 res_df = self.summary_frame(alpha=alpha, use_t=use_t) 319 if use_exp: 320 res_df = np.exp(res_df[["eff", "ci_low", "ci_upp"]]) 321 hw = np.abs(res_df[["ci_low", "ci_upp"]] - res_df[["eff"]].values) 322 fig = dot_plot(points=res_df["eff"], intervals=hw, 323 lines=res_df.index, line_order=res_df.index, **kwds) 324 return fig 325 326 327def effectsize_smd(mean1, sd1, nobs1, mean2, sd2, nobs2): 328 """effect sizes for mean difference for use in meta-analysis 329 330 mean1, sd1, nobs1 are for treatment 331 mean2, sd2, nobs2 are for control 332 333 Effect sizes are computed for the mean difference ``mean1 - mean2`` 334 standardized by an estimate of the within variance. 335 336 This does not have option yet. 337 It uses standardized mean difference with bias correction as effect size. 338 339 This currently does not use np.asarray, all computations are possible in 340 pandas. 341 342 Parameters 343 ---------- 344 mean1 : array 345 mean of second sample, treatment groups 346 sd1 : array 347 standard deviation of residuals in treatment groups, within 348 nobs1 : array 349 number of observations in treatment groups 350 mean2, sd2, nobs2 : arrays 351 mean, standard deviation and number of observations of control groups 352 353 Returns 354 ------- 355 smd_bc : array 356 bias corrected estimate of standardized mean difference 357 var_smdbc : array 358 estimate of variance of smd_bc 359 360 Notes 361 ----- 362 Status: API will still change. This is currently intended for support of 363 meta-analysis. 364 365 References 366 ---------- 367 Borenstein, Michael. 2009. Introduction to Meta-Analysis. 368 Chichester: Wiley. 369 370 Chen, Ding-Geng, and Karl E. Peace. 2013. Applied Meta-Analysis with R. 371 Chapman & Hall/CRC Biostatistics Series. 372 Boca Raton: CRC Press/Taylor & Francis Group. 373 374 """ 375 # TODO: not used yet, design and options ? 376 # k = len(mean1) 377 # if row_names is None: 378 # row_names = list(range(k)) 379 # crit = stats.norm.isf(alpha / 2) 380 # var_diff_uneq = sd1**2 / nobs1 + sd2**2 / nobs2 381 var_diff = (sd1**2 * (nobs1 - 1) + 382 sd2**2 * (nobs2 - 1)) / (nobs1 + nobs2 - 2) 383 sd_diff = np.sqrt(var_diff) 384 nobs = nobs1 + nobs2 385 bias_correction = 1 - 3 / (4 * nobs - 9) 386 smd = (mean1 - mean2) / sd_diff 387 smd_bc = bias_correction * smd 388 var_smdbc = nobs / nobs1 / nobs2 + smd_bc**2 / 2 / (nobs - 3.94) 389 return smd_bc, var_smdbc 390 391 392def effectsize_2proportions(count1, nobs1, count2, nobs2, statistic="diff", 393 zero_correction=None, zero_kwds=None): 394 """Effects sizes for two sample binomial proportions 395 396 Parameters 397 ---------- 398 count1, nobs1, count2, nobs2 : array_like 399 data for two samples 400 statistic : {"diff", "odds-ratio", "risk-ratio", "arcsine"} 401 statistic for the comparison of two proportions 402 Effect sizes for "odds-ratio" and "risk-ratio" are in logarithm. 403 zero_correction : {None, float, "tac", "clip"} 404 Some statistics are not finite when zero counts are in the data. 405 The options to remove zeros are: 406 407 * float : if zero_correction is a single float, then it will be added 408 to all count (cells) if the sample has any zeros. 409 * "tac" : treatment arm continuity correction see Ruecker et al 2009, 410 section 3.2 411 * "clip" : clip proportions without adding a value to all cells 412 The clip bounds can be set with zero_kwds["clip_bounds"] 413 414 zero_kwds : dict 415 additional options to handle zero counts 416 "clip_bounds" tuple, default (1e-6, 1 - 1e-6) if zero_correction="clip" 417 other options not yet implemented 418 419 Returns 420 ------- 421 effect size : array 422 Effect size for each sample. 423 var_es : array 424 Estimate of variance of the effect size 425 426 Notes 427 ----- 428 Status: API is experimental, Options for zero handling is incomplete. 429 430 The names for ``statistics`` keyword can be shortened to "rd", "rr", "or" 431 and "as". 432 433 The statistics are defined as: 434 435 - risk difference = p1 - p2 436 - log risk ratio = log(p1 / p2) 437 - log odds_ratio = log(p1 / (1 - p1) * (1 - p2) / p2) 438 - arcsine-sqrt = arcsin(sqrt(p1)) - arcsin(sqrt(p2)) 439 440 where p1 and p2 are the estimated proportions in sample 1 (treatment) and 441 sample 2 (control). 442 443 log-odds-ratio and log-risk-ratio can be transformed back to ``or`` and 444 `rr` using `exp` function. 445 446 See Also 447 -------- 448 statsmodels.stats.contingency_tables 449 """ 450 if zero_correction is None: 451 cc1 = cc2 = 0 452 elif zero_correction == "tac": 453 # treatment arm continuity correction Ruecker et al 2009, section 3.2 454 nobs_t = nobs1 + nobs2 455 cc1 = nobs2 / nobs_t 456 cc2 = nobs1 / nobs_t 457 elif zero_correction == "clip": 458 clip_bounds = zero_kwds.get("clip_bounds", (1e-6, 1 - 1e-6)) 459 cc1 = cc2 = 0 460 elif zero_correction: 461 # TODO: check is float_like 462 cc1 = cc2 = zero_correction 463 else: 464 msg = "zero_correction not recognized or supported" 465 raise NotImplementedError(msg) 466 467 zero_mask1 = (count1 == 0) | (count1 == nobs1) 468 zero_mask2 = (count2 == 0) | (count2 == nobs2) 469 zmask = np.logical_or(zero_mask1, zero_mask2) 470 n1 = nobs1 + (cc1 + cc2) * zmask 471 n2 = nobs2 + (cc1 + cc2) * zmask 472 p1 = (count1 + cc1) / (n1) 473 p2 = (count2 + cc2) / (n2) 474 475 if zero_correction == "clip": 476 p1 = np.clip(p1, *clip_bounds) 477 p2 = np.clip(p2, *clip_bounds) 478 479 if statistic in ["diff", "rd"]: 480 rd = p1 - p2 481 rd_var = p1 * (1 - p1) / n1 + p2 * (1 - p2) / n2 482 eff = rd 483 var_eff = rd_var 484 elif statistic in ["risk-ratio", "rr"]: 485 # rr = p1 / p2 486 log_rr = np.log(p1) - np.log(p2) 487 log_rr_var = (1 - p1) / p1 / n1 + (1 - p2) / p2 / n2 488 eff = log_rr 489 var_eff = log_rr_var 490 elif statistic in ["odds-ratio", "or"]: 491 # or_ = p1 / (1 - p1) * (1 - p2) / p2 492 log_or = np.log(p1) - np.log(1 - p1) - np.log(p2) + np.log(1 - p2) 493 log_or_var = 1 / (p1 * (1 - p1) * n1) + 1 / (p2 * (1 - p2) * n2) 494 eff = log_or 495 var_eff = log_or_var 496 elif statistic in ["arcsine", "arcsin", "as"]: 497 as_ = np.arcsin(np.sqrt(p1)) - np.arcsin(np.sqrt(p2)) 498 as_var = (1 / n1 + 1 / n2) / 4 499 eff = as_ 500 var_eff = as_var 501 else: 502 msg = 'statistic not recognized, use one of "rd", "rr", "or", "as"' 503 raise NotImplementedError(msg) 504 505 return eff, var_eff 506 507 508def combine_effects(effect, variance, method_re="iterated", row_names=None, 509 use_t=False, alpha=0.05, **kwds): 510 """combining effect sizes for effect sizes using meta-analysis 511 512 This currently does not use np.asarray, all computations are possible in 513 pandas. 514 515 Parameters 516 ---------- 517 effect : array 518 mean of effect size measure for all samples 519 variance : array 520 variance of mean or effect size measure for all samples 521 method_re : {"iterated", "chi2"} 522 method that is use to compute the between random effects variance 523 "iterated" or "pm" uses Paule and Mandel method to iteratively 524 estimate the random effects variance. Options for the iteration can 525 be provided in the ``kwds`` 526 "chi2" or "dl" uses DerSimonian and Laird one-step estimator. 527 row_names : list of strings (optional) 528 names for samples or studies, will be included in results summary and 529 table. 530 alpha : float in (0, 1) 531 significance level, default is 0.05, for the confidence intervals 532 533 Returns 534 ------- 535 results : CombineResults 536 Contains estimation results and intermediate statistics, and includes 537 a method to return a summary table. 538 Statistics from intermediate calculations might be removed at a later 539 time. 540 541 Notes 542 ----- 543 Status: Basic functionality is verified, mainly compared to R metafor 544 package. However, API might still change. 545 546 This computes both fixed effects and random effects estimates. The 547 random effects results depend on the method to estimate the RE variance. 548 549 Scale estimate 550 In fixed effects models and in random effects models without fully 551 iterated random effects variance, the model will in general not account 552 for all residual variance. Traditional meta-analysis uses a fixed 553 scale equal to 1, that might not produce test statistics and 554 confidence intervals with the correct size. Estimating the scale to account 555 for residual variance often improves the small sample properties of 556 inference and confidence intervals. 557 This adjustment to the standard errors is often referred to as HKSJ 558 method based attributed to Hartung and Knapp and Sidik and Jonkman. 559 However, this is equivalent to estimating the scale in WLS. 560 The results instance includes both, fixed scale and estimated scale 561 versions of standard errors and confidence intervals. 562 563 References 564 ---------- 565 Borenstein, Michael. 2009. Introduction to Meta-Analysis. 566 Chichester: Wiley. 567 568 Chen, Ding-Geng, and Karl E. Peace. 2013. Applied Meta-Analysis with R. 569 Chapman & Hall/CRC Biostatistics Series. 570 Boca Raton: CRC Press/Taylor & Francis Group. 571 572 """ 573 574 k = len(effect) 575 if row_names is None: 576 row_names = list(range(k)) 577 crit = stats.norm.isf(alpha / 2) 578 579 # alias for initial version 580 eff = effect 581 var_eff = variance 582 sd_eff = np.sqrt(var_eff) 583 584 # fixed effects computation 585 586 weights_fe = 1 / var_eff # no bias correction ? 587 w_total_fe = weights_fe.sum(0) 588 weights_rel_fe = weights_fe / w_total_fe 589 590 eff_w_fe = weights_rel_fe * eff 591 mean_effect_fe = eff_w_fe.sum() 592 var_eff_w_fe = 1 / w_total_fe 593 sd_eff_w_fe = np.sqrt(var_eff_w_fe) 594 595 # random effects computation 596 597 q = (weights_fe * eff**2).sum(0) 598 q -= (weights_fe * eff).sum()**2 / w_total_fe 599 df = k - 1 600 601 if method_re.lower() in ["iterated", "pm"]: 602 tau2, _ = _fit_tau_iterative(eff, var_eff, **kwds) 603 elif method_re.lower() in ["chi2", "dl"]: 604 c = w_total_fe - (weights_fe**2).sum() / w_total_fe 605 tau2 = (q - df) / c 606 else: 607 raise ValueError('method_re should be "iterated" or "chi2"') 608 609 weights_re = 1 / (var_eff + tau2) # no bias_correction ? 610 w_total_re = weights_re.sum(0) 611 weights_rel_re = weights_re / weights_re.sum(0) 612 613 eff_w_re = weights_rel_re * eff 614 mean_effect_re = eff_w_re.sum() 615 var_eff_w_re = 1 / w_total_re 616 sd_eff_w_re = np.sqrt(var_eff_w_re) 617 # ci_low_eff_re = mean_effect_re - crit * sd_eff_w_re 618 # ci_upp_eff_re = mean_effect_re + crit * sd_eff_w_re 619 620 scale_hksj_re = (weights_re * (eff - mean_effect_re)**2).sum() / df 621 scale_hksj_fe = (weights_fe * (eff - mean_effect_fe)**2).sum() / df 622 var_hksj_re = (weights_rel_re * (eff - mean_effect_re)**2).sum() / df 623 var_hksj_fe = (weights_rel_fe * (eff - mean_effect_fe)**2).sum() / df 624 625 res = CombineResults(**locals()) 626 return res 627 628 629def _fit_tau_iterative(eff, var_eff, tau2_start=0, atol=1e-5, maxiter=50): 630 """Paule-Mandel iterative estimate of between random effect variance 631 632 implementation follows DerSimonian and Kacker 2007 Appendix 8 633 see also Kacker 2004 634 635 Parameters 636 ---------- 637 eff : ndarray 638 effect sizes 639 var_eff : ndarray 640 variance of effect sizes 641 tau2_start : float 642 starting value for iteration 643 atol : float, default: 1e-5 644 convergence tolerance for absolute value of estimating equation 645 maxiter : int 646 maximum number of iterations 647 648 Returns 649 ------- 650 tau2 : float 651 estimate of random effects variance tau squared 652 converged : bool 653 True if iteration has converged. 654 655 """ 656 tau2 = tau2_start 657 k = eff.shape[0] 658 converged = False 659 for i in range(maxiter): 660 w = 1 / (var_eff + tau2) 661 m = w.dot(eff) / w.sum(0) 662 resid_sq = (eff - m)**2 663 q_w = w.dot(resid_sq) 664 # estimating equation 665 ee = q_w - (k - 1) 666 if ee < 0: 667 tau2 = 0 668 converged = 0 669 break 670 if np.allclose(ee, 0, atol=atol): 671 converged = True 672 break 673 # update tau2 674 delta = ee / (w**2).dot(resid_sq) 675 tau2 += delta 676 677 return tau2, converged 678 679 680def _fit_tau_mm(eff, var_eff, weights): 681 """one-step method of moment estimate of between random effect variance 682 683 implementation follows Kacker 2004 and DerSimonian and Kacker 2007 eq. 6 684 685 Parameters 686 ---------- 687 eff : ndarray 688 effect sizes 689 var_eff : ndarray 690 variance of effect sizes 691 weights : ndarray 692 weights for estimating overall weighted mean 693 694 Returns 695 ------- 696 tau2 : float 697 estimate of random effects variance tau squared 698 699 """ 700 w = weights 701 702 m = w.dot(eff) / w.sum(0) 703 resid_sq = (eff - m)**2 704 q_w = w.dot(resid_sq) 705 w_t = w.sum() 706 expect = w.dot(var_eff) - (w**2).dot(var_eff) / w_t 707 denom = w_t - (w**2).sum() / w_t 708 # moment estimate from estimating equation 709 tau2 = (q_w - expect) / denom 710 711 return tau2 712 713 714def _fit_tau_iter_mm(eff, var_eff, tau2_start=0, atol=1e-5, maxiter=50): 715 """iterated method of moment estimate of between random effect variance 716 717 This repeatedly estimates tau, updating weights in each iteration 718 see two-step estimators in DerSimonian and Kacker 2007 719 720 Parameters 721 ---------- 722 eff : ndarray 723 effect sizes 724 var_eff : ndarray 725 variance of effect sizes 726 tau2_start : float 727 starting value for iteration 728 atol : float, default: 1e-5 729 convergence tolerance for change in tau2 estimate between iterations 730 maxiter : int 731 maximum number of iterations 732 733 Returns 734 ------- 735 tau2 : float 736 estimate of random effects variance tau squared 737 converged : bool 738 True if iteration has converged. 739 740 """ 741 tau2 = tau2_start 742 converged = False 743 for _ in range(maxiter): 744 w = 1 / (var_eff + tau2) 745 746 tau2_new = _fit_tau_mm(eff, var_eff, w) 747 tau2_new = max(0, tau2_new) 748 749 delta = tau2_new - tau2 750 if np.allclose(delta, 0, atol=atol): 751 converged = True 752 break 753 754 tau2 = tau2_new 755 756 return tau2, converged 757