1'''functions to work with contrasts for multiple tests 2 3contrast matrices for comparing all pairs, all levels to reference level, ... 4extension to 2-way groups in progress 5 6TwoWay: class for bringing two-way analysis together and try out 7various helper functions 8 9 10Idea for second part 11- get all transformation matrices to move in between different full rank 12 parameterizations 13- standardize to one parameterization to get all interesting effects. 14 15- multivariate normal distribution 16 - exploit or expand what we have in LikelihoodResults, cov_params, f_test, 17 t_test, example: resols_dropf_full.cov_params(C2) 18 - connect to new multiple comparison for contrast matrices, based on 19 multivariate normal or t distribution (Hothorn, Bretz, Westfall) 20 21''' 22 23 24 25from numpy.testing import assert_equal 26 27import numpy as np 28 29#next 3 functions copied from multicomp.py 30 31def contrast_allpairs(nm): 32 '''contrast or restriction matrix for all pairs of nm variables 33 34 Parameters 35 ---------- 36 nm : int 37 38 Returns 39 ------- 40 contr : ndarray, 2d, (nm*(nm-1)/2, nm) 41 contrast matrix for all pairwise comparisons 42 43 ''' 44 contr = [] 45 for i in range(nm): 46 for j in range(i+1, nm): 47 contr_row = np.zeros(nm) 48 contr_row[i] = 1 49 contr_row[j] = -1 50 contr.append(contr_row) 51 return np.array(contr) 52 53def contrast_all_one(nm): 54 '''contrast or restriction matrix for all against first comparison 55 56 Parameters 57 ---------- 58 nm : int 59 60 Returns 61 ------- 62 contr : ndarray, 2d, (nm-1, nm) 63 contrast matrix for all against first comparisons 64 65 ''' 66 contr = np.column_stack((np.ones(nm-1), -np.eye(nm-1))) 67 return contr 68 69def contrast_diff_mean(nm): 70 '''contrast or restriction matrix for all against mean comparison 71 72 Parameters 73 ---------- 74 nm : int 75 76 Returns 77 ------- 78 contr : ndarray, 2d, (nm-1, nm) 79 contrast matrix for all against mean comparisons 80 81 ''' 82 return np.eye(nm) - np.ones((nm,nm))/nm 83 84def signstr(x, noplus=False): 85 if x in [-1,0,1]: 86 if not noplus: 87 return '+' if np.sign(x)>=0 else '-' 88 else: 89 return '' if np.sign(x)>=0 else '-' 90 else: 91 return str(x) 92 93 94def contrast_labels(contrasts, names, reverse=False): 95 if reverse: 96 sl = slice(None, None, -1) 97 else: 98 sl = slice(None) 99 labels = [''.join(['%s%s' % (signstr(c, noplus=True),v) 100 for c,v in zip(row, names)[sl] if c != 0]) 101 for row in contrasts] 102 return labels 103 104def contrast_product(names1, names2, intgroup1=None, intgroup2=None, pairs=False): 105 '''build contrast matrices for products of two categorical variables 106 107 this is an experimental script and should be converted to a class 108 109 Parameters 110 ---------- 111 names1, names2 : lists of strings 112 contains the list of level labels for each categorical variable 113 intgroup1, intgroup2 : ndarrays TODO: this part not tested, finished yet 114 categorical variable 115 116 117 Notes 118 ----- 119 This creates a full rank matrix. It does not do all pairwise comparisons, 120 parameterization is using contrast_all_one to get differences with first 121 level. 122 123 ? does contrast_all_pairs work as a plugin to get all pairs ? 124 125 ''' 126 127 n1 = len(names1) 128 n2 = len(names2) 129 names_prod = ['%s_%s' % (i,j) for i in names1 for j in names2] 130 ee1 = np.zeros((1,n1)) 131 ee1[0,0] = 1 132 if not pairs: 133 dd = np.r_[ee1, -contrast_all_one(n1)] 134 else: 135 dd = np.r_[ee1, -contrast_allpairs(n1)] 136 137 contrast_prod = np.kron(dd[1:], np.eye(n2)) 138 names_contrast_prod0 = contrast_labels(contrast_prod, names_prod, reverse=True) 139 names_contrast_prod = [''.join(['%s%s' % (signstr(c, noplus=True),v) 140 for c,v in zip(row, names_prod)[::-1] if c != 0]) 141 for row in contrast_prod] 142 143 ee2 = np.zeros((1,n2)) 144 ee2[0,0] = 1 145 #dd2 = np.r_[ee2, -contrast_all_one(n2)] 146 if not pairs: 147 dd2 = np.r_[ee2, -contrast_all_one(n2)] 148 else: 149 dd2 = np.r_[ee2, -contrast_allpairs(n2)] 150 151 contrast_prod2 = np.kron(np.eye(n1), dd2[1:]) 152 names_contrast_prod2 = [''.join(['%s%s' % (signstr(c, noplus=True),v) 153 for c,v in zip(row, names_prod)[::-1] if c != 0]) 154 for row in contrast_prod2] 155 156 if (intgroup1 is not None) and (intgroup1 is not None): 157 d1, _ = dummy_1d(intgroup1) 158 d2, _ = dummy_1d(intgroup2) 159 dummy = dummy_product(d1, d2) 160 else: 161 dummy = None 162 163 return (names_prod, contrast_prod, names_contrast_prod, 164 contrast_prod2, names_contrast_prod2, dummy) 165 166 167 168 169 170def dummy_1d(x, varname=None): 171 '''dummy variable for id integer groups 172 173 Parameters 174 ---------- 175 x : ndarray, 1d 176 categorical variable, requires integers if varname is None 177 varname : str 178 name of the variable used in labels for category levels 179 180 Returns 181 ------- 182 dummy : ndarray, 2d 183 array of dummy variables, one column for each level of the 184 category (full set) 185 labels : list[str] 186 labels for the columns, i.e. levels of each category 187 188 189 Notes 190 ----- 191 use tools.categorical instead for more more options 192 193 See Also 194 -------- 195 statsmodels.tools.categorical 196 197 Examples 198 -------- 199 >>> x = np.array(['F', 'F', 'M', 'M', 'F', 'F', 'M', 'M', 'F', 'F', 'M', 'M'], 200 dtype='|S1') 201 >>> dummy_1d(x, varname='gender') 202 (array([[1, 0], 203 [1, 0], 204 [0, 1], 205 [0, 1], 206 [1, 0], 207 [1, 0], 208 [0, 1], 209 [0, 1], 210 [1, 0], 211 [1, 0], 212 [0, 1], 213 [0, 1]]), ['gender_F', 'gender_M']) 214 215 ''' 216 if varname is None: #assumes integer 217 labels = ['level_%d' % i for i in range(x.max() + 1)] 218 return (x[:,None]==np.arange(x.max()+1)).astype(int), labels 219 else: 220 grouplabels = np.unique(x) 221 labels = [varname + '_%s' % str(i) for i in grouplabels] 222 return (x[:,None]==grouplabels).astype(int), labels 223 224 225def dummy_product(d1, d2, method='full'): 226 '''dummy variable from product of two dummy variables 227 228 Parameters 229 ---------- 230 d1, d2 : ndarray 231 two dummy variables, assumes full set for methods 'drop-last' 232 and 'drop-first' 233 method : {'full', 'drop-last', 'drop-first'} 234 'full' returns the full product, encoding of intersection of 235 categories. 236 The drop methods provide a difference dummy encoding: 237 (constant, main effects, interaction effects). The first or last columns 238 of the dummy variable (i.e. levels) are dropped to get full rank 239 dummy matrix. 240 241 Returns 242 ------- 243 dummy : ndarray 244 dummy variable for product, see method 245 246 ''' 247 248 if method == 'full': 249 dd = (d1[:,:,None]*d2[:,None,:]).reshape(d1.shape[0],-1) 250 elif method == 'drop-last': #same as SAS transreg 251 d12rl = dummy_product(d1[:,:-1], d2[:,:-1]) 252 dd = np.column_stack((np.ones(d1.shape[0], int), d1[:,:-1], d2[:,:-1],d12rl)) 253 #Note: dtype int should preserve dtype of d1 and d2 254 elif method == 'drop-first': 255 d12r = dummy_product(d1[:,1:], d2[:,1:]) 256 dd = np.column_stack((np.ones(d1.shape[0], int), d1[:,1:], d2[:,1:],d12r)) 257 else: 258 raise ValueError('method not recognized') 259 260 return dd 261 262def dummy_limits(d): 263 '''start and endpoints of groups in a sorted dummy variable array 264 265 helper function for nested categories 266 267 Examples 268 -------- 269 >>> d1 = np.array([[1, 0, 0], 270 [1, 0, 0], 271 [1, 0, 0], 272 [1, 0, 0], 273 [0, 1, 0], 274 [0, 1, 0], 275 [0, 1, 0], 276 [0, 1, 0], 277 [0, 0, 1], 278 [0, 0, 1], 279 [0, 0, 1], 280 [0, 0, 1]]) 281 >>> dummy_limits(d1) 282 (array([0, 4, 8]), array([ 4, 8, 12])) 283 284 get group slices from an array 285 286 >>> [np.arange(d1.shape[0])[b:e] for b,e in zip(*dummy_limits(d1))] 287 [array([0, 1, 2, 3]), array([4, 5, 6, 7]), array([ 8, 9, 10, 11])] 288 >>> [np.arange(d1.shape[0])[b:e] for b,e in zip(*dummy_limits(d1))] 289 [array([0, 1, 2, 3]), array([4, 5, 6, 7]), array([ 8, 9, 10, 11])] 290 ''' 291 nobs, nvars = d.shape 292 start1, col1 = np.nonzero(np.diff(d,axis=0)==1) 293 end1, col1_ = np.nonzero(np.diff(d,axis=0)==-1) 294 cc = np.arange(nvars) 295 #print(cc, np.r_[[0], col1], np.r_[col1_, [nvars-1]] 296 if ((not (np.r_[[0], col1] == cc).all()) 297 or (not (np.r_[col1_, [nvars-1]] == cc).all())): 298 raise ValueError('dummy variable is not sorted') 299 300 start = np.r_[[0], start1+1] 301 end = np.r_[end1+1, [nobs]] 302 return start, end 303 304 305 306def dummy_nested(d1, d2, method='full'): 307 '''unfinished and incomplete mainly copy past dummy_product 308 dummy variable from product of two dummy variables 309 310 Parameters 311 ---------- 312 d1, d2 : ndarray 313 two dummy variables, d2 is assumed to be nested in d1 314 Assumes full set for methods 'drop-last' and 'drop-first'. 315 method : {'full', 'drop-last', 'drop-first'} 316 'full' returns the full product, which in this case is d2. 317 The drop methods provide an effects encoding: 318 (constant, main effects, subgroup effects). The first or last columns 319 of the dummy variable (i.e. levels) are dropped to get full rank 320 encoding. 321 322 Returns 323 ------- 324 dummy : ndarray 325 dummy variable for product, see method 326 327 ''' 328 if method == 'full': 329 return d2 330 331 start1, end1 = dummy_limits(d1) 332 start2, end2 = dummy_limits(d2) 333 first = np.in1d(start2, start1) 334 last = np.in1d(end2, end1) 335 equal = (first == last) 336 col_dropf = ~first*~equal 337 col_dropl = ~last*~equal 338 339 340 if method == 'drop-last': 341 d12rl = dummy_product(d1[:,:-1], d2[:,:-1]) 342 dd = np.column_stack((np.ones(d1.shape[0], int), d1[:,:-1], d2[:,col_dropl])) 343 #Note: dtype int should preserve dtype of d1 and d2 344 elif method == 'drop-first': 345 d12r = dummy_product(d1[:,1:], d2[:,1:]) 346 dd = np.column_stack((np.ones(d1.shape[0], int), d1[:,1:], d2[:,col_dropf])) 347 else: 348 raise ValueError('method not recognized') 349 350 return dd, col_dropf, col_dropl 351 352 353class DummyTransform(object): 354 '''Conversion between full rank dummy encodings 355 356 357 y = X b + u 358 b = C a 359 a = C^{-1} b 360 361 y = X C a + u 362 363 define Z = X C, then 364 365 y = Z a + u 366 367 contrasts: 368 369 R_b b = r 370 371 R_a a = R_b C a = r 372 373 where R_a = R_b C 374 375 Here C is the transform matrix, with dot_left and dot_right as the main 376 methods, and the same for the inverse transform matrix, C^{-1} 377 378 Note: 379 - The class was mainly written to keep left and right straight. 380 - No checking is done. 381 - not sure yet if method names make sense 382 383 384 ''' 385 386 def __init__(self, d1, d2): 387 '''C such that d1 C = d2, with d1 = X, d2 = Z 388 389 should be (x, z) in arguments ? 390 ''' 391 self.transf_matrix = np.linalg.lstsq(d1, d2, rcond=-1)[0] 392 self.invtransf_matrix = np.linalg.lstsq(d2, d1, rcond=-1)[0] 393 394 def dot_left(self, a): 395 ''' b = C a 396 ''' 397 return np.dot(self.transf_matrix, a) 398 399 def dot_right(self, x): 400 ''' z = x C 401 ''' 402 return np.dot(x, self.transf_matrix) 403 404 def inv_dot_left(self, b): 405 ''' a = C^{-1} b 406 ''' 407 return np.dot(self.invtransf_matrix, b) 408 409 def inv_dot_right(self, z): 410 ''' x = z C^{-1} 411 ''' 412 return np.dot(z, self.invtransf_matrix) 413 414 415 416 417 418def groupmean_d(x, d): 419 '''groupmeans using dummy variables 420 421 Parameters 422 ---------- 423 x : array_like, ndim 424 data array, tested for 1,2 and 3 dimensions 425 d : ndarray, 1d 426 dummy variable, needs to have the same length 427 as x in axis 0. 428 429 Returns 430 ------- 431 groupmeans : ndarray, ndim-1 432 means for each group along axis 0, the levels 433 of the groups are the last axis 434 435 Notes 436 ----- 437 This will be memory intensive if there are many levels 438 in the categorical variable, i.e. many columns in the 439 dummy variable. In this case it is recommended to use 440 a more efficient version. 441 442 ''' 443 x = np.asarray(x) 444## if x.ndim == 1: 445## nvars = 1 446## else: 447 nvars = x.ndim + 1 448 sli = [slice(None)] + [None]*(nvars-2) + [slice(None)] 449 return (x[...,None] * d[sli]).sum(0)*1./d.sum(0) 450 451 452 453class TwoWay(object): 454 '''a wrapper class for two way anova type of analysis with OLS 455 456 457 currently mainly to bring things together 458 459 Notes 460 ----- 461 unclear: adding multiple test might assume block design or orthogonality 462 463 This estimates the full dummy version with OLS. 464 The drop first dummy representation can be recovered through the 465 transform method. 466 467 TODO: add more methods, tests, pairwise, multiple, marginal effects 468 try out what can be added for userfriendly access. 469 470 missing: ANOVA table 471 472 ''' 473 def __init__(self, endog, factor1, factor2, varnames=None): 474 self.nobs = factor1.shape[0] 475 if varnames is None: 476 vname1 = 'a' 477 vname2 = 'b' 478 else: 479 vname1, vname1 = varnames 480 481 self.d1, self.d1_labels = d1, d1_labels = dummy_1d(factor1, vname1) 482 self.d2, self.d2_labels = d2, d2_labels = dummy_1d(factor2, vname2) 483 self.nlevel1 = nlevel1 = d1.shape[1] 484 self.nlevel2 = nlevel2 = d2.shape[1] 485 486 487 #get product dummies 488 res = contrast_product(d1_labels, d2_labels) 489 prodlab, C1, C1lab, C2, C2lab, _ = res 490 self.prod_label, self.C1, self.C1_label, self.C2, self.C2_label, _ = res 491 dp_full = dummy_product(d1, d2, method='full') 492 dp_dropf = dummy_product(d1, d2, method='drop-first') 493 self.transform = DummyTransform(dp_full, dp_dropf) 494 495 #estimate the model 496 self.nvars = dp_full.shape[1] 497 self.exog = dp_full 498 self.resols = sm.OLS(endog, dp_full).fit() 499 self.params = self.resols.params 500 501 #get transformed parameters, (constant, main, interaction effect) 502 self.params_dropf = self.transform.inv_dot_left(self.params) 503 self.start_interaction = 1 + (nlevel1 - 1) + (nlevel2 - 1) 504 self.n_interaction = self.nvars - self.start_interaction 505 506 #convert to cached property 507 def r_nointer(self): 508 '''contrast/restriction matrix for no interaction 509 ''' 510 nia = self.n_interaction 511 R_nointer = np.hstack((np.zeros((nia, self.nvars-nia)), np.eye(nia))) 512 #inter_direct = resols_full_dropf.tval[-nia:] 513 R_nointer_transf = self.transform.inv_dot_right(R_nointer) 514 self.R_nointer_transf = R_nointer_transf 515 return R_nointer_transf 516 517 def ttest_interaction(self): 518 '''ttests for no-interaction terms are zero 519 ''' 520 #use self.r_nointer instead 521 nia = self.n_interaction 522 R_nointer = np.hstack((np.zeros((nia, self.nvars-nia)), np.eye(nia))) 523 #inter_direct = resols_full_dropf.tval[-nia:] 524 R_nointer_transf = self.transform.inv_dot_right(R_nointer) 525 self.R_nointer_transf = R_nointer_transf 526 t_res = self.resols.t_test(R_nointer_transf) 527 return t_res 528 529 def ftest_interaction(self): 530 '''ttests for no-interaction terms are zero 531 ''' 532 R_nointer_transf = self.r_nointer() 533 return self.resols.f_test(R_nointer_transf) 534 535 def ttest_conditional_effect(self, factorind): 536 if factorind == 1: 537 return self.resols.t_test(self.C1), self.C1_label 538 else: 539 return self.resols.t_test(self.C2), self.C2_label 540 541 def summary_coeff(self): 542 from statsmodels.iolib import SimpleTable 543 params_arr = self.params.reshape(self.nlevel1, self.nlevel2) 544 stubs = self.d1_labels 545 headers = self.d2_labels 546 title = 'Estimated Coefficients by factors' 547 table_fmt = dict( 548 data_fmts = ["%#10.4g"]*self.nlevel2) 549 return SimpleTable(params_arr, headers, stubs, title=title, 550 txt_fmt=table_fmt) 551 552 553# --------------- tests 554# TODO: several tests still missing, several are in the example with print 555 556class TestContrastTools(object): 557 558 def __init__(self): 559 self.v1name = ['a0', 'a1', 'a2'] 560 self.v2name = ['b0', 'b1'] 561 self.d1 = np.array([[1, 0, 0], 562 [1, 0, 0], 563 [1, 0, 0], 564 [1, 0, 0], 565 [0, 1, 0], 566 [0, 1, 0], 567 [0, 1, 0], 568 [0, 1, 0], 569 [0, 0, 1], 570 [0, 0, 1], 571 [0, 0, 1], 572 [0, 0, 1]]) 573 574 def test_dummy_1d(self): 575 x = np.array(['F', 'F', 'M', 'M', 'F', 'F', 'M', 'M', 'F', 'F', 'M', 'M'], 576 dtype='|S1') 577 d, labels = (np.array([[1, 0], 578 [1, 0], 579 [0, 1], 580 [0, 1], 581 [1, 0], 582 [1, 0], 583 [0, 1], 584 [0, 1], 585 [1, 0], 586 [1, 0], 587 [0, 1], 588 [0, 1]]), ['gender_F', 'gender_M']) 589 res_d, res_labels = dummy_1d(x, varname='gender') 590 assert_equal(res_d, d) 591 assert_equal(res_labels, labels) 592 593 def test_contrast_product(self): 594 res_cp = contrast_product(self.v1name, self.v2name) 595 res_t = [0]*6 596 res_t[0] = ['a0_b0', 'a0_b1', 'a1_b0', 'a1_b1', 'a2_b0', 'a2_b1'] 597 res_t[1] = np.array([[-1., 0., 1., 0., 0., 0.], 598 [ 0., -1., 0., 1., 0., 0.], 599 [-1., 0., 0., 0., 1., 0.], 600 [ 0., -1., 0., 0., 0., 1.]]) 601 res_t[2] = ['a1_b0-a0_b0', 'a1_b1-a0_b1', 'a2_b0-a0_b0', 'a2_b1-a0_b1'] 602 res_t[3] = np.array([[-1., 1., 0., 0., 0., 0.], 603 [ 0., 0., -1., 1., 0., 0.], 604 [ 0., 0., 0., 0., -1., 1.]]) 605 res_t[4] = ['a0_b1-a0_b0', 'a1_b1-a1_b0', 'a2_b1-a2_b0'] 606 for ii in range(5): 607 np.testing.assert_equal(res_cp[ii], res_t[ii], err_msg=str(ii)) 608 609 def test_dummy_limits(self): 610 b,e = dummy_limits(self.d1) 611 assert_equal(b, np.array([0, 4, 8])) 612 assert_equal(e, np.array([ 4, 8, 12])) 613 614 615 616 617if __name__ == '__main__': 618 tt = TestContrastTools() 619 tt.test_contrast_product() 620 tt.test_dummy_1d() 621 tt.test_dummy_limits() 622 623 import statsmodels.api as sm 624 625 examples = ['small', 'large', None][1] 626 627 v1name = ['a0', 'a1', 'a2'] 628 v2name = ['b0', 'b1'] 629 res_cp = contrast_product(v1name, v2name) 630 print(res_cp) 631 632 y = np.arange(12) 633 x1 = np.arange(12)//4 634 x2 = np.arange(12)//2 % 2 635 636 if 'small' in examples: 637 d1, d1_labels = dummy_1d(x1) 638 d2, d2_labels = dummy_1d(x2) 639 640 641 if 'large' in examples: 642 x1 = np.repeat(x1, 5, axis=0) 643 x2 = np.repeat(x2, 5, axis=0) 644 645 nobs = x1.shape[0] 646 d1, d1_labels = dummy_1d(x1) 647 d2, d2_labels = dummy_1d(x2) 648 649 dd_full = dummy_product(d1, d2, method='full') 650 dd_dropl = dummy_product(d1, d2, method='drop-last') 651 dd_dropf = dummy_product(d1, d2, method='drop-first') 652 653 #Note: full parameterization of dummies is orthogonal 654 #np.eye(6)*10 in "large" example 655 print((np.dot(dd_full.T, dd_full) == np.diag(dd_full.sum(0))).all()) 656 657 #check that transforms work 658 #generate 3 data sets with the 3 different parameterizations 659 660 effect_size = [1., 0.01][1] 661 noise_scale = [0.001, 0.1][0] 662 noise = noise_scale * np.random.randn(nobs) 663 beta = effect_size * np.arange(1,7) 664 ydata_full = (dd_full * beta).sum(1) + noise 665 ydata_dropl = (dd_dropl * beta).sum(1) + noise 666 ydata_dropf = (dd_dropf * beta).sum(1) + noise 667 668 resols_full_full = sm.OLS(ydata_full, dd_full).fit() 669 resols_full_dropf = sm.OLS(ydata_full, dd_dropf).fit() 670 params_f_f = resols_full_full.params 671 params_f_df = resols_full_dropf.params 672 673 resols_dropf_full = sm.OLS(ydata_dropf, dd_full).fit() 674 resols_dropf_dropf = sm.OLS(ydata_dropf, dd_dropf).fit() 675 params_df_f = resols_dropf_full.params 676 params_df_df = resols_dropf_dropf.params 677 678 679 tr_of = np.linalg.lstsq(dd_dropf, dd_full, rcond=-1)[0] 680 tr_fo = np.linalg.lstsq(dd_full, dd_dropf, rcond=-1)[0] 681 print(np.dot(tr_fo, params_df_df) - params_df_f) 682 print(np.dot(tr_of, params_f_f) - params_f_df) 683 684 transf_f_df = DummyTransform(dd_full, dd_dropf) 685 print(np.max(np.abs((dd_full - transf_f_df.inv_dot_right(dd_dropf))))) 686 print(np.max(np.abs((dd_dropf - transf_f_df.dot_right(dd_full))))) 687 print(np.max(np.abs((params_df_df 688 - transf_f_df.inv_dot_left(params_df_f))))) 689 np.max(np.abs((params_f_df 690 - transf_f_df.inv_dot_left(params_f_f)))) 691 692 prodlab, C1, C1lab, C2, C2lab,_ = contrast_product(v1name, v2name) 693 694 print('\ntvalues for no effect of factor 1') 695 print('each test is conditional on a level of factor 2') 696 print(C1lab) 697 print(resols_dropf_full.t_test(C1).tvalue) 698 699 print('\ntvalues for no effect of factor 2') 700 print('each test is conditional on a level of factor 1') 701 print(C2lab) 702 print(resols_dropf_full.t_test(C2).tvalue) 703 704 #covariance matrix of restrictions C2, note: orthogonal 705 resols_dropf_full.cov_params(C2) 706 707 #testing for no interaction effect 708 R_noint = np.hstack((np.zeros((2,4)), np.eye(2))) 709 inter_direct = resols_full_dropf.tvalues[-2:] 710 inter_transf = resols_full_full.t_test(transf_f_df.inv_dot_right(R_noint)).tvalue 711 print(np.max(np.abs((inter_direct - inter_transf)))) 712 713 #now with class version 714 tw = TwoWay(ydata_dropf, x1, x2) 715 print(tw.ttest_interaction().tvalue) 716 print(tw.ttest_interaction().pvalue) 717 print(tw.ftest_interaction().fvalue) 718 print(tw.ftest_interaction().pvalue) 719 print(tw.ttest_conditional_effect(1)[0].tvalue) 720 print(tw.ttest_conditional_effect(2)[0].tvalue) 721 print(tw.summary_coeff()) 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739''' documentation for early examples while developing - some have changed already 740>>> y = np.arange(12) 741>>> y 742array([ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11]) 743>>> x1 = np.arange(12)//4 744>>> x1 745array([0, 0, 0, 0, 1, 1, 1, 1, 2, 2, 2, 2]) 746>>> x2 = np.arange(12)//2%2 747>>> x2 748array([0, 0, 1, 1, 0, 0, 1, 1, 0, 0, 1, 1]) 749 750>>> d1 = dummy_1d(x1) 751>>> d1 752array([[1, 0, 0], 753 [1, 0, 0], 754 [1, 0, 0], 755 [1, 0, 0], 756 [0, 1, 0], 757 [0, 1, 0], 758 [0, 1, 0], 759 [0, 1, 0], 760 [0, 0, 1], 761 [0, 0, 1], 762 [0, 0, 1], 763 [0, 0, 1]]) 764 765>>> d2 = dummy_1d(x2) 766>>> d2 767array([[1, 0], 768 [1, 0], 769 [0, 1], 770 [0, 1], 771 [1, 0], 772 [1, 0], 773 [0, 1], 774 [0, 1], 775 [1, 0], 776 [1, 0], 777 [0, 1], 778 [0, 1]]) 779 780>>> d12 = dummy_product(d1, d2) 781>>> d12 782array([[1, 0, 0, 0, 0, 0], 783 [1, 0, 0, 0, 0, 0], 784 [0, 1, 0, 0, 0, 0], 785 [0, 1, 0, 0, 0, 0], 786 [0, 0, 1, 0, 0, 0], 787 [0, 0, 1, 0, 0, 0], 788 [0, 0, 0, 1, 0, 0], 789 [0, 0, 0, 1, 0, 0], 790 [0, 0, 0, 0, 1, 0], 791 [0, 0, 0, 0, 1, 0], 792 [0, 0, 0, 0, 0, 1], 793 [0, 0, 0, 0, 0, 1]]) 794 795 796>>> d12rl = dummy_product(d1[:,:-1], d2[:,:-1]) 797>>> np.column_stack((np.ones(d1.shape[0]), d1[:,:-1], d2[:,:-1],d12rl)) 798array([[ 1., 1., 0., 1., 1., 0.], 799 [ 1., 1., 0., 1., 1., 0.], 800 [ 1., 1., 0., 0., 0., 0.], 801 [ 1., 1., 0., 0., 0., 0.], 802 [ 1., 0., 1., 1., 0., 1.], 803 [ 1., 0., 1., 1., 0., 1.], 804 [ 1., 0., 1., 0., 0., 0.], 805 [ 1., 0., 1., 0., 0., 0.], 806 [ 1., 0., 0., 1., 0., 0.], 807 [ 1., 0., 0., 1., 0., 0.], 808 [ 1., 0., 0., 0., 0., 0.], 809 [ 1., 0., 0., 0., 0., 0.]]) 810''' 811 812 813 814 815#nprod = ['%s_%s' % (i,j) for i in ['a0', 'a1', 'a2'] for j in ['b0', 'b1']] 816#>>> [''.join(['%s%s' % (signstr(c),v) for c,v in zip(row, nprod) if c != 0]) 817# for row in np.kron(dd[1:], np.eye(2))] 818 819 820 821''' 822>>> nprod = ['%s_%s' % (i,j) for i in ['a0', 'a1', 'a2'] for j in ['b0', 'b1']] 823>>> nprod 824['a0_b0', 'a0_b1', 'a1_b0', 'a1_b1', 'a2_b0', 'a2_b1'] 825>>> [''.join(['%s%s' % (signstr(c),v) for c,v in zip(row, nprod) if c != 0]) for row in np.kron(dd[1:], np.eye(2))] 826['-a0b0+a1b0', '-a0b1+a1b1', '-a0b0+a2b0', '-a0b1+a2b1'] 827>>> [''.join(['%s%s' % (signstr(c),v) for c,v in zip(row, nprod)[::-1] if c != 0]) for row in np.kron(dd[1:], np.eye(2))] 828['+a1_b0-a0_b0', '+a1_b1-a0_b1', '+a2_b0-a0_b0', '+a2_b1-a0_b1'] 829 830>>> np.r_[[[1,0,0,0,0]],contrast_all_one(5)] 831array([[ 1., 0., 0., 0., 0.], 832 [ 1., -1., 0., 0., 0.], 833 [ 1., 0., -1., 0., 0.], 834 [ 1., 0., 0., -1., 0.], 835 [ 1., 0., 0., 0., -1.]]) 836 837>>> idxprod = [(i,j) for i in range(3) for j in range(2)] 838>>> idxprod 839[(0, 0), (0, 1), (1, 0), (1, 1), (2, 0), (2, 1)] 840>>> np.array(idxprod).reshape(2,3,2,order='F')[:,:,0] 841array([[0, 1, 2], 842 [0, 1, 2]]) 843>>> np.array(idxprod).reshape(2,3,2,order='F')[:,:,1] 844array([[0, 0, 0], 845 [1, 1, 1]]) 846>>> dd3_ = np.r_[[[0,0,0]],contrast_all_one(3)] 847 848 849 850pairwise contrasts and reparameterization 851 852dd = np.r_[[[1,0,0,0,0]],-contrast_all_one(5)] 853>>> dd 854array([[ 1., 0., 0., 0., 0.], 855 [-1., 1., 0., 0., 0.], 856 [-1., 0., 1., 0., 0.], 857 [-1., 0., 0., 1., 0.], 858 [-1., 0., 0., 0., 1.]]) 859>>> np.dot(dd.T, np.arange(5)) 860array([-10., 1., 2., 3., 4.]) 861>>> np.round(np.linalg.inv(dd.T)).astype(int) 862array([[1, 1, 1, 1, 1], 863 [0, 1, 0, 0, 0], 864 [0, 0, 1, 0, 0], 865 [0, 0, 0, 1, 0], 866 [0, 0, 0, 0, 1]]) 867>>> np.round(np.linalg.inv(dd)).astype(int) 868array([[1, 0, 0, 0, 0], 869 [1, 1, 0, 0, 0], 870 [1, 0, 1, 0, 0], 871 [1, 0, 0, 1, 0], 872 [1, 0, 0, 0, 1]]) 873>>> dd 874array([[ 1., 0., 0., 0., 0.], 875 [-1., 1., 0., 0., 0.], 876 [-1., 0., 1., 0., 0.], 877 [-1., 0., 0., 1., 0.], 878 [-1., 0., 0., 0., 1.]]) 879>>> ddinv=np.round(np.linalg.inv(dd.T)).astype(int) 880>>> np.dot(ddinv, np.arange(5)) 881array([10, 1, 2, 3, 4]) 882>>> np.dot(dd, np.arange(5)) 883array([ 0., 1., 2., 3., 4.]) 884>>> np.dot(dd, 5+np.arange(5)) 885array([ 5., 1., 2., 3., 4.]) 886>>> ddinv2 = np.round(np.linalg.inv(dd)).astype(int) 887>>> np.dot(ddinv2, np.arange(5)) 888array([0, 1, 2, 3, 4]) 889>>> np.dot(ddinv2, 5+np.arange(5)) 890array([ 5, 11, 12, 13, 14]) 891>>> np.dot(ddinv2, [5, 0, 0 , 1, 2]) 892array([5, 5, 5, 6, 7]) 893>>> np.dot(ddinv2, dd) 894array([[ 1., 0., 0., 0., 0.], 895 [ 0., 1., 0., 0., 0.], 896 [ 0., 0., 1., 0., 0.], 897 [ 0., 0., 0., 1., 0.], 898 [ 0., 0., 0., 0., 1.]]) 899 900 901 902>>> dd3 = -np.r_[[[1,0,0]],contrast_all_one(3)] 903>>> dd2 = -np.r_[[[1,0]],contrast_all_one(2)] 904>>> np.kron(np.eye(3), dd2) 905array([[-1., 0., 0., 0., 0., 0.], 906 [-1., 1., 0., 0., 0., 0.], 907 [ 0., 0., -1., 0., 0., 0.], 908 [ 0., 0., -1., 1., 0., 0.], 909 [ 0., 0., 0., 0., -1., 0.], 910 [ 0., 0., 0., 0., -1., 1.]]) 911>>> dd2 912array([[-1., 0.], 913 [-1., 1.]]) 914>>> np.kron(np.eye(3), dd2[1:]) 915array([[-1., 1., 0., 0., 0., 0.], 916 [ 0., 0., -1., 1., 0., 0.], 917 [ 0., 0., 0., 0., -1., 1.]]) 918>>> np.kron(dd[1:], np.eye(2)) 919array([[-1., 0., 1., 0., 0., 0.], 920 [ 0., -1., 0., 1., 0., 0.], 921 [-1., 0., 0., 0., 1., 0.], 922 [ 0., -1., 0., 0., 0., 1.]]) 923 924 925 926d_ = np.r_[[[1,0,0,0,0]],contrast_all_one(5)] 927>>> d_ 928array([[ 1., 0., 0., 0., 0.], 929 [ 1., -1., 0., 0., 0.], 930 [ 1., 0., -1., 0., 0.], 931 [ 1., 0., 0., -1., 0.], 932 [ 1., 0., 0., 0., -1.]]) 933>>> np.round(np.linalg.pinv(d_)).astype(int) 934array([[ 1, 0, 0, 0, 0], 935 [ 1, -1, 0, 0, 0], 936 [ 1, 0, -1, 0, 0], 937 [ 1, 0, 0, -1, 0], 938 [ 1, 0, 0, 0, -1]]) 939>>> np.linalg.inv(d_).astype(int) 940array([[ 1, 0, 0, 0, 0], 941 [ 1, -1, 0, 0, 0], 942 [ 1, 0, -1, 0, 0], 943 [ 1, 0, 0, -1, 0], 944 [ 1, 0, 0, 0, -1]]) 945 946 947group means 948 949>>> sli = [slice(None)] + [None]*(3-2) + [slice(None)] 950>>> (np.column_stack((y, x1, x2))[...,None] * d1[sli]).sum(0)*1./d1.sum(0) 951array([[ 1.5, 5.5, 9.5], 952 [ 0. , 1. , 2. ], 953 [ 0.5, 0.5, 0.5]]) 954 955>>> [(z[:,None] * d1).sum(0)*1./d1.sum(0) for z in np.column_stack((y, x1, x2)).T] 956[array([ 1.5, 5.5, 9.5]), array([ 0., 1., 2.]), array([ 0.5, 0.5, 0.5])] 957>>> 958 959''' 960