1'''functions to work with contrasts for multiple tests
2
3contrast matrices for comparing all pairs, all levels to reference level, ...
4extension to 2-way groups in progress
5
6TwoWay: class for bringing two-way analysis together and try out
7various helper functions
8
9
10Idea for second part
11- get all transformation matrices to move in between different full rank
12  parameterizations
13- standardize to one parameterization to get all interesting effects.
14
15- multivariate normal distribution
16  - exploit or expand what we have in LikelihoodResults, cov_params, f_test,
17    t_test, example: resols_dropf_full.cov_params(C2)
18  - connect to new multiple comparison for contrast matrices, based on
19    multivariate normal or t distribution (Hothorn, Bretz, Westfall)
20
21'''
22
23
24
25from numpy.testing import assert_equal
26
27import numpy as np
28
29#next 3 functions copied from multicomp.py
30
31def contrast_allpairs(nm):
32    '''contrast or restriction matrix for all pairs of nm variables
33
34    Parameters
35    ----------
36    nm : int
37
38    Returns
39    -------
40    contr : ndarray, 2d, (nm*(nm-1)/2, nm)
41       contrast matrix for all pairwise comparisons
42
43    '''
44    contr = []
45    for i in range(nm):
46        for j in range(i+1, nm):
47            contr_row = np.zeros(nm)
48            contr_row[i] = 1
49            contr_row[j] = -1
50            contr.append(contr_row)
51    return np.array(contr)
52
53def contrast_all_one(nm):
54    '''contrast or restriction matrix for all against first comparison
55
56    Parameters
57    ----------
58    nm : int
59
60    Returns
61    -------
62    contr : ndarray, 2d, (nm-1, nm)
63       contrast matrix for all against first comparisons
64
65    '''
66    contr = np.column_stack((np.ones(nm-1), -np.eye(nm-1)))
67    return contr
68
69def contrast_diff_mean(nm):
70    '''contrast or restriction matrix for all against mean comparison
71
72    Parameters
73    ----------
74    nm : int
75
76    Returns
77    -------
78    contr : ndarray, 2d, (nm-1, nm)
79       contrast matrix for all against mean comparisons
80
81    '''
82    return np.eye(nm) - np.ones((nm,nm))/nm
83
84def signstr(x, noplus=False):
85    if x in [-1,0,1]:
86        if not noplus:
87            return '+' if np.sign(x)>=0 else '-'
88        else:
89            return '' if np.sign(x)>=0 else '-'
90    else:
91        return str(x)
92
93
94def contrast_labels(contrasts, names, reverse=False):
95    if reverse:
96        sl = slice(None, None, -1)
97    else:
98        sl = slice(None)
99    labels = [''.join(['%s%s' % (signstr(c, noplus=True),v)
100                          for c,v in zip(row, names)[sl] if c != 0])
101                             for row in contrasts]
102    return labels
103
104def contrast_product(names1, names2, intgroup1=None, intgroup2=None, pairs=False):
105    '''build contrast matrices for products of two categorical variables
106
107    this is an experimental script and should be converted to a class
108
109    Parameters
110    ----------
111    names1, names2 : lists of strings
112        contains the list of level labels for each categorical variable
113    intgroup1, intgroup2 : ndarrays     TODO: this part not tested, finished yet
114        categorical variable
115
116
117    Notes
118    -----
119    This creates a full rank matrix. It does not do all pairwise comparisons,
120    parameterization is using contrast_all_one to get differences with first
121    level.
122
123    ? does contrast_all_pairs work as a plugin to get all pairs ?
124
125    '''
126
127    n1 = len(names1)
128    n2 = len(names2)
129    names_prod = ['%s_%s' % (i,j) for i in names1 for j in names2]
130    ee1 = np.zeros((1,n1))
131    ee1[0,0] = 1
132    if not pairs:
133        dd = np.r_[ee1, -contrast_all_one(n1)]
134    else:
135        dd = np.r_[ee1, -contrast_allpairs(n1)]
136
137    contrast_prod = np.kron(dd[1:], np.eye(n2))
138    names_contrast_prod0 = contrast_labels(contrast_prod, names_prod, reverse=True)
139    names_contrast_prod = [''.join(['%s%s' % (signstr(c, noplus=True),v)
140                              for c,v in zip(row, names_prod)[::-1] if c != 0])
141                                 for row in contrast_prod]
142
143    ee2 = np.zeros((1,n2))
144    ee2[0,0] = 1
145    #dd2 = np.r_[ee2, -contrast_all_one(n2)]
146    if not pairs:
147        dd2 = np.r_[ee2, -contrast_all_one(n2)]
148    else:
149        dd2 = np.r_[ee2, -contrast_allpairs(n2)]
150
151    contrast_prod2 = np.kron(np.eye(n1), dd2[1:])
152    names_contrast_prod2 = [''.join(['%s%s' % (signstr(c, noplus=True),v)
153                              for c,v in zip(row, names_prod)[::-1] if c != 0])
154                                 for row in contrast_prod2]
155
156    if (intgroup1 is not None) and (intgroup1 is not None):
157        d1, _ = dummy_1d(intgroup1)
158        d2, _ = dummy_1d(intgroup2)
159        dummy = dummy_product(d1, d2)
160    else:
161        dummy = None
162
163    return (names_prod, contrast_prod, names_contrast_prod,
164                        contrast_prod2, names_contrast_prod2, dummy)
165
166
167
168
169
170def dummy_1d(x, varname=None):
171    '''dummy variable for id integer groups
172
173    Parameters
174    ----------
175    x : ndarray, 1d
176        categorical variable, requires integers if varname is None
177    varname : str
178        name of the variable used in labels for category levels
179
180    Returns
181    -------
182    dummy : ndarray, 2d
183        array of dummy variables, one column for each level of the
184        category (full set)
185    labels : list[str]
186        labels for the columns, i.e. levels of each category
187
188
189    Notes
190    -----
191    use tools.categorical instead for more more options
192
193    See Also
194    --------
195    statsmodels.tools.categorical
196
197    Examples
198    --------
199    >>> x = np.array(['F', 'F', 'M', 'M', 'F', 'F', 'M', 'M', 'F', 'F', 'M', 'M'],
200          dtype='|S1')
201    >>> dummy_1d(x, varname='gender')
202    (array([[1, 0],
203           [1, 0],
204           [0, 1],
205           [0, 1],
206           [1, 0],
207           [1, 0],
208           [0, 1],
209           [0, 1],
210           [1, 0],
211           [1, 0],
212           [0, 1],
213           [0, 1]]), ['gender_F', 'gender_M'])
214
215    '''
216    if varname is None:  #assumes integer
217        labels = ['level_%d' % i for i in range(x.max() + 1)]
218        return (x[:,None]==np.arange(x.max()+1)).astype(int), labels
219    else:
220        grouplabels = np.unique(x)
221        labels = [varname + '_%s' % str(i) for i in grouplabels]
222        return (x[:,None]==grouplabels).astype(int), labels
223
224
225def dummy_product(d1, d2, method='full'):
226    '''dummy variable from product of two dummy variables
227
228    Parameters
229    ----------
230    d1, d2 : ndarray
231        two dummy variables, assumes full set for methods 'drop-last'
232        and 'drop-first'
233    method : {'full', 'drop-last', 'drop-first'}
234        'full' returns the full product, encoding of intersection of
235        categories.
236        The drop methods provide a difference dummy encoding:
237        (constant, main effects, interaction effects). The first or last columns
238        of the dummy variable (i.e. levels) are dropped to get full rank
239        dummy matrix.
240
241    Returns
242    -------
243    dummy : ndarray
244        dummy variable for product, see method
245
246    '''
247
248    if method == 'full':
249        dd = (d1[:,:,None]*d2[:,None,:]).reshape(d1.shape[0],-1)
250    elif method == 'drop-last':  #same as SAS transreg
251        d12rl = dummy_product(d1[:,:-1], d2[:,:-1])
252        dd = np.column_stack((np.ones(d1.shape[0], int), d1[:,:-1], d2[:,:-1],d12rl))
253        #Note: dtype int should preserve dtype of d1 and d2
254    elif method == 'drop-first':
255        d12r = dummy_product(d1[:,1:], d2[:,1:])
256        dd = np.column_stack((np.ones(d1.shape[0], int), d1[:,1:], d2[:,1:],d12r))
257    else:
258        raise ValueError('method not recognized')
259
260    return dd
261
262def dummy_limits(d):
263    '''start and endpoints of groups in a sorted dummy variable array
264
265    helper function for nested categories
266
267    Examples
268    --------
269    >>> d1 = np.array([[1, 0, 0],
270                       [1, 0, 0],
271                       [1, 0, 0],
272                       [1, 0, 0],
273                       [0, 1, 0],
274                       [0, 1, 0],
275                       [0, 1, 0],
276                       [0, 1, 0],
277                       [0, 0, 1],
278                       [0, 0, 1],
279                       [0, 0, 1],
280                       [0, 0, 1]])
281    >>> dummy_limits(d1)
282    (array([0, 4, 8]), array([ 4,  8, 12]))
283
284    get group slices from an array
285
286    >>> [np.arange(d1.shape[0])[b:e] for b,e in zip(*dummy_limits(d1))]
287    [array([0, 1, 2, 3]), array([4, 5, 6, 7]), array([ 8,  9, 10, 11])]
288    >>> [np.arange(d1.shape[0])[b:e] for b,e in zip(*dummy_limits(d1))]
289    [array([0, 1, 2, 3]), array([4, 5, 6, 7]), array([ 8,  9, 10, 11])]
290    '''
291    nobs, nvars = d.shape
292    start1, col1 = np.nonzero(np.diff(d,axis=0)==1)
293    end1, col1_ = np.nonzero(np.diff(d,axis=0)==-1)
294    cc = np.arange(nvars)
295    #print(cc, np.r_[[0], col1], np.r_[col1_, [nvars-1]]
296    if ((not (np.r_[[0], col1] == cc).all())
297            or (not (np.r_[col1_, [nvars-1]] == cc).all())):
298        raise ValueError('dummy variable is not sorted')
299
300    start = np.r_[[0], start1+1]
301    end = np.r_[end1+1, [nobs]]
302    return start, end
303
304
305
306def dummy_nested(d1, d2, method='full'):
307    '''unfinished and incomplete mainly copy past dummy_product
308    dummy variable from product of two dummy variables
309
310    Parameters
311    ----------
312    d1, d2 : ndarray
313        two dummy variables, d2 is assumed to be nested in d1
314        Assumes full set for methods 'drop-last' and 'drop-first'.
315    method : {'full', 'drop-last', 'drop-first'}
316        'full' returns the full product, which in this case is d2.
317        The drop methods provide an effects encoding:
318        (constant, main effects, subgroup effects). The first or last columns
319        of the dummy variable (i.e. levels) are dropped to get full rank
320        encoding.
321
322    Returns
323    -------
324    dummy : ndarray
325        dummy variable for product, see method
326
327    '''
328    if method == 'full':
329        return d2
330
331    start1, end1 = dummy_limits(d1)
332    start2, end2 = dummy_limits(d2)
333    first = np.in1d(start2, start1)
334    last = np.in1d(end2, end1)
335    equal = (first == last)
336    col_dropf = ~first*~equal
337    col_dropl = ~last*~equal
338
339
340    if method == 'drop-last':
341        d12rl = dummy_product(d1[:,:-1], d2[:,:-1])
342        dd = np.column_stack((np.ones(d1.shape[0], int), d1[:,:-1], d2[:,col_dropl]))
343        #Note: dtype int should preserve dtype of d1 and d2
344    elif method == 'drop-first':
345        d12r = dummy_product(d1[:,1:], d2[:,1:])
346        dd = np.column_stack((np.ones(d1.shape[0], int), d1[:,1:], d2[:,col_dropf]))
347    else:
348        raise ValueError('method not recognized')
349
350    return dd, col_dropf, col_dropl
351
352
353class DummyTransform(object):
354    '''Conversion between full rank dummy encodings
355
356
357    y = X b + u
358    b = C a
359    a = C^{-1} b
360
361    y = X C a + u
362
363    define Z = X C, then
364
365    y = Z a + u
366
367    contrasts:
368
369    R_b b = r
370
371    R_a a = R_b C a = r
372
373    where R_a = R_b C
374
375    Here C is the transform matrix, with dot_left and dot_right as the main
376    methods, and the same for the inverse transform matrix, C^{-1}
377
378    Note:
379     - The class was mainly written to keep left and right straight.
380     - No checking is done.
381     - not sure yet if method names make sense
382
383
384    '''
385
386    def __init__(self, d1, d2):
387        '''C such that d1 C = d2, with d1 = X, d2 = Z
388
389        should be (x, z) in arguments ?
390        '''
391        self.transf_matrix = np.linalg.lstsq(d1, d2, rcond=-1)[0]
392        self.invtransf_matrix = np.linalg.lstsq(d2, d1, rcond=-1)[0]
393
394    def dot_left(self, a):
395        ''' b = C a
396        '''
397        return np.dot(self.transf_matrix, a)
398
399    def dot_right(self, x):
400        ''' z = x C
401        '''
402        return np.dot(x, self.transf_matrix)
403
404    def inv_dot_left(self, b):
405        ''' a = C^{-1} b
406        '''
407        return np.dot(self.invtransf_matrix, b)
408
409    def inv_dot_right(self, z):
410        ''' x = z C^{-1}
411        '''
412        return np.dot(z, self.invtransf_matrix)
413
414
415
416
417
418def groupmean_d(x, d):
419    '''groupmeans using dummy variables
420
421    Parameters
422    ----------
423    x : array_like, ndim
424        data array, tested for 1,2 and 3 dimensions
425    d : ndarray, 1d
426        dummy variable, needs to have the same length
427        as x in axis 0.
428
429    Returns
430    -------
431    groupmeans : ndarray, ndim-1
432        means for each group along axis 0, the levels
433        of the groups are the last axis
434
435    Notes
436    -----
437    This will be memory intensive if there are many levels
438    in the categorical variable, i.e. many columns in the
439    dummy variable. In this case it is recommended to use
440    a more efficient version.
441
442    '''
443    x = np.asarray(x)
444##    if x.ndim == 1:
445##        nvars = 1
446##    else:
447    nvars = x.ndim + 1
448    sli = [slice(None)] + [None]*(nvars-2) + [slice(None)]
449    return (x[...,None] * d[sli]).sum(0)*1./d.sum(0)
450
451
452
453class TwoWay(object):
454    '''a wrapper class for two way anova type of analysis with OLS
455
456
457    currently mainly to bring things together
458
459    Notes
460    -----
461    unclear: adding multiple test might assume block design or orthogonality
462
463    This estimates the full dummy version with OLS.
464    The drop first dummy representation can be recovered through the
465    transform method.
466
467    TODO: add more methods, tests, pairwise, multiple, marginal effects
468    try out what can be added for userfriendly access.
469
470    missing: ANOVA table
471
472    '''
473    def __init__(self, endog, factor1, factor2, varnames=None):
474        self.nobs = factor1.shape[0]
475        if varnames is None:
476            vname1 = 'a'
477            vname2 = 'b'
478        else:
479            vname1, vname1 = varnames
480
481        self.d1, self.d1_labels = d1, d1_labels = dummy_1d(factor1, vname1)
482        self.d2, self.d2_labels = d2, d2_labels = dummy_1d(factor2, vname2)
483        self.nlevel1 = nlevel1 = d1.shape[1]
484        self.nlevel2 = nlevel2 = d2.shape[1]
485
486
487        #get product dummies
488        res = contrast_product(d1_labels, d2_labels)
489        prodlab, C1, C1lab, C2, C2lab, _ = res
490        self.prod_label, self.C1, self.C1_label, self.C2, self.C2_label, _ = res
491        dp_full = dummy_product(d1, d2, method='full')
492        dp_dropf = dummy_product(d1, d2, method='drop-first')
493        self.transform = DummyTransform(dp_full, dp_dropf)
494
495        #estimate the model
496        self.nvars = dp_full.shape[1]
497        self.exog = dp_full
498        self.resols = sm.OLS(endog, dp_full).fit()
499        self.params = self.resols.params
500
501        #get transformed parameters, (constant, main, interaction effect)
502        self.params_dropf = self.transform.inv_dot_left(self.params)
503        self.start_interaction = 1 + (nlevel1 - 1) + (nlevel2 - 1)
504        self.n_interaction = self.nvars - self.start_interaction
505
506    #convert to cached property
507    def r_nointer(self):
508        '''contrast/restriction matrix for no interaction
509        '''
510        nia = self.n_interaction
511        R_nointer = np.hstack((np.zeros((nia, self.nvars-nia)), np.eye(nia)))
512        #inter_direct = resols_full_dropf.tval[-nia:]
513        R_nointer_transf = self.transform.inv_dot_right(R_nointer)
514        self.R_nointer_transf = R_nointer_transf
515        return R_nointer_transf
516
517    def ttest_interaction(self):
518        '''ttests for no-interaction terms are zero
519        '''
520        #use self.r_nointer instead
521        nia = self.n_interaction
522        R_nointer = np.hstack((np.zeros((nia, self.nvars-nia)), np.eye(nia)))
523        #inter_direct = resols_full_dropf.tval[-nia:]
524        R_nointer_transf = self.transform.inv_dot_right(R_nointer)
525        self.R_nointer_transf = R_nointer_transf
526        t_res = self.resols.t_test(R_nointer_transf)
527        return t_res
528
529    def ftest_interaction(self):
530        '''ttests for no-interaction terms are zero
531        '''
532        R_nointer_transf = self.r_nointer()
533        return self.resols.f_test(R_nointer_transf)
534
535    def ttest_conditional_effect(self, factorind):
536        if factorind == 1:
537            return self.resols.t_test(self.C1), self.C1_label
538        else:
539            return self.resols.t_test(self.C2), self.C2_label
540
541    def summary_coeff(self):
542        from statsmodels.iolib import SimpleTable
543        params_arr = self.params.reshape(self.nlevel1, self.nlevel2)
544        stubs = self.d1_labels
545        headers = self.d2_labels
546        title = 'Estimated Coefficients by factors'
547        table_fmt = dict(
548            data_fmts = ["%#10.4g"]*self.nlevel2)
549        return SimpleTable(params_arr, headers, stubs, title=title,
550                           txt_fmt=table_fmt)
551
552
553# --------------- tests
554# TODO: several tests still missing, several are in the example with print
555
556class TestContrastTools(object):
557
558    def __init__(self):
559        self.v1name = ['a0', 'a1', 'a2']
560        self.v2name = ['b0', 'b1']
561        self.d1 = np.array([[1, 0, 0],
562                            [1, 0, 0],
563                            [1, 0, 0],
564                            [1, 0, 0],
565                            [0, 1, 0],
566                            [0, 1, 0],
567                            [0, 1, 0],
568                            [0, 1, 0],
569                            [0, 0, 1],
570                            [0, 0, 1],
571                            [0, 0, 1],
572                            [0, 0, 1]])
573
574    def test_dummy_1d(self):
575        x = np.array(['F', 'F', 'M', 'M', 'F', 'F', 'M', 'M', 'F', 'F', 'M', 'M'],
576              dtype='|S1')
577        d, labels = (np.array([[1, 0],
578                               [1, 0],
579                               [0, 1],
580                               [0, 1],
581                               [1, 0],
582                               [1, 0],
583                               [0, 1],
584                               [0, 1],
585                               [1, 0],
586                               [1, 0],
587                               [0, 1],
588                               [0, 1]]), ['gender_F', 'gender_M'])
589        res_d, res_labels = dummy_1d(x, varname='gender')
590        assert_equal(res_d, d)
591        assert_equal(res_labels, labels)
592
593    def test_contrast_product(self):
594        res_cp = contrast_product(self.v1name, self.v2name)
595        res_t = [0]*6
596        res_t[0] = ['a0_b0', 'a0_b1', 'a1_b0', 'a1_b1', 'a2_b0', 'a2_b1']
597        res_t[1] = np.array([[-1.,  0.,  1.,  0.,  0.,  0.],
598                           [ 0., -1.,  0.,  1.,  0.,  0.],
599                           [-1.,  0.,  0.,  0.,  1.,  0.],
600                           [ 0., -1.,  0.,  0.,  0.,  1.]])
601        res_t[2] = ['a1_b0-a0_b0', 'a1_b1-a0_b1', 'a2_b0-a0_b0', 'a2_b1-a0_b1']
602        res_t[3] =  np.array([[-1.,  1.,  0.,  0.,  0.,  0.],
603                           [ 0.,  0., -1.,  1.,  0.,  0.],
604                           [ 0.,  0.,  0.,  0., -1.,  1.]])
605        res_t[4] = ['a0_b1-a0_b0', 'a1_b1-a1_b0', 'a2_b1-a2_b0']
606        for ii in range(5):
607            np.testing.assert_equal(res_cp[ii], res_t[ii], err_msg=str(ii))
608
609    def test_dummy_limits(self):
610        b,e = dummy_limits(self.d1)
611        assert_equal(b, np.array([0, 4, 8]))
612        assert_equal(e, np.array([ 4,  8, 12]))
613
614
615
616
617if __name__ == '__main__':
618    tt = TestContrastTools()
619    tt.test_contrast_product()
620    tt.test_dummy_1d()
621    tt.test_dummy_limits()
622
623    import statsmodels.api as sm
624
625    examples = ['small', 'large', None][1]
626
627    v1name = ['a0', 'a1', 'a2']
628    v2name = ['b0', 'b1']
629    res_cp = contrast_product(v1name, v2name)
630    print(res_cp)
631
632    y = np.arange(12)
633    x1 = np.arange(12)//4
634    x2 = np.arange(12)//2 % 2
635
636    if 'small' in examples:
637        d1, d1_labels = dummy_1d(x1)
638        d2, d2_labels = dummy_1d(x2)
639
640
641    if 'large' in examples:
642        x1 = np.repeat(x1, 5, axis=0)
643        x2 = np.repeat(x2, 5, axis=0)
644
645    nobs = x1.shape[0]
646    d1, d1_labels = dummy_1d(x1)
647    d2, d2_labels = dummy_1d(x2)
648
649    dd_full = dummy_product(d1, d2, method='full')
650    dd_dropl = dummy_product(d1, d2, method='drop-last')
651    dd_dropf = dummy_product(d1, d2, method='drop-first')
652
653    #Note: full parameterization of dummies is orthogonal
654    #np.eye(6)*10 in "large" example
655    print((np.dot(dd_full.T, dd_full) == np.diag(dd_full.sum(0))).all())
656
657    #check that transforms work
658    #generate 3 data sets with the 3 different parameterizations
659
660    effect_size = [1., 0.01][1]
661    noise_scale = [0.001, 0.1][0]
662    noise = noise_scale * np.random.randn(nobs)
663    beta = effect_size * np.arange(1,7)
664    ydata_full = (dd_full * beta).sum(1) + noise
665    ydata_dropl = (dd_dropl * beta).sum(1) + noise
666    ydata_dropf = (dd_dropf * beta).sum(1) + noise
667
668    resols_full_full = sm.OLS(ydata_full, dd_full).fit()
669    resols_full_dropf = sm.OLS(ydata_full, dd_dropf).fit()
670    params_f_f = resols_full_full.params
671    params_f_df = resols_full_dropf.params
672
673    resols_dropf_full = sm.OLS(ydata_dropf, dd_full).fit()
674    resols_dropf_dropf = sm.OLS(ydata_dropf, dd_dropf).fit()
675    params_df_f = resols_dropf_full.params
676    params_df_df = resols_dropf_dropf.params
677
678
679    tr_of = np.linalg.lstsq(dd_dropf, dd_full, rcond=-1)[0]
680    tr_fo = np.linalg.lstsq(dd_full, dd_dropf, rcond=-1)[0]
681    print(np.dot(tr_fo, params_df_df) - params_df_f)
682    print(np.dot(tr_of, params_f_f) - params_f_df)
683
684    transf_f_df = DummyTransform(dd_full, dd_dropf)
685    print(np.max(np.abs((dd_full - transf_f_df.inv_dot_right(dd_dropf)))))
686    print(np.max(np.abs((dd_dropf - transf_f_df.dot_right(dd_full)))))
687    print(np.max(np.abs((params_df_df
688                         - transf_f_df.inv_dot_left(params_df_f)))))
689    np.max(np.abs((params_f_df
690                         - transf_f_df.inv_dot_left(params_f_f))))
691
692    prodlab, C1, C1lab, C2, C2lab,_ = contrast_product(v1name, v2name)
693
694    print('\ntvalues for no effect of factor 1')
695    print('each test is conditional on a level of factor 2')
696    print(C1lab)
697    print(resols_dropf_full.t_test(C1).tvalue)
698
699    print('\ntvalues for no effect of factor 2')
700    print('each test is conditional on a level of factor 1')
701    print(C2lab)
702    print(resols_dropf_full.t_test(C2).tvalue)
703
704    #covariance matrix of restrictions C2, note: orthogonal
705    resols_dropf_full.cov_params(C2)
706
707    #testing for no interaction effect
708    R_noint = np.hstack((np.zeros((2,4)), np.eye(2)))
709    inter_direct = resols_full_dropf.tvalues[-2:]
710    inter_transf = resols_full_full.t_test(transf_f_df.inv_dot_right(R_noint)).tvalue
711    print(np.max(np.abs((inter_direct - inter_transf))))
712
713    #now with class version
714    tw = TwoWay(ydata_dropf, x1, x2)
715    print(tw.ttest_interaction().tvalue)
716    print(tw.ttest_interaction().pvalue)
717    print(tw.ftest_interaction().fvalue)
718    print(tw.ftest_interaction().pvalue)
719    print(tw.ttest_conditional_effect(1)[0].tvalue)
720    print(tw.ttest_conditional_effect(2)[0].tvalue)
721    print(tw.summary_coeff())
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739''' documentation for early examples while developing - some have changed already
740>>> y = np.arange(12)
741>>> y
742array([ 0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11])
743>>> x1 = np.arange(12)//4
744>>> x1
745array([0, 0, 0, 0, 1, 1, 1, 1, 2, 2, 2, 2])
746>>> x2 = np.arange(12)//2%2
747>>> x2
748array([0, 0, 1, 1, 0, 0, 1, 1, 0, 0, 1, 1])
749
750>>> d1 = dummy_1d(x1)
751>>> d1
752array([[1, 0, 0],
753       [1, 0, 0],
754       [1, 0, 0],
755       [1, 0, 0],
756       [0, 1, 0],
757       [0, 1, 0],
758       [0, 1, 0],
759       [0, 1, 0],
760       [0, 0, 1],
761       [0, 0, 1],
762       [0, 0, 1],
763       [0, 0, 1]])
764
765>>> d2 = dummy_1d(x2)
766>>> d2
767array([[1, 0],
768       [1, 0],
769       [0, 1],
770       [0, 1],
771       [1, 0],
772       [1, 0],
773       [0, 1],
774       [0, 1],
775       [1, 0],
776       [1, 0],
777       [0, 1],
778       [0, 1]])
779
780>>> d12 = dummy_product(d1, d2)
781>>> d12
782array([[1, 0, 0, 0, 0, 0],
783       [1, 0, 0, 0, 0, 0],
784       [0, 1, 0, 0, 0, 0],
785       [0, 1, 0, 0, 0, 0],
786       [0, 0, 1, 0, 0, 0],
787       [0, 0, 1, 0, 0, 0],
788       [0, 0, 0, 1, 0, 0],
789       [0, 0, 0, 1, 0, 0],
790       [0, 0, 0, 0, 1, 0],
791       [0, 0, 0, 0, 1, 0],
792       [0, 0, 0, 0, 0, 1],
793       [0, 0, 0, 0, 0, 1]])
794
795
796>>> d12rl = dummy_product(d1[:,:-1], d2[:,:-1])
797>>> np.column_stack((np.ones(d1.shape[0]), d1[:,:-1], d2[:,:-1],d12rl))
798array([[ 1.,  1.,  0.,  1.,  1.,  0.],
799       [ 1.,  1.,  0.,  1.,  1.,  0.],
800       [ 1.,  1.,  0.,  0.,  0.,  0.],
801       [ 1.,  1.,  0.,  0.,  0.,  0.],
802       [ 1.,  0.,  1.,  1.,  0.,  1.],
803       [ 1.,  0.,  1.,  1.,  0.,  1.],
804       [ 1.,  0.,  1.,  0.,  0.,  0.],
805       [ 1.,  0.,  1.,  0.,  0.,  0.],
806       [ 1.,  0.,  0.,  1.,  0.,  0.],
807       [ 1.,  0.,  0.,  1.,  0.,  0.],
808       [ 1.,  0.,  0.,  0.,  0.,  0.],
809       [ 1.,  0.,  0.,  0.,  0.,  0.]])
810'''
811
812
813
814
815#nprod = ['%s_%s' % (i,j) for i in ['a0', 'a1', 'a2'] for j in ['b0', 'b1']]
816#>>> [''.join(['%s%s' % (signstr(c),v) for c,v in zip(row, nprod) if c != 0])
817#     for row in np.kron(dd[1:], np.eye(2))]
818
819
820
821'''
822>>> nprod = ['%s_%s' % (i,j) for i in ['a0', 'a1', 'a2'] for j in ['b0', 'b1']]
823>>> nprod
824['a0_b0', 'a0_b1', 'a1_b0', 'a1_b1', 'a2_b0', 'a2_b1']
825>>> [''.join(['%s%s' % (signstr(c),v) for c,v in zip(row, nprod) if c != 0]) for row in np.kron(dd[1:], np.eye(2))]
826['-a0b0+a1b0', '-a0b1+a1b1', '-a0b0+a2b0', '-a0b1+a2b1']
827>>> [''.join(['%s%s' % (signstr(c),v) for c,v in zip(row, nprod)[::-1] if c != 0]) for row in np.kron(dd[1:], np.eye(2))]
828['+a1_b0-a0_b0', '+a1_b1-a0_b1', '+a2_b0-a0_b0', '+a2_b1-a0_b1']
829
830>>> np.r_[[[1,0,0,0,0]],contrast_all_one(5)]
831array([[ 1.,  0.,  0.,  0.,  0.],
832       [ 1., -1.,  0.,  0.,  0.],
833       [ 1.,  0., -1.,  0.,  0.],
834       [ 1.,  0.,  0., -1.,  0.],
835       [ 1.,  0.,  0.,  0., -1.]])
836
837>>> idxprod = [(i,j) for i in range(3) for j in range(2)]
838>>> idxprod
839[(0, 0), (0, 1), (1, 0), (1, 1), (2, 0), (2, 1)]
840>>> np.array(idxprod).reshape(2,3,2,order='F')[:,:,0]
841array([[0, 1, 2],
842       [0, 1, 2]])
843>>> np.array(idxprod).reshape(2,3,2,order='F')[:,:,1]
844array([[0, 0, 0],
845       [1, 1, 1]])
846>>> dd3_ = np.r_[[[0,0,0]],contrast_all_one(3)]
847
848
849
850pairwise contrasts and reparameterization
851
852dd = np.r_[[[1,0,0,0,0]],-contrast_all_one(5)]
853>>> dd
854array([[ 1.,  0.,  0.,  0.,  0.],
855       [-1.,  1.,  0.,  0.,  0.],
856       [-1.,  0.,  1.,  0.,  0.],
857       [-1.,  0.,  0.,  1.,  0.],
858       [-1.,  0.,  0.,  0.,  1.]])
859>>> np.dot(dd.T, np.arange(5))
860array([-10.,   1.,   2.,   3.,   4.])
861>>> np.round(np.linalg.inv(dd.T)).astype(int)
862array([[1, 1, 1, 1, 1],
863       [0, 1, 0, 0, 0],
864       [0, 0, 1, 0, 0],
865       [0, 0, 0, 1, 0],
866       [0, 0, 0, 0, 1]])
867>>> np.round(np.linalg.inv(dd)).astype(int)
868array([[1, 0, 0, 0, 0],
869       [1, 1, 0, 0, 0],
870       [1, 0, 1, 0, 0],
871       [1, 0, 0, 1, 0],
872       [1, 0, 0, 0, 1]])
873>>> dd
874array([[ 1.,  0.,  0.,  0.,  0.],
875       [-1.,  1.,  0.,  0.,  0.],
876       [-1.,  0.,  1.,  0.,  0.],
877       [-1.,  0.,  0.,  1.,  0.],
878       [-1.,  0.,  0.,  0.,  1.]])
879>>> ddinv=np.round(np.linalg.inv(dd.T)).astype(int)
880>>> np.dot(ddinv, np.arange(5))
881array([10,  1,  2,  3,  4])
882>>> np.dot(dd, np.arange(5))
883array([ 0.,  1.,  2.,  3.,  4.])
884>>> np.dot(dd, 5+np.arange(5))
885array([ 5.,  1.,  2.,  3.,  4.])
886>>> ddinv2 = np.round(np.linalg.inv(dd)).astype(int)
887>>> np.dot(ddinv2, np.arange(5))
888array([0, 1, 2, 3, 4])
889>>> np.dot(ddinv2, 5+np.arange(5))
890array([ 5, 11, 12, 13, 14])
891>>> np.dot(ddinv2, [5, 0, 0 , 1, 2])
892array([5, 5, 5, 6, 7])
893>>> np.dot(ddinv2, dd)
894array([[ 1.,  0.,  0.,  0.,  0.],
895       [ 0.,  1.,  0.,  0.,  0.],
896       [ 0.,  0.,  1.,  0.,  0.],
897       [ 0.,  0.,  0.,  1.,  0.],
898       [ 0.,  0.,  0.,  0.,  1.]])
899
900
901
902>>> dd3 = -np.r_[[[1,0,0]],contrast_all_one(3)]
903>>> dd2 = -np.r_[[[1,0]],contrast_all_one(2)]
904>>> np.kron(np.eye(3), dd2)
905array([[-1.,  0.,  0.,  0.,  0.,  0.],
906       [-1.,  1.,  0.,  0.,  0.,  0.],
907       [ 0.,  0., -1.,  0.,  0.,  0.],
908       [ 0.,  0., -1.,  1.,  0.,  0.],
909       [ 0.,  0.,  0.,  0., -1.,  0.],
910       [ 0.,  0.,  0.,  0., -1.,  1.]])
911>>> dd2
912array([[-1.,  0.],
913       [-1.,  1.]])
914>>> np.kron(np.eye(3), dd2[1:])
915array([[-1.,  1.,  0.,  0.,  0.,  0.],
916       [ 0.,  0., -1.,  1.,  0.,  0.],
917       [ 0.,  0.,  0.,  0., -1.,  1.]])
918>>> np.kron(dd[1:], np.eye(2))
919array([[-1.,  0.,  1.,  0.,  0.,  0.],
920       [ 0., -1.,  0.,  1.,  0.,  0.],
921       [-1.,  0.,  0.,  0.,  1.,  0.],
922       [ 0., -1.,  0.,  0.,  0.,  1.]])
923
924
925
926d_ = np.r_[[[1,0,0,0,0]],contrast_all_one(5)]
927>>> d_
928array([[ 1.,  0.,  0.,  0.,  0.],
929       [ 1., -1.,  0.,  0.,  0.],
930       [ 1.,  0., -1.,  0.,  0.],
931       [ 1.,  0.,  0., -1.,  0.],
932       [ 1.,  0.,  0.,  0., -1.]])
933>>> np.round(np.linalg.pinv(d_)).astype(int)
934array([[ 1,  0,  0,  0,  0],
935       [ 1, -1,  0,  0,  0],
936       [ 1,  0, -1,  0,  0],
937       [ 1,  0,  0, -1,  0],
938       [ 1,  0,  0,  0, -1]])
939>>> np.linalg.inv(d_).astype(int)
940array([[ 1,  0,  0,  0,  0],
941       [ 1, -1,  0,  0,  0],
942       [ 1,  0, -1,  0,  0],
943       [ 1,  0,  0, -1,  0],
944       [ 1,  0,  0,  0, -1]])
945
946
947group means
948
949>>> sli = [slice(None)] + [None]*(3-2) + [slice(None)]
950>>> (np.column_stack((y, x1, x2))[...,None] * d1[sli]).sum(0)*1./d1.sum(0)
951array([[ 1.5,  5.5,  9.5],
952       [ 0. ,  1. ,  2. ],
953       [ 0.5,  0.5,  0.5]])
954
955>>> [(z[:,None] * d1).sum(0)*1./d1.sum(0) for z in np.column_stack((y, x1, x2)).T]
956[array([ 1.5,  5.5,  9.5]), array([ 0.,  1.,  2.]), array([ 0.5,  0.5,  0.5])]
957>>>
958
959'''
960