1'''
2Spatial Two Stages Least Squares with Regimes
3'''
4
5__author__ = "Luc Anselin luc.anselin@asu.edu, Pedro V. Amaral pedro.amaral@asu.edu, David C. Folch david.folch@asu.edu"
6
7import numpy as np
8from . import regimes as REGI
9from . import user_output as USER
10from . import summary_output as SUMMARY
11import multiprocessing as mp
12from .twosls_regimes import TSLS_Regimes, _optimal_weight
13from .twosls import BaseTSLS
14from .utils import set_endog, set_endog_sparse, sp_att, set_warn, sphstack, spdot
15from .robust import hac_multi
16
17
18class GM_Lag_Regimes(TSLS_Regimes, REGI.Regimes_Frame):
19
20    """
21    Spatial two stage least squares (S2SLS) with regimes;
22    :cite:`Anselin1988`
23
24    Parameters
25    ----------
26    y            : array
27                   nx1 array for dependent variable
28    x            : array
29                   Two dimensional array with n rows and one column for each
30                   independent (exogenous) variable, excluding the constant
31    regimes      : list
32                   List of n values with the mapping of each
33                   observation to a regime. Assumed to be aligned with 'x'.
34    yend         : array
35                   Two dimensional array with n rows and one column for each
36                   endogenous variable
37    q            : array
38                   Two dimensional array with n rows and one column for each
39                   external exogenous variable to use as instruments (note:
40                   this should not contain any variables from x); cannot be
41                   used in combination with h
42    constant_regi: string
43                   Switcher controlling the constant term setup. It may take
44                   the following values:
45
46                   * 'one': a vector of ones is appended to x and held constant across regimes.
47
48                   * 'many': a vector of ones is appended to x and considered different per regime (default).
49    cols2regi    : list, 'all'
50                   Argument indicating whether each
51                   column of x should be considered as different per regime
52                   or held constant across regimes (False).
53                   If a list, k booleans indicating for each variable the
54                   option (True if one per regime, False to be held constant).
55                   If 'all' (default), all the variables vary by regime.
56    w            : pysal W object
57                   Spatial weights object
58    w_lags       : integer
59                   Orders of W to include as instruments for the spatially
60                   lagged dependent variable. For example, w_lags=1, then
61                   instruments are WX; if w_lags=2, then WX, WWX; and so on.
62    lag_q        : boolean
63                   If True, then include spatial lags of the additional
64                   instruments (q).
65    regime_lag_sep: boolean
66                    If True (default), the spatial parameter for spatial lag is also
67                    computed according to different regimes. If False,
68                    the spatial parameter is fixed accross regimes.
69                    Option valid only when regime_err_sep=True
70    regime_err_sep: boolean
71                    If True, a separate regression is run for each regime.
72    robust       : string
73                   If 'white', then a White consistent estimator of the
74                   variance-covariance matrix is given.
75                   If 'hac', then a HAC consistent estimator of the
76                   variance-covariance matrix is given.
77                   If 'ogmm', then Optimal GMM is used to estimate
78                   betas and the variance-covariance matrix.
79                   Default set to None.
80    gwk          : pysal W object
81                   Kernel spatial weights needed for HAC estimation. Note:
82                   matrix must have ones along the main diagonal.
83    sig2n_k      : boolean
84                   If True, then use n-k to estimate sigma^2. If False, use n.
85    spat_diag    : boolean
86                   If True, then compute Anselin-Kelejian test
87    vm           : boolean
88                   If True, include variance-covariance matrix in summary
89                   results
90    cores        : boolean
91                   Specifies if multiprocessing is to be used
92                   Default: no multiprocessing, cores = False
93                   Note: Multiprocessing may not work on all platforms.
94    name_y       : string
95                   Name of dependent variable for use in output
96    name_x       : list of strings
97                   Names of independent variables for use in output
98    name_yend    : list of strings
99                   Names of endogenous variables for use in output
100    name_q       : list of strings
101                   Names of instruments for use in output
102    name_w       : string
103                   Name of weights matrix for use in output
104    name_gwk     : string
105                   Name of kernel weights matrix for use in output
106    name_ds      : string
107                   Name of dataset for use in output
108    name_regimes : string
109                   Name of regimes variable for use in output
110
111    Attributes
112    ----------
113    summary      : string
114                   Summary of regression results and diagnostics (note: use in
115                   conjunction with the print command)
116    betas        : array
117                   kx1 array of estimated coefficients
118    u            : array
119                   nx1 array of residuals
120    e_pred       : array
121                   nx1 array of residuals (using reduced form)
122    predy        : array
123                   nx1 array of predicted y values
124    predy_e      : array
125                   nx1 array of predicted y values (using reduced form)
126    n            : integer
127                   Number of observations
128    k            : integer
129                   Number of variables for which coefficients are estimated
130                   (including the constant)
131                   Only available in dictionary 'multi' when multiple regressions
132                   (see 'multi' below for details)
133    kstar        : integer
134                   Number of endogenous variables.
135                   Only available in dictionary 'multi' when multiple regressions
136                   (see 'multi' below for details)
137    y            : array
138                   nx1 array for dependent variable
139    x            : array
140                   Two dimensional array with n rows and one column for each
141                   independent (exogenous) variable, including the constant
142                   Only available in dictionary 'multi' when multiple regressions
143                   (see 'multi' below for details)
144    yend         : array
145                   Two dimensional array with n rows and one column for each
146                   endogenous variable
147                   Only available in dictionary 'multi' when multiple regressions
148                   (see 'multi' below for details)
149    q            : array
150                   Two dimensional array with n rows and one column for each
151                   external exogenous variable used as instruments
152                   Only available in dictionary 'multi' when multiple regressions
153                   (see 'multi' below for details)
154    z            : array
155                   nxk array of variables (combination of x and yend)
156                   Only available in dictionary 'multi' when multiple regressions
157                   (see 'multi' below for details)
158    h            : array
159                   nxl array of instruments (combination of x and q)
160                   Only available in dictionary 'multi' when multiple regressions
161                   (see 'multi' below for details)
162    robust       : string
163                   Adjustment for robust standard errors
164                   Only available in dictionary 'multi' when multiple regressions
165                   (see 'multi' below for details)
166    mean_y       : float
167                   Mean of dependent variable
168    std_y        : float
169                   Standard deviation of dependent variable
170    vm           : array
171                   Variance covariance matrix (kxk)
172    pr2          : float
173                   Pseudo R squared (squared correlation between y and ypred)
174                   Only available in dictionary 'multi' when multiple regressions
175                   (see 'multi' below for details)
176    pr2_e        : float
177                   Pseudo R squared (squared correlation between y and ypred_e
178                   (using reduced form))
179                   Only available in dictionary 'multi' when multiple regressions
180                   (see 'multi' below for details)
181    utu          : float
182                   Sum of squared residuals
183    sig2         : float
184                   Sigma squared used in computations
185                   Only available in dictionary 'multi' when multiple regressions
186                   (see 'multi' below for details)
187    std_err      : array
188                   1xk array of standard errors of the betas
189                   Only available in dictionary 'multi' when multiple regressions
190                   (see 'multi' below for details)
191    z_stat       : list of tuples
192                   z statistic; each tuple contains the pair (statistic,
193                   p-value), where each is a float
194                   Only available in dictionary 'multi' when multiple regressions
195                   (see 'multi' below for details)
196    ak_test      : tuple
197                   Anselin-Kelejian test; tuple contains the pair (statistic,
198                   p-value)
199                   Only available in dictionary 'multi' when multiple regressions
200                   (see 'multi' below for details)
201    name_y       : string
202                   Name of dependent variable for use in output
203    name_x       : list of strings
204                   Names of independent variables for use in output
205    name_yend    : list of strings
206                   Names of endogenous variables for use in output
207    name_z       : list of strings
208                   Names of exogenous and endogenous variables for use in
209                   output
210    name_q       : list of strings
211                   Names of external instruments
212    name_h       : list of strings
213                   Names of all instruments used in ouput
214    name_w       : string
215                   Name of weights matrix for use in output
216    name_gwk     : string
217                   Name of kernel weights matrix for use in output
218    name_ds      : string
219                   Name of dataset for use in output
220    name_regimes : string
221                   Name of regimes variable for use in output
222    title        : string
223                   Name of the regression method used
224                   Only available in dictionary 'multi' when multiple regressions
225                   (see 'multi' below for details)
226    sig2n        : float
227                   Sigma squared (computed with n in the denominator)
228    sig2n_k      : float
229                   Sigma squared (computed with n-k in the denominator)
230    hth          : float
231                   :math:`H'H`.
232                   Only available in dictionary 'multi' when multiple regressions
233                   (see 'multi' below for details)
234    hthi         : float
235                   :math:`(H'H)^{-1}`.
236                   Only available in dictionary 'multi' when multiple regressions
237                   (see 'multi' below for details)
238    varb         : array
239                   :math:`(Z'H (H'H)^{-1} H'Z)^{-1}`.
240                   Only available in dictionary 'multi' when multiple regressions
241                   (see 'multi' below for details)
242    zthhthi      : array
243                   :math:`Z'H(H'H)^{-1}`.
244                   Only available in dictionary 'multi' when multiple regressions
245                   (see 'multi' below for details)
246    pfora1a2     : array
247                   n(zthhthi)'varb
248                   Only available in dictionary 'multi' when multiple regressions
249                   (see 'multi' below for details)
250    regimes      : list
251                   List of n values with the mapping of each
252                   observation to a regime. Assumed to be aligned with 'x'.
253    constant_regi: string
254                   Ignored if regimes=False. Constant option for regimes.
255                   Switcher controlling the constant term setup. It may take
256                   the following values:
257
258                   * 'one': a vector of ones is appended to x and held constant across regimes.
259
260                   * 'many': a vector of ones is appended to x and considered different per regime.
261    cols2regi    : list, 'all'
262                   Ignored if regimes=False. Argument indicating whether each
263                   column of x should be considered as different per regime
264                   or held constant across regimes (False).
265                   If a list, k booleans indicating for each variable the
266                   option (True if one per regime, False to be held constant).
267                   If 'all', all the variables vary by regime.
268    regime_lag_sep: boolean
269                    If True, the spatial parameter for spatial lag is also
270                    computed according to different regimes. If False (default),
271                    the spatial parameter is fixed accross regimes.
272    regime_err_sep: boolean
273                    If True, a separate regression is run for each regime.
274    kr           : int
275                   Number of variables/columns to be "regimized" or subject
276                   to change by regime. These will result in one parameter
277                   estimate by regime for each variable (i.e. nr parameters per
278                   variable)
279    kf           : int
280                   Number of variables/columns to be considered fixed or
281                   global across regimes and hence only obtain one parameter
282                   estimate
283    nr           : int
284                   Number of different regimes in the 'regimes' list
285    multi        : dictionary
286                   Only available when multiple regressions are estimated,
287                   i.e. when regime_err_sep=True and no variable is fixed
288                   across regimes.
289                   Contains all attributes of each individual regression
290
291    Examples
292    --------
293
294    We first need to import the needed modules, namely numpy to convert the
295    data we read into arrays that ``spreg`` understands and ``pysal`` to
296    perform all the analysis.
297
298    >>> import numpy as np
299    >>> import libpysal
300    >>> from libpysal import examples
301
302    Open data on NCOVR US County Homicides (3085 areas) using libpysal.io.open().
303    This is the DBF associated with the NAT shapefile.  Note that
304    libpysal.io.open() also reads data in CSV format; since the actual class
305    requires data to be passed in as numpy arrays, the user can read their
306    data in using any method.
307
308    >>> db = libpysal.io.open(examples.get_path("NAT.dbf"),'r')
309
310    Extract the HR90 column (homicide rates in 1990) from the DBF file and make it the
311    dependent variable for the regression. Note that PySAL requires this to be
312    an numpy array of shape (n, 1) as opposed to the also common shape of (n, )
313    that other packages accept.
314
315    >>> y_var = 'HR90'
316    >>> y = np.array([db.by_col(y_var)]).reshape(3085,1)
317
318    Extract UE90 (unemployment rate) and PS90 (population structure) vectors from
319    the DBF to be used as independent variables in the regression. Other variables
320    can be inserted by adding their names to x_var, such as x_var = ['Var1','Var2','...]
321    Note that PySAL requires this to be an nxj numpy array, where j is the
322    number of independent variables (not including a constant). By default
323    this model adds a vector of ones to the independent variables passed in.
324
325    >>> x_var = ['PS90','UE90']
326    >>> x = np.array([db.by_col(name) for name in x_var]).T
327
328    The different regimes in this data are given according to the North and
329    South dummy (SOUTH).
330
331    >>> r_var = 'SOUTH'
332    >>> regimes = db.by_col(r_var)
333
334    Since we want to run a spatial lag model, we need to specify
335    the spatial weights matrix that includes the spatial configuration of the
336    observations. To do that, we can open an already existing gal file or
337    create a new one. In this case, we will create one from ``NAT.shp``.
338
339    >>> from libpysal import weights
340    >>> w = weights.Rook.from_shapefile(examples.get_path("NAT.shp"))
341
342    Unless there is a good reason not to do it, the weights have to be
343    row-standardized so every row of the matrix sums to one. Among other
344    things, this allows to interpret the spatial lag of a variable as the
345    average value of the neighboring observations. In PySAL, this can be
346    easily performed in the following way:
347
348    >>> w.transform = 'r'
349
350    This class runs a lag model, which means that includes the spatial lag of
351    the dependent variable on the right-hand side of the equation. If we want
352    to have the names of the variables printed in the output summary, we will
353    have to pass them in as well, although this is optional.
354
355    >>> from spreg import GM_Lag_Regimes
356    >>> model=GM_Lag_Regimes(y, x, regimes, w=w, regime_lag_sep=False, regime_err_sep=False, name_y=y_var, name_x=x_var, name_regimes=r_var, name_ds='NAT', name_w='NAT.shp')
357    >>> model.betas
358    array([[ 1.28897623],
359           [ 0.79777722],
360           [ 0.56366891],
361           [ 8.73327838],
362           [ 1.30433406],
363           [ 0.62418643],
364           [-0.39993716]])
365
366    Once the model is run, we can have a summary of the output by typing:
367    model.summary . Alternatively, we can obtain the standard error of
368    the coefficient estimates by calling:
369
370    >>> model.std_err
371    array([0.44682888, 0.14358192, 0.05655124, 1.06044865, 0.20184548,
372           0.06118262, 0.12387232])
373
374    In the example above, all coefficients but the spatial lag vary
375    according to the regime. It is also possible to have the spatial lag
376    varying according to the regime, which effective will result in an
377    independent spatial lag model estimated for each regime. To run these
378    models, the argument regime_lag_sep must be set to True:
379
380    >>> model=GM_Lag_Regimes(y, x, regimes, w=w, regime_lag_sep=True, name_y=y_var, name_x=x_var, name_regimes=r_var, name_ds='NAT', name_w='NAT.shp')
381    >>> print(np.hstack((np.array(model.name_z).reshape(8,1),model.betas,np.sqrt(model.vm.diagonal().reshape(8,1)))))
382    [['0_CONSTANT' '1.3658476998618099' '0.3985472089832652']
383     ['0_PS90' '0.8087573074246643' '0.11324884794883601']
384     ['0_UE90' '0.5694681319188577' '0.04625087717092595']
385     ['0_W_HR90' '-0.43424389464634316' '0.13350159258670305']
386     ['1_CONSTANT' '7.90731073341874' '1.6360187416950998']
387     ['1_PS90' '1.2746570332609135' '0.2470987049452741']
388     ['1_UE90' '0.6016769336173784' '0.07993322102145078']
389     ['1_W_HR90' '-0.2960338343846942' '0.19934459782427025']]
390
391    Alternatively, we can type: 'model.summary' to see the organized results output.
392    The class is flexible enough to accomodate a spatial lag model that,
393    besides the spatial lag of the dependent variable, includes other
394    non-spatial endogenous regressors. As an example, we will add the endogenous
395    variable RD90 (resource deprivation) and we decide to instrument for it with
396    FP89 (families below poverty):
397
398    >>> yd_var = ['RD90']
399    >>> yd = np.array([db.by_col(name) for name in yd_var]).T
400    >>> q_var = ['FP89']
401    >>> q = np.array([db.by_col(name) for name in q_var]).T
402
403    And we can run the model again:
404
405    >>> model = GM_Lag_Regimes(y, x, regimes, yend=yd, q=q, w=w, regime_lag_sep=False, regime_err_sep=False, name_y=y_var, name_x=x_var, name_yend=yd_var, name_q=q_var, name_regimes=r_var, name_ds='NAT', name_w='NAT.shp')
406    >>> model.betas
407    array([[ 3.42195202],
408           [ 1.03311878],
409           [ 0.14308741],
410           [ 8.99740066],
411           [ 1.91877758],
412           [-0.32084816],
413           [ 2.38918212],
414           [ 3.67243761],
415           [ 0.06959139]])
416
417    Once the model is run, we can obtain the standard error of the coefficient
418    estimates. Alternatively, we can have a summary of the output by typing:
419    model.summary
420
421    >>> model.std_err
422    array([0.49163311, 0.12237382, 0.05633464, 0.72555909, 0.17250521,
423           0.06749131, 0.27370369, 0.25106224, 0.05804213])
424    """
425
426    def __init__(self, y, x, regimes, yend=None, q=None,
427                 w=None, w_lags=1, lag_q=True,
428                 robust=None, gwk=None, sig2n_k=False,
429                 spat_diag=False, constant_regi='many',
430                 cols2regi='all', regime_lag_sep=False, regime_err_sep=True,
431                 cores=False, vm=False, name_y=None, name_x=None,
432                 name_yend=None, name_q=None, name_regimes=None,
433                 name_w=None, name_gwk=None, name_ds=None):
434
435        n = USER.check_arrays(y, x)
436        y = USER.check_y(y, n)
437        USER.check_weights(w, y, w_required=True)
438        USER.check_robust(robust, gwk)
439        USER.check_spat_diag(spat_diag, w)
440        x_constant,name_x,warn = USER.check_constant(x,name_x,just_rem=True)
441        set_warn(self,warn)
442        name_x = USER.set_name_x(name_x, x_constant, constant=True)
443        name_y = USER.set_name_y(name_y)
444        name_yend = USER.set_name_yend(name_yend, yend)
445        name_q = USER.set_name_q(name_q, q)
446        name_q.extend(
447            USER.set_name_q_sp(name_x, w_lags, name_q, lag_q, force_all=True))
448        self.name_regimes = USER.set_name_ds(name_regimes)
449        self.constant_regi = constant_regi
450        self.n = n
451        cols2regi = REGI.check_cols2regi(
452            constant_regi, cols2regi, x_constant, yend=yend, add_cons=False)
453        self.cols2regi = cols2regi
454        self.regimes_set = REGI._get_regimes_set(regimes)
455        self.regimes = regimes
456        USER.check_regimes(self.regimes_set, self.n, x_constant.shape[1])
457        if regime_err_sep == True and robust == 'hac':
458            set_warn(
459                self, "Error by regimes is incompatible with HAC estimation for Spatial Lag models. Hence, error and lag by regimes have been disabled for this model.")
460            regime_err_sep = False
461            regime_lag_sep = False
462        self.regime_err_sep = regime_err_sep
463        self.regime_lag_sep = regime_lag_sep
464        if regime_lag_sep == True:
465            if not regime_err_sep:
466                raise Exception("regime_err_sep must be True when regime_lag_sep=True.")
467            cols2regi += [True]
468            w_i, regi_ids, warn = REGI.w_regimes(
469                w, regimes, self.regimes_set, transform=True, get_ids=True, min_n=len(cols2regi) + 1)
470            set_warn(self, warn)
471
472        else:
473            cols2regi += [False]
474
475        if regime_err_sep == True and set(cols2regi) == set([True]) and constant_regi == 'many':
476            self.y = y
477            self.GM_Lag_Regimes_Multi(y, x_constant, w_i, w, regi_ids,
478                                      yend=yend, q=q, w_lags=w_lags, lag_q=lag_q, cores=cores,
479                                      robust=robust, gwk=gwk, sig2n_k=sig2n_k, cols2regi=cols2regi,
480                                      spat_diag=spat_diag, vm=vm, name_y=name_y, name_x=name_x,
481                                      name_yend=name_yend, name_q=name_q, name_regimes=self.name_regimes,
482                                      name_w=name_w, name_gwk=name_gwk, name_ds=name_ds)
483        else:
484            if regime_lag_sep == True:
485                w = REGI.w_regimes_union(w, w_i, self.regimes_set)
486            yend2, q2 = set_endog(y, x_constant, w, yend, q, w_lags, lag_q)
487            name_yend.append(USER.set_name_yend_sp(name_y))
488            TSLS_Regimes.__init__(self, y=y, x=x_constant, yend=yend2, q=q2,
489                                  regimes=regimes, w=w, robust=robust, gwk=gwk,
490                                  sig2n_k=sig2n_k, spat_diag=spat_diag, vm=vm,
491                                  constant_regi=constant_regi, cols2regi=cols2regi, regime_err_sep=regime_err_sep,
492                                  name_y=name_y, name_x=name_x, name_yend=name_yend, name_q=name_q,
493                                  name_regimes=name_regimes, name_w=name_w, name_gwk=name_gwk,
494                                  name_ds=name_ds, summ=False)
495            if regime_lag_sep:
496                self.sp_att_reg(w_i, regi_ids, yend2[:, -1].reshape(self.n, 1))
497            else:
498                self.rho = self.betas[-1]
499                self.predy_e, self.e_pred, warn = sp_att(w, self.y, self.predy,
500                                                         yend2[:, -1].reshape(self.n, 1), self.rho)
501                set_warn(self, warn)
502            self.regime_lag_sep = regime_lag_sep
503            self.title = "SPATIAL " + self.title
504            SUMMARY.GM_Lag(
505                reg=self, w=w, vm=vm, spat_diag=spat_diag, regimes=True)
506
507    def GM_Lag_Regimes_Multi(self, y, x, w_i, w, regi_ids, cores=False,
508                             yend=None, q=None, w_lags=1, lag_q=True,
509                             robust=None, gwk=None, sig2n_k=False, cols2regi='all',
510                             spat_diag=False, vm=False, name_y=None, name_x=None,
511                             name_yend=None, name_q=None, name_regimes=None,
512                             name_w=None, name_gwk=None, name_ds=None):
513        #        pool = mp.Pool(cores)
514        self.name_ds = USER.set_name_ds(name_ds)
515        name_yend.append(USER.set_name_yend_sp(name_y))
516        self.name_w = USER.set_name_w(name_w, w_i)
517        self.name_gwk = USER.set_name_w(name_gwk, gwk)
518        results_p = {}
519        """
520        for r in self.regimes_set:
521            w_r = w_i[r].sparse
522            if system() == 'Windows':
523                is_win = True
524                results_p[r] = _work(*(y,x,regi_ids,r,yend,q,w_r,w_lags,lag_q,robust,sig2n_k,self.name_ds,name_y,name_x,name_yend,name_q,self.name_w,name_regimes))
525            else:
526                results_p[r] = pool.apply_async(_work,args=(y,x,regi_ids,r,yend,q,w_r,w_lags,lag_q,robust,sig2n_k,self.name_ds,name_y,name_x,name_yend,name_q,self.name_w,name_regimes, ))
527                is_win = False
528        """
529        x_constant,name_x = REGI.check_const_regi(self,x,name_x,regi_ids)
530        self.name_x_r = name_x
531        for r in self.regimes_set:
532            w_r = w_i[r].sparse
533            if cores:
534                pool = mp.Pool(None)
535                results_p[r] = pool.apply_async(_work, args=(
536                    y, x_constant, regi_ids, r, yend, q, w_r, w_lags, lag_q, robust, sig2n_k, self.name_ds, name_y, name_x, name_yend, name_q, self.name_w, name_regimes, ))
537            else:
538                results_p[r] = _work(*(y, x_constant, regi_ids, r, yend, q, w_r, w_lags, lag_q, robust,
539                                       sig2n_k, self.name_ds, name_y, name_x, name_yend, name_q, self.name_w, name_regimes))
540
541        self.kryd = 0
542        self.kr = len(cols2regi)+1
543        self.kf = 0
544        self.nr = len(self.regimes_set)
545        self.name_x_r = name_x + name_yend
546        self.name_regimes = name_regimes
547        self.vm = np.zeros((self.nr * self.kr, self.nr * self.kr), float)
548        self.betas = np.zeros((self.nr * self.kr, 1), float)
549        self.u = np.zeros((self.n, 1), float)
550        self.predy = np.zeros((self.n, 1), float)
551        self.predy_e = np.zeros((self.n, 1), float)
552        self.e_pred = np.zeros((self.n, 1), float)
553        """
554        if not is_win:
555            pool.close()
556            pool.join()
557        """
558        if cores:
559            pool.close()
560            pool.join()
561        results = {}
562        self.name_y, self.name_x, self.name_yend, self.name_q, self.name_z, self.name_h = [
563        ], [], [], [], [], []
564        counter = 0
565        for r in self.regimes_set:
566            """
567            if is_win:
568                results[r] = results_p[r]
569            else:
570                results[r] = results_p[r].get()
571            """
572            if not cores:
573                results[r] = results_p[r]
574            else:
575                results[r] = results_p[r].get()
576            results[r].predy_e, results[r].e_pred, warn = sp_att(w_i[r], results[r].y, results[
577                                                                 r].predy, results[r].yend[:, -1].reshape(results[r].n, 1), results[r].rho)
578            set_warn(results[r], warn)
579            results[r].w = w_i[r]
580            self.vm[(counter * self.kr):((counter + 1) * self.kr),
581                    (counter * self.kr):((counter + 1) * self.kr)] = results[r].vm
582            self.betas[
583                (counter * self.kr):((counter + 1) * self.kr), ] = results[r].betas
584            self.u[regi_ids[r], ] = results[r].u
585            self.predy[regi_ids[r], ] = results[r].predy
586            self.predy_e[regi_ids[r], ] = results[r].predy_e
587            self.e_pred[regi_ids[r], ] = results[r].e_pred
588            self.name_y += results[r].name_y
589            self.name_x += results[r].name_x
590            self.name_yend += results[r].name_yend
591            self.name_q += results[r].name_q
592            self.name_z += results[r].name_z
593            self.name_h += results[r].name_h
594            if r == self.regimes_set[0]:
595                self.hac_var = np.zeros((self.n, results[r].h.shape[1]), float)
596            self.hac_var[regi_ids[r], ] = results[r].h
597            counter += 1
598        self.multi = results
599        if robust == 'hac':
600            hac_multi(self, gwk, constant=True)
601        if robust == 'ogmm':
602            set_warn(
603                self, "Residuals treated as homoskedastic for the purpose of diagnostics.")
604        self.chow = REGI.Chow(self)
605        if spat_diag:
606            pass
607            #self._get_spat_diag_props(y, x, w, yend, q, w_lags, lag_q)
608        SUMMARY.GM_Lag_multi(
609            reg=self, multireg=self.multi, vm=vm, spat_diag=spat_diag, regimes=True, w=w)
610
611    def sp_att_reg(self, w_i, regi_ids, wy):
612        predy_e_r, e_pred_r = {}, {}
613        self.predy_e = np.zeros((self.n, 1), float)
614        self.e_pred = np.zeros((self.n, 1), float)
615        counter = 1
616        for r in self.regimes_set:
617            self.rho = self.betas[(self.kr - self.kryd) * self.nr + self.kf - (
618                self.yend.shape[1] - self.nr * self.kryd) + self.kryd * counter - 1]
619            self.predy_e[regi_ids[r], ], self.e_pred[regi_ids[r], ], warn = sp_att(w_i[r],
620                                                                                   self.y[regi_ids[r]], self.predy[
621                                                                                       regi_ids[r]],
622                                                                                   wy[regi_ids[r]], self.rho)
623            counter += 1
624
625    def _get_spat_diag_props(self, y, x, w, yend, q, w_lags, lag_q):
626        self._cache = {}
627        yend, q = set_endog(y, x[:,1:], w, yend, q, w_lags, lag_q)
628        #x = USER.check_constant(x)
629        x = REGI.regimeX_setup(
630            x, self.regimes, [True] * x.shape[1], self.regimes_set)
631        self.z = sphstack(x, REGI.regimeX_setup(
632            yend, self.regimes, [True] * (yend.shape[1] - 1) + [False], self.regimes_set))
633        self.h = sphstack(
634            x, REGI.regimeX_setup(q, self.regimes, [True] * q.shape[1], self.regimes_set))
635        hthi = np.linalg.inv(spdot(self.h.T, self.h))
636        zth = spdot(self.z.T, self.h)
637        self.varb = np.linalg.inv(spdot(spdot(zth, hthi), zth.T))
638
639
640def _work(y, x, regi_ids, r, yend, q, w_r, w_lags, lag_q, robust, sig2n_k, name_ds, name_y, name_x, name_yend, name_q, name_w, name_regimes):
641    y_r = y[regi_ids[r]]
642    x_r = x[regi_ids[r]]
643    if yend is not None:
644        yend_r = yend[regi_ids[r]]
645    else:
646        yend_r = yend
647    if q is not None:
648        q_r = q[regi_ids[r]]
649    else:
650        q_r = q
651    yend_r, q_r = set_endog_sparse(y_r, x_r[:,1:], w_r, yend_r, q_r, w_lags, lag_q)
652    #x_constant = USER.check_constant(x_r)
653    if robust == 'hac' or robust == 'ogmm':
654        robust2 = None
655    else:
656        robust2 = robust
657    model = BaseTSLS(
658        y_r, x_r, yend_r, q_r, robust=robust2, sig2n_k=sig2n_k)
659    model.title = "SPATIAL TWO STAGE LEAST SQUARES ESTIMATION - REGIME %s" % r
660    if robust == 'ogmm':
661        _optimal_weight(model, sig2n_k, warn=False)
662    model.rho = model.betas[-1]
663    model.robust = USER.set_robust(robust)
664    model.name_ds = name_ds
665    model.name_y = '%s_%s' % (str(r), name_y)
666    model.name_x = ['%s_%s' % (str(r), i) for i in name_x]
667    model.name_yend = ['%s_%s' % (str(r), i) for i in name_yend]
668    model.name_z = model.name_x + model.name_yend
669    model.name_q = ['%s_%s' % (str(r), i) for i in name_q]
670    model.name_h = model.name_x + model.name_q
671    model.name_w = name_w
672    model.name_regimes = name_regimes
673    return model
674
675
676def _test():
677    import doctest
678    start_suppress = np.get_printoptions()['suppress']
679    np.set_printoptions(suppress=True)
680    doctest.testmod()
681    np.set_printoptions(suppress=start_suppress)
682
683
684if __name__ == '__main__':
685    _test()
686    import numpy as np
687    import libpysal
688    from libpysal import examples
689    db = libpysal.io.open(examples.get_path("columbus.dbf"), 'r')
690    y_var = 'CRIME'
691    y = np.array([db.by_col(y_var)]).reshape(49, 1)
692    x_var = ['INC']
693    x = np.array([db.by_col(name) for name in x_var]).T
694    yd_var = ['HOVAL']
695    yd = np.array([db.by_col(name) for name in yd_var]).T
696    q_var = ['DISCBD']
697    q = np.array([db.by_col(name) for name in q_var]).T
698    r_var = 'NSA'
699    regimes = db.by_col(r_var)
700    w = libpysal.weights.Queen.from_shapefile(libpysal.examples.get_path("columbus.shp"))
701    w.transform = 'r'
702    model = GM_Lag_Regimes(y, x, regimes, yend=yd, q=q, w=w, constant_regi='many', spat_diag=True, sig2n_k=False, lag_q=True, name_y=y_var,
703                           name_x=x_var, name_yend=yd_var, name_q=q_var, name_regimes=r_var, name_ds='columbus', name_w='columbus.gal', regime_err_sep=True, robust='white')
704    print(model.summary)
705