1''' 2Spatial Two Stages Least Squares with Regimes 3''' 4 5__author__ = "Luc Anselin luc.anselin@asu.edu, Pedro V. Amaral pedro.amaral@asu.edu, David C. Folch david.folch@asu.edu" 6 7import numpy as np 8from . import regimes as REGI 9from . import user_output as USER 10from . import summary_output as SUMMARY 11import multiprocessing as mp 12from .twosls_regimes import TSLS_Regimes, _optimal_weight 13from .twosls import BaseTSLS 14from .utils import set_endog, set_endog_sparse, sp_att, set_warn, sphstack, spdot 15from .robust import hac_multi 16 17 18class GM_Lag_Regimes(TSLS_Regimes, REGI.Regimes_Frame): 19 20 """ 21 Spatial two stage least squares (S2SLS) with regimes; 22 :cite:`Anselin1988` 23 24 Parameters 25 ---------- 26 y : array 27 nx1 array for dependent variable 28 x : array 29 Two dimensional array with n rows and one column for each 30 independent (exogenous) variable, excluding the constant 31 regimes : list 32 List of n values with the mapping of each 33 observation to a regime. Assumed to be aligned with 'x'. 34 yend : array 35 Two dimensional array with n rows and one column for each 36 endogenous variable 37 q : array 38 Two dimensional array with n rows and one column for each 39 external exogenous variable to use as instruments (note: 40 this should not contain any variables from x); cannot be 41 used in combination with h 42 constant_regi: string 43 Switcher controlling the constant term setup. It may take 44 the following values: 45 46 * 'one': a vector of ones is appended to x and held constant across regimes. 47 48 * 'many': a vector of ones is appended to x and considered different per regime (default). 49 cols2regi : list, 'all' 50 Argument indicating whether each 51 column of x should be considered as different per regime 52 or held constant across regimes (False). 53 If a list, k booleans indicating for each variable the 54 option (True if one per regime, False to be held constant). 55 If 'all' (default), all the variables vary by regime. 56 w : pysal W object 57 Spatial weights object 58 w_lags : integer 59 Orders of W to include as instruments for the spatially 60 lagged dependent variable. For example, w_lags=1, then 61 instruments are WX; if w_lags=2, then WX, WWX; and so on. 62 lag_q : boolean 63 If True, then include spatial lags of the additional 64 instruments (q). 65 regime_lag_sep: boolean 66 If True (default), the spatial parameter for spatial lag is also 67 computed according to different regimes. If False, 68 the spatial parameter is fixed accross regimes. 69 Option valid only when regime_err_sep=True 70 regime_err_sep: boolean 71 If True, a separate regression is run for each regime. 72 robust : string 73 If 'white', then a White consistent estimator of the 74 variance-covariance matrix is given. 75 If 'hac', then a HAC consistent estimator of the 76 variance-covariance matrix is given. 77 If 'ogmm', then Optimal GMM is used to estimate 78 betas and the variance-covariance matrix. 79 Default set to None. 80 gwk : pysal W object 81 Kernel spatial weights needed for HAC estimation. Note: 82 matrix must have ones along the main diagonal. 83 sig2n_k : boolean 84 If True, then use n-k to estimate sigma^2. If False, use n. 85 spat_diag : boolean 86 If True, then compute Anselin-Kelejian test 87 vm : boolean 88 If True, include variance-covariance matrix in summary 89 results 90 cores : boolean 91 Specifies if multiprocessing is to be used 92 Default: no multiprocessing, cores = False 93 Note: Multiprocessing may not work on all platforms. 94 name_y : string 95 Name of dependent variable for use in output 96 name_x : list of strings 97 Names of independent variables for use in output 98 name_yend : list of strings 99 Names of endogenous variables for use in output 100 name_q : list of strings 101 Names of instruments for use in output 102 name_w : string 103 Name of weights matrix for use in output 104 name_gwk : string 105 Name of kernel weights matrix for use in output 106 name_ds : string 107 Name of dataset for use in output 108 name_regimes : string 109 Name of regimes variable for use in output 110 111 Attributes 112 ---------- 113 summary : string 114 Summary of regression results and diagnostics (note: use in 115 conjunction with the print command) 116 betas : array 117 kx1 array of estimated coefficients 118 u : array 119 nx1 array of residuals 120 e_pred : array 121 nx1 array of residuals (using reduced form) 122 predy : array 123 nx1 array of predicted y values 124 predy_e : array 125 nx1 array of predicted y values (using reduced form) 126 n : integer 127 Number of observations 128 k : integer 129 Number of variables for which coefficients are estimated 130 (including the constant) 131 Only available in dictionary 'multi' when multiple regressions 132 (see 'multi' below for details) 133 kstar : integer 134 Number of endogenous variables. 135 Only available in dictionary 'multi' when multiple regressions 136 (see 'multi' below for details) 137 y : array 138 nx1 array for dependent variable 139 x : array 140 Two dimensional array with n rows and one column for each 141 independent (exogenous) variable, including the constant 142 Only available in dictionary 'multi' when multiple regressions 143 (see 'multi' below for details) 144 yend : array 145 Two dimensional array with n rows and one column for each 146 endogenous variable 147 Only available in dictionary 'multi' when multiple regressions 148 (see 'multi' below for details) 149 q : array 150 Two dimensional array with n rows and one column for each 151 external exogenous variable used as instruments 152 Only available in dictionary 'multi' when multiple regressions 153 (see 'multi' below for details) 154 z : array 155 nxk array of variables (combination of x and yend) 156 Only available in dictionary 'multi' when multiple regressions 157 (see 'multi' below for details) 158 h : array 159 nxl array of instruments (combination of x and q) 160 Only available in dictionary 'multi' when multiple regressions 161 (see 'multi' below for details) 162 robust : string 163 Adjustment for robust standard errors 164 Only available in dictionary 'multi' when multiple regressions 165 (see 'multi' below for details) 166 mean_y : float 167 Mean of dependent variable 168 std_y : float 169 Standard deviation of dependent variable 170 vm : array 171 Variance covariance matrix (kxk) 172 pr2 : float 173 Pseudo R squared (squared correlation between y and ypred) 174 Only available in dictionary 'multi' when multiple regressions 175 (see 'multi' below for details) 176 pr2_e : float 177 Pseudo R squared (squared correlation between y and ypred_e 178 (using reduced form)) 179 Only available in dictionary 'multi' when multiple regressions 180 (see 'multi' below for details) 181 utu : float 182 Sum of squared residuals 183 sig2 : float 184 Sigma squared used in computations 185 Only available in dictionary 'multi' when multiple regressions 186 (see 'multi' below for details) 187 std_err : array 188 1xk array of standard errors of the betas 189 Only available in dictionary 'multi' when multiple regressions 190 (see 'multi' below for details) 191 z_stat : list of tuples 192 z statistic; each tuple contains the pair (statistic, 193 p-value), where each is a float 194 Only available in dictionary 'multi' when multiple regressions 195 (see 'multi' below for details) 196 ak_test : tuple 197 Anselin-Kelejian test; tuple contains the pair (statistic, 198 p-value) 199 Only available in dictionary 'multi' when multiple regressions 200 (see 'multi' below for details) 201 name_y : string 202 Name of dependent variable for use in output 203 name_x : list of strings 204 Names of independent variables for use in output 205 name_yend : list of strings 206 Names of endogenous variables for use in output 207 name_z : list of strings 208 Names of exogenous and endogenous variables for use in 209 output 210 name_q : list of strings 211 Names of external instruments 212 name_h : list of strings 213 Names of all instruments used in ouput 214 name_w : string 215 Name of weights matrix for use in output 216 name_gwk : string 217 Name of kernel weights matrix for use in output 218 name_ds : string 219 Name of dataset for use in output 220 name_regimes : string 221 Name of regimes variable for use in output 222 title : string 223 Name of the regression method used 224 Only available in dictionary 'multi' when multiple regressions 225 (see 'multi' below for details) 226 sig2n : float 227 Sigma squared (computed with n in the denominator) 228 sig2n_k : float 229 Sigma squared (computed with n-k in the denominator) 230 hth : float 231 :math:`H'H`. 232 Only available in dictionary 'multi' when multiple regressions 233 (see 'multi' below for details) 234 hthi : float 235 :math:`(H'H)^{-1}`. 236 Only available in dictionary 'multi' when multiple regressions 237 (see 'multi' below for details) 238 varb : array 239 :math:`(Z'H (H'H)^{-1} H'Z)^{-1}`. 240 Only available in dictionary 'multi' when multiple regressions 241 (see 'multi' below for details) 242 zthhthi : array 243 :math:`Z'H(H'H)^{-1}`. 244 Only available in dictionary 'multi' when multiple regressions 245 (see 'multi' below for details) 246 pfora1a2 : array 247 n(zthhthi)'varb 248 Only available in dictionary 'multi' when multiple regressions 249 (see 'multi' below for details) 250 regimes : list 251 List of n values with the mapping of each 252 observation to a regime. Assumed to be aligned with 'x'. 253 constant_regi: string 254 Ignored if regimes=False. Constant option for regimes. 255 Switcher controlling the constant term setup. It may take 256 the following values: 257 258 * 'one': a vector of ones is appended to x and held constant across regimes. 259 260 * 'many': a vector of ones is appended to x and considered different per regime. 261 cols2regi : list, 'all' 262 Ignored if regimes=False. Argument indicating whether each 263 column of x should be considered as different per regime 264 or held constant across regimes (False). 265 If a list, k booleans indicating for each variable the 266 option (True if one per regime, False to be held constant). 267 If 'all', all the variables vary by regime. 268 regime_lag_sep: boolean 269 If True, the spatial parameter for spatial lag is also 270 computed according to different regimes. If False (default), 271 the spatial parameter is fixed accross regimes. 272 regime_err_sep: boolean 273 If True, a separate regression is run for each regime. 274 kr : int 275 Number of variables/columns to be "regimized" or subject 276 to change by regime. These will result in one parameter 277 estimate by regime for each variable (i.e. nr parameters per 278 variable) 279 kf : int 280 Number of variables/columns to be considered fixed or 281 global across regimes and hence only obtain one parameter 282 estimate 283 nr : int 284 Number of different regimes in the 'regimes' list 285 multi : dictionary 286 Only available when multiple regressions are estimated, 287 i.e. when regime_err_sep=True and no variable is fixed 288 across regimes. 289 Contains all attributes of each individual regression 290 291 Examples 292 -------- 293 294 We first need to import the needed modules, namely numpy to convert the 295 data we read into arrays that ``spreg`` understands and ``pysal`` to 296 perform all the analysis. 297 298 >>> import numpy as np 299 >>> import libpysal 300 >>> from libpysal import examples 301 302 Open data on NCOVR US County Homicides (3085 areas) using libpysal.io.open(). 303 This is the DBF associated with the NAT shapefile. Note that 304 libpysal.io.open() also reads data in CSV format; since the actual class 305 requires data to be passed in as numpy arrays, the user can read their 306 data in using any method. 307 308 >>> db = libpysal.io.open(examples.get_path("NAT.dbf"),'r') 309 310 Extract the HR90 column (homicide rates in 1990) from the DBF file and make it the 311 dependent variable for the regression. Note that PySAL requires this to be 312 an numpy array of shape (n, 1) as opposed to the also common shape of (n, ) 313 that other packages accept. 314 315 >>> y_var = 'HR90' 316 >>> y = np.array([db.by_col(y_var)]).reshape(3085,1) 317 318 Extract UE90 (unemployment rate) and PS90 (population structure) vectors from 319 the DBF to be used as independent variables in the regression. Other variables 320 can be inserted by adding their names to x_var, such as x_var = ['Var1','Var2','...] 321 Note that PySAL requires this to be an nxj numpy array, where j is the 322 number of independent variables (not including a constant). By default 323 this model adds a vector of ones to the independent variables passed in. 324 325 >>> x_var = ['PS90','UE90'] 326 >>> x = np.array([db.by_col(name) for name in x_var]).T 327 328 The different regimes in this data are given according to the North and 329 South dummy (SOUTH). 330 331 >>> r_var = 'SOUTH' 332 >>> regimes = db.by_col(r_var) 333 334 Since we want to run a spatial lag model, we need to specify 335 the spatial weights matrix that includes the spatial configuration of the 336 observations. To do that, we can open an already existing gal file or 337 create a new one. In this case, we will create one from ``NAT.shp``. 338 339 >>> from libpysal import weights 340 >>> w = weights.Rook.from_shapefile(examples.get_path("NAT.shp")) 341 342 Unless there is a good reason not to do it, the weights have to be 343 row-standardized so every row of the matrix sums to one. Among other 344 things, this allows to interpret the spatial lag of a variable as the 345 average value of the neighboring observations. In PySAL, this can be 346 easily performed in the following way: 347 348 >>> w.transform = 'r' 349 350 This class runs a lag model, which means that includes the spatial lag of 351 the dependent variable on the right-hand side of the equation. If we want 352 to have the names of the variables printed in the output summary, we will 353 have to pass them in as well, although this is optional. 354 355 >>> from spreg import GM_Lag_Regimes 356 >>> model=GM_Lag_Regimes(y, x, regimes, w=w, regime_lag_sep=False, regime_err_sep=False, name_y=y_var, name_x=x_var, name_regimes=r_var, name_ds='NAT', name_w='NAT.shp') 357 >>> model.betas 358 array([[ 1.28897623], 359 [ 0.79777722], 360 [ 0.56366891], 361 [ 8.73327838], 362 [ 1.30433406], 363 [ 0.62418643], 364 [-0.39993716]]) 365 366 Once the model is run, we can have a summary of the output by typing: 367 model.summary . Alternatively, we can obtain the standard error of 368 the coefficient estimates by calling: 369 370 >>> model.std_err 371 array([0.44682888, 0.14358192, 0.05655124, 1.06044865, 0.20184548, 372 0.06118262, 0.12387232]) 373 374 In the example above, all coefficients but the spatial lag vary 375 according to the regime. It is also possible to have the spatial lag 376 varying according to the regime, which effective will result in an 377 independent spatial lag model estimated for each regime. To run these 378 models, the argument regime_lag_sep must be set to True: 379 380 >>> model=GM_Lag_Regimes(y, x, regimes, w=w, regime_lag_sep=True, name_y=y_var, name_x=x_var, name_regimes=r_var, name_ds='NAT', name_w='NAT.shp') 381 >>> print(np.hstack((np.array(model.name_z).reshape(8,1),model.betas,np.sqrt(model.vm.diagonal().reshape(8,1))))) 382 [['0_CONSTANT' '1.3658476998618099' '0.3985472089832652'] 383 ['0_PS90' '0.8087573074246643' '0.11324884794883601'] 384 ['0_UE90' '0.5694681319188577' '0.04625087717092595'] 385 ['0_W_HR90' '-0.43424389464634316' '0.13350159258670305'] 386 ['1_CONSTANT' '7.90731073341874' '1.6360187416950998'] 387 ['1_PS90' '1.2746570332609135' '0.2470987049452741'] 388 ['1_UE90' '0.6016769336173784' '0.07993322102145078'] 389 ['1_W_HR90' '-0.2960338343846942' '0.19934459782427025']] 390 391 Alternatively, we can type: 'model.summary' to see the organized results output. 392 The class is flexible enough to accomodate a spatial lag model that, 393 besides the spatial lag of the dependent variable, includes other 394 non-spatial endogenous regressors. As an example, we will add the endogenous 395 variable RD90 (resource deprivation) and we decide to instrument for it with 396 FP89 (families below poverty): 397 398 >>> yd_var = ['RD90'] 399 >>> yd = np.array([db.by_col(name) for name in yd_var]).T 400 >>> q_var = ['FP89'] 401 >>> q = np.array([db.by_col(name) for name in q_var]).T 402 403 And we can run the model again: 404 405 >>> model = GM_Lag_Regimes(y, x, regimes, yend=yd, q=q, w=w, regime_lag_sep=False, regime_err_sep=False, name_y=y_var, name_x=x_var, name_yend=yd_var, name_q=q_var, name_regimes=r_var, name_ds='NAT', name_w='NAT.shp') 406 >>> model.betas 407 array([[ 3.42195202], 408 [ 1.03311878], 409 [ 0.14308741], 410 [ 8.99740066], 411 [ 1.91877758], 412 [-0.32084816], 413 [ 2.38918212], 414 [ 3.67243761], 415 [ 0.06959139]]) 416 417 Once the model is run, we can obtain the standard error of the coefficient 418 estimates. Alternatively, we can have a summary of the output by typing: 419 model.summary 420 421 >>> model.std_err 422 array([0.49163311, 0.12237382, 0.05633464, 0.72555909, 0.17250521, 423 0.06749131, 0.27370369, 0.25106224, 0.05804213]) 424 """ 425 426 def __init__(self, y, x, regimes, yend=None, q=None, 427 w=None, w_lags=1, lag_q=True, 428 robust=None, gwk=None, sig2n_k=False, 429 spat_diag=False, constant_regi='many', 430 cols2regi='all', regime_lag_sep=False, regime_err_sep=True, 431 cores=False, vm=False, name_y=None, name_x=None, 432 name_yend=None, name_q=None, name_regimes=None, 433 name_w=None, name_gwk=None, name_ds=None): 434 435 n = USER.check_arrays(y, x) 436 y = USER.check_y(y, n) 437 USER.check_weights(w, y, w_required=True) 438 USER.check_robust(robust, gwk) 439 USER.check_spat_diag(spat_diag, w) 440 x_constant,name_x,warn = USER.check_constant(x,name_x,just_rem=True) 441 set_warn(self,warn) 442 name_x = USER.set_name_x(name_x, x_constant, constant=True) 443 name_y = USER.set_name_y(name_y) 444 name_yend = USER.set_name_yend(name_yend, yend) 445 name_q = USER.set_name_q(name_q, q) 446 name_q.extend( 447 USER.set_name_q_sp(name_x, w_lags, name_q, lag_q, force_all=True)) 448 self.name_regimes = USER.set_name_ds(name_regimes) 449 self.constant_regi = constant_regi 450 self.n = n 451 cols2regi = REGI.check_cols2regi( 452 constant_regi, cols2regi, x_constant, yend=yend, add_cons=False) 453 self.cols2regi = cols2regi 454 self.regimes_set = REGI._get_regimes_set(regimes) 455 self.regimes = regimes 456 USER.check_regimes(self.regimes_set, self.n, x_constant.shape[1]) 457 if regime_err_sep == True and robust == 'hac': 458 set_warn( 459 self, "Error by regimes is incompatible with HAC estimation for Spatial Lag models. Hence, error and lag by regimes have been disabled for this model.") 460 regime_err_sep = False 461 regime_lag_sep = False 462 self.regime_err_sep = regime_err_sep 463 self.regime_lag_sep = regime_lag_sep 464 if regime_lag_sep == True: 465 if not regime_err_sep: 466 raise Exception("regime_err_sep must be True when regime_lag_sep=True.") 467 cols2regi += [True] 468 w_i, regi_ids, warn = REGI.w_regimes( 469 w, regimes, self.regimes_set, transform=True, get_ids=True, min_n=len(cols2regi) + 1) 470 set_warn(self, warn) 471 472 else: 473 cols2regi += [False] 474 475 if regime_err_sep == True and set(cols2regi) == set([True]) and constant_regi == 'many': 476 self.y = y 477 self.GM_Lag_Regimes_Multi(y, x_constant, w_i, w, regi_ids, 478 yend=yend, q=q, w_lags=w_lags, lag_q=lag_q, cores=cores, 479 robust=robust, gwk=gwk, sig2n_k=sig2n_k, cols2regi=cols2regi, 480 spat_diag=spat_diag, vm=vm, name_y=name_y, name_x=name_x, 481 name_yend=name_yend, name_q=name_q, name_regimes=self.name_regimes, 482 name_w=name_w, name_gwk=name_gwk, name_ds=name_ds) 483 else: 484 if regime_lag_sep == True: 485 w = REGI.w_regimes_union(w, w_i, self.regimes_set) 486 yend2, q2 = set_endog(y, x_constant, w, yend, q, w_lags, lag_q) 487 name_yend.append(USER.set_name_yend_sp(name_y)) 488 TSLS_Regimes.__init__(self, y=y, x=x_constant, yend=yend2, q=q2, 489 regimes=regimes, w=w, robust=robust, gwk=gwk, 490 sig2n_k=sig2n_k, spat_diag=spat_diag, vm=vm, 491 constant_regi=constant_regi, cols2regi=cols2regi, regime_err_sep=regime_err_sep, 492 name_y=name_y, name_x=name_x, name_yend=name_yend, name_q=name_q, 493 name_regimes=name_regimes, name_w=name_w, name_gwk=name_gwk, 494 name_ds=name_ds, summ=False) 495 if regime_lag_sep: 496 self.sp_att_reg(w_i, regi_ids, yend2[:, -1].reshape(self.n, 1)) 497 else: 498 self.rho = self.betas[-1] 499 self.predy_e, self.e_pred, warn = sp_att(w, self.y, self.predy, 500 yend2[:, -1].reshape(self.n, 1), self.rho) 501 set_warn(self, warn) 502 self.regime_lag_sep = regime_lag_sep 503 self.title = "SPATIAL " + self.title 504 SUMMARY.GM_Lag( 505 reg=self, w=w, vm=vm, spat_diag=spat_diag, regimes=True) 506 507 def GM_Lag_Regimes_Multi(self, y, x, w_i, w, regi_ids, cores=False, 508 yend=None, q=None, w_lags=1, lag_q=True, 509 robust=None, gwk=None, sig2n_k=False, cols2regi='all', 510 spat_diag=False, vm=False, name_y=None, name_x=None, 511 name_yend=None, name_q=None, name_regimes=None, 512 name_w=None, name_gwk=None, name_ds=None): 513 # pool = mp.Pool(cores) 514 self.name_ds = USER.set_name_ds(name_ds) 515 name_yend.append(USER.set_name_yend_sp(name_y)) 516 self.name_w = USER.set_name_w(name_w, w_i) 517 self.name_gwk = USER.set_name_w(name_gwk, gwk) 518 results_p = {} 519 """ 520 for r in self.regimes_set: 521 w_r = w_i[r].sparse 522 if system() == 'Windows': 523 is_win = True 524 results_p[r] = _work(*(y,x,regi_ids,r,yend,q,w_r,w_lags,lag_q,robust,sig2n_k,self.name_ds,name_y,name_x,name_yend,name_q,self.name_w,name_regimes)) 525 else: 526 results_p[r] = pool.apply_async(_work,args=(y,x,regi_ids,r,yend,q,w_r,w_lags,lag_q,robust,sig2n_k,self.name_ds,name_y,name_x,name_yend,name_q,self.name_w,name_regimes, )) 527 is_win = False 528 """ 529 x_constant,name_x = REGI.check_const_regi(self,x,name_x,regi_ids) 530 self.name_x_r = name_x 531 for r in self.regimes_set: 532 w_r = w_i[r].sparse 533 if cores: 534 pool = mp.Pool(None) 535 results_p[r] = pool.apply_async(_work, args=( 536 y, x_constant, regi_ids, r, yend, q, w_r, w_lags, lag_q, robust, sig2n_k, self.name_ds, name_y, name_x, name_yend, name_q, self.name_w, name_regimes, )) 537 else: 538 results_p[r] = _work(*(y, x_constant, regi_ids, r, yend, q, w_r, w_lags, lag_q, robust, 539 sig2n_k, self.name_ds, name_y, name_x, name_yend, name_q, self.name_w, name_regimes)) 540 541 self.kryd = 0 542 self.kr = len(cols2regi)+1 543 self.kf = 0 544 self.nr = len(self.regimes_set) 545 self.name_x_r = name_x + name_yend 546 self.name_regimes = name_regimes 547 self.vm = np.zeros((self.nr * self.kr, self.nr * self.kr), float) 548 self.betas = np.zeros((self.nr * self.kr, 1), float) 549 self.u = np.zeros((self.n, 1), float) 550 self.predy = np.zeros((self.n, 1), float) 551 self.predy_e = np.zeros((self.n, 1), float) 552 self.e_pred = np.zeros((self.n, 1), float) 553 """ 554 if not is_win: 555 pool.close() 556 pool.join() 557 """ 558 if cores: 559 pool.close() 560 pool.join() 561 results = {} 562 self.name_y, self.name_x, self.name_yend, self.name_q, self.name_z, self.name_h = [ 563 ], [], [], [], [], [] 564 counter = 0 565 for r in self.regimes_set: 566 """ 567 if is_win: 568 results[r] = results_p[r] 569 else: 570 results[r] = results_p[r].get() 571 """ 572 if not cores: 573 results[r] = results_p[r] 574 else: 575 results[r] = results_p[r].get() 576 results[r].predy_e, results[r].e_pred, warn = sp_att(w_i[r], results[r].y, results[ 577 r].predy, results[r].yend[:, -1].reshape(results[r].n, 1), results[r].rho) 578 set_warn(results[r], warn) 579 results[r].w = w_i[r] 580 self.vm[(counter * self.kr):((counter + 1) * self.kr), 581 (counter * self.kr):((counter + 1) * self.kr)] = results[r].vm 582 self.betas[ 583 (counter * self.kr):((counter + 1) * self.kr), ] = results[r].betas 584 self.u[regi_ids[r], ] = results[r].u 585 self.predy[regi_ids[r], ] = results[r].predy 586 self.predy_e[regi_ids[r], ] = results[r].predy_e 587 self.e_pred[regi_ids[r], ] = results[r].e_pred 588 self.name_y += results[r].name_y 589 self.name_x += results[r].name_x 590 self.name_yend += results[r].name_yend 591 self.name_q += results[r].name_q 592 self.name_z += results[r].name_z 593 self.name_h += results[r].name_h 594 if r == self.regimes_set[0]: 595 self.hac_var = np.zeros((self.n, results[r].h.shape[1]), float) 596 self.hac_var[regi_ids[r], ] = results[r].h 597 counter += 1 598 self.multi = results 599 if robust == 'hac': 600 hac_multi(self, gwk, constant=True) 601 if robust == 'ogmm': 602 set_warn( 603 self, "Residuals treated as homoskedastic for the purpose of diagnostics.") 604 self.chow = REGI.Chow(self) 605 if spat_diag: 606 pass 607 #self._get_spat_diag_props(y, x, w, yend, q, w_lags, lag_q) 608 SUMMARY.GM_Lag_multi( 609 reg=self, multireg=self.multi, vm=vm, spat_diag=spat_diag, regimes=True, w=w) 610 611 def sp_att_reg(self, w_i, regi_ids, wy): 612 predy_e_r, e_pred_r = {}, {} 613 self.predy_e = np.zeros((self.n, 1), float) 614 self.e_pred = np.zeros((self.n, 1), float) 615 counter = 1 616 for r in self.regimes_set: 617 self.rho = self.betas[(self.kr - self.kryd) * self.nr + self.kf - ( 618 self.yend.shape[1] - self.nr * self.kryd) + self.kryd * counter - 1] 619 self.predy_e[regi_ids[r], ], self.e_pred[regi_ids[r], ], warn = sp_att(w_i[r], 620 self.y[regi_ids[r]], self.predy[ 621 regi_ids[r]], 622 wy[regi_ids[r]], self.rho) 623 counter += 1 624 625 def _get_spat_diag_props(self, y, x, w, yend, q, w_lags, lag_q): 626 self._cache = {} 627 yend, q = set_endog(y, x[:,1:], w, yend, q, w_lags, lag_q) 628 #x = USER.check_constant(x) 629 x = REGI.regimeX_setup( 630 x, self.regimes, [True] * x.shape[1], self.regimes_set) 631 self.z = sphstack(x, REGI.regimeX_setup( 632 yend, self.regimes, [True] * (yend.shape[1] - 1) + [False], self.regimes_set)) 633 self.h = sphstack( 634 x, REGI.regimeX_setup(q, self.regimes, [True] * q.shape[1], self.regimes_set)) 635 hthi = np.linalg.inv(spdot(self.h.T, self.h)) 636 zth = spdot(self.z.T, self.h) 637 self.varb = np.linalg.inv(spdot(spdot(zth, hthi), zth.T)) 638 639 640def _work(y, x, regi_ids, r, yend, q, w_r, w_lags, lag_q, robust, sig2n_k, name_ds, name_y, name_x, name_yend, name_q, name_w, name_regimes): 641 y_r = y[regi_ids[r]] 642 x_r = x[regi_ids[r]] 643 if yend is not None: 644 yend_r = yend[regi_ids[r]] 645 else: 646 yend_r = yend 647 if q is not None: 648 q_r = q[regi_ids[r]] 649 else: 650 q_r = q 651 yend_r, q_r = set_endog_sparse(y_r, x_r[:,1:], w_r, yend_r, q_r, w_lags, lag_q) 652 #x_constant = USER.check_constant(x_r) 653 if robust == 'hac' or robust == 'ogmm': 654 robust2 = None 655 else: 656 robust2 = robust 657 model = BaseTSLS( 658 y_r, x_r, yend_r, q_r, robust=robust2, sig2n_k=sig2n_k) 659 model.title = "SPATIAL TWO STAGE LEAST SQUARES ESTIMATION - REGIME %s" % r 660 if robust == 'ogmm': 661 _optimal_weight(model, sig2n_k, warn=False) 662 model.rho = model.betas[-1] 663 model.robust = USER.set_robust(robust) 664 model.name_ds = name_ds 665 model.name_y = '%s_%s' % (str(r), name_y) 666 model.name_x = ['%s_%s' % (str(r), i) for i in name_x] 667 model.name_yend = ['%s_%s' % (str(r), i) for i in name_yend] 668 model.name_z = model.name_x + model.name_yend 669 model.name_q = ['%s_%s' % (str(r), i) for i in name_q] 670 model.name_h = model.name_x + model.name_q 671 model.name_w = name_w 672 model.name_regimes = name_regimes 673 return model 674 675 676def _test(): 677 import doctest 678 start_suppress = np.get_printoptions()['suppress'] 679 np.set_printoptions(suppress=True) 680 doctest.testmod() 681 np.set_printoptions(suppress=start_suppress) 682 683 684if __name__ == '__main__': 685 _test() 686 import numpy as np 687 import libpysal 688 from libpysal import examples 689 db = libpysal.io.open(examples.get_path("columbus.dbf"), 'r') 690 y_var = 'CRIME' 691 y = np.array([db.by_col(y_var)]).reshape(49, 1) 692 x_var = ['INC'] 693 x = np.array([db.by_col(name) for name in x_var]).T 694 yd_var = ['HOVAL'] 695 yd = np.array([db.by_col(name) for name in yd_var]).T 696 q_var = ['DISCBD'] 697 q = np.array([db.by_col(name) for name in q_var]).T 698 r_var = 'NSA' 699 regimes = db.by_col(r_var) 700 w = libpysal.weights.Queen.from_shapefile(libpysal.examples.get_path("columbus.shp")) 701 w.transform = 'r' 702 model = GM_Lag_Regimes(y, x, regimes, yend=yd, q=q, w=w, constant_regi='many', spat_diag=True, sig2n_k=False, lag_q=True, name_y=y_var, 703 name_x=x_var, name_yend=yd_var, name_q=q_var, name_regimes=r_var, name_ds='columbus', name_w='columbus.gal', regime_err_sep=True, robust='white') 704 print(model.summary) 705