1""" 2ML Estimation of Spatial Error Model 3""" 4 5__author__ = "Luc Anselin luc.anselin@asu.edu,\ 6 Serge Rey srey@asu.edu, \ 7 Levi Wolf levi.john.wolf@asu.edu" 8 9import numpy as np 10import numpy.linalg as la 11from scipy import sparse as sp 12from scipy.sparse.linalg import splu as SuperLU 13from .utils import RegressionPropsY, RegressionPropsVM, set_warn 14from . import diagnostics as DIAG 15from . import user_output as USER 16from . import summary_output as SUMMARY 17from . import regimes as REGI 18from .w_utils import symmetrize 19try: 20 from scipy.optimize import minimize_scalar 21 minimize_scalar_available = True 22except ImportError: 23 minimize_scalar_available = False 24from .sputils import spdot, spfill_diagonal, spinv 25from libpysal import weights 26 27__all__ = ["ML_Error"] 28 29 30class BaseML_Error(RegressionPropsY, RegressionPropsVM, REGI.Regimes_Frame): 31 32 """ 33 ML estimation of the spatial error model (note no consistency 34 checks, diagnostics or constants added): :cite:`Anselin1988` 35 36 Parameters 37 ---------- 38 y : array 39 nx1 array for dependent variable 40 x : array 41 Two dimensional array with n rows and one column for each 42 independent (exogenous) variable, excluding the constant 43 w : Sparse matrix 44 Spatial weights sparse matrix 45 method : string 46 if 'full', brute force calculation (full matrix expressions) 47 if 'ord', Ord eigenvalue calculation 48 if 'LU', LU decomposition for sparse matrices 49 epsilon : float 50 tolerance criterion in mimimize_scalar function and inverse_product 51 regimes_att : dictionary 52 Dictionary containing elements to be used in case of a regimes model, 53 i.e. 'x' before regimes, 'regimes' list and 'cols2regi' 54 55 56 Attributes 57 ---------- 58 betas : array 59 kx1 array of estimated coefficients 60 lam : float 61 estimate of spatial autoregressive coefficient 62 u : array 63 nx1 array of residuals 64 e_filtered : array 65 spatially filtered residuals 66 predy : array 67 nx1 array of predicted y values 68 n : integer 69 Number of observations 70 k : integer 71 Number of variables for which coefficients are estimated 72 (including the constant, excluding the rho) 73 y : array 74 nx1 array for dependent variable 75 x : array 76 Two dimensional array with n rows and one column for each 77 independent (exogenous) variable, including the constant 78 method : string 79 log Jacobian method 80 if 'full': brute force (full matrix computations) 81 if 'ord' : Ord eigenvalue method 82 epsilon : float 83 tolerance criterion used in minimize_scalar function and inverse_product 84 mean_y : float 85 Mean of dependent variable 86 std_y : float 87 Standard deviation of dependent variable 88 vm : array 89 Variance covariance matrix (k+1 x k+1) - includes lambda 90 vm1 : array 91 2x2 array of variance covariance for lambda, sigma 92 sig2 : float 93 Sigma squared used in computations 94 logll : float 95 maximized log-likelihood (including constant terms) 96 97 Examples 98 -------- 99 >>> import numpy as np 100 >>> import libpysal 101 >>> from libpysal import examples 102 >>> from libpysal.examples import load_example 103 >>> import spreg 104 >>> np.set_printoptions(suppress=True) #prevent scientific format 105 >>> south = load_example('South') 106 >>> db = libpysal.io.open(south.get_path("south.dbf"),'r') 107 >>> y_name = "HR90" 108 >>> y = np.array(db.by_col(y_name)) 109 >>> y.shape = (len(y),1) 110 >>> x_names = ["RD90","PS90","UE90","DV90"] 111 >>> x = np.array([db.by_col(var) for var in x_names]).T 112 >>> x = np.hstack((np.ones((len(y),1)),x)) 113 >>> w = libpysal.weights.Queen.from_shapefile(south.get_path("south.shp")) 114 >>> w.transform = 'r' 115 >>> mlerr = spreg.ml.error.BaseML_Error(y,x,w) #doctest: +SKIP 116 >>> "{0:.6f}".format(mlerr.lam) #doctest: +SKIP 117 '0.299078' 118 >>> np.around(mlerr.betas, decimals=4) #doctest: +SKIP 119 array([[ 6.1492], 120 [ 4.4024], 121 [ 1.7784], 122 [-0.3781], 123 [ 0.4858], 124 [ 0.2991]]) 125 >>> "{0:.6f}".format(mlerr.mean_y) #doctest: +SKIP 126 '9.549293' 127 >>> "{0:.6f}".format(mlerr.std_y) #doctest: +SKIP 128 '7.038851' 129 >>> np.diag(mlerr.vm) #doctest: +SKIP 130 array([ 1.06476526, 0.05548248, 0.04544514, 0.00614425, 0.01481356, 131 0.00143001]) 132 >>> "{0:.6f}".format(mlerr.sig2[0][0]) #doctest: +SKIP 133 '32.406854' 134 >>> "{0:.6f}".format(mlerr.logll) #doctest: +SKIP 135 '-4471.407067' 136 >>> mlerr1 = BaseML_Error(y,x,w,method='ord') #doctest: +SKIP 137 >>> "{0:.6f}".format(mlerr1.lam) #doctest: +SKIP 138 '0.299078' 139 >>> np.around(mlerr1.betas, decimals=4) #doctest: +SKIP 140 array([[ 6.1492], 141 [ 4.4024], 142 [ 1.7784], 143 [-0.3781], 144 [ 0.4858], 145 [ 0.2991]]) 146 >>> "{0:.6f}".format(mlerr1.mean_y) #doctest: +SKIP 147 '9.549293' 148 >>> "{0:.6f}".format(mlerr1.std_y) #doctest: +SKIP 149 '7.038851' 150 >>> np.around(np.diag(mlerr1.vm), decimals=4) #doctest: +SKIP 151 array([ 1.0648, 0.0555, 0.0454, 0.0061, 0.0148, 0.0014]) 152 >>> "{0:.4f}".format(mlerr1.sig2[0][0]) #doctest: +SKIP 153 '32.4069' 154 >>> "{0:.4f}".format(mlerr1.logll) #doctest: +SKIP 155 '-4471.4071' 156 157 """ 158 159 def __init__(self, y, x, w, method='full', epsilon=0.0000001, regimes_att=None): 160 # set up main regression variables and spatial filters 161 self.y = y 162 if regimes_att: 163 self.x = x.toarray() 164 else: 165 self.x = x 166 self.n, self.k = self.x.shape 167 self.method = method 168 self.epsilon = epsilon 169 170 #W = w.full()[0] #wait to build pending what is needed 171 #Wsp = w.sparse 172 173 ylag = weights.lag_spatial(w, self.y) 174 xlag = self.get_x_lag(w, regimes_att) 175 176 # call minimizer using concentrated log-likelihood to get lambda 177 methodML = method.upper() 178 if methodML in ['FULL', 'LU', 'ORD']: 179 if methodML == 'FULL': 180 W = w.full()[0] # need dense here 181 res = minimize_scalar(err_c_loglik, 0.0, bounds=(-1.0, 1.0), 182 args=(self.n, self.y, ylag, self.x, 183 xlag, W), method='bounded', 184 tol=epsilon) 185 elif methodML == 'LU': 186 I = sp.identity(w.n) 187 Wsp = w.sparse # need sparse here 188 res = minimize_scalar(err_c_loglik_sp, 0.0, bounds=(-1.0,1.0), 189 args=(self.n, self.y, ylag, 190 self.x, xlag, I, Wsp), 191 method='bounded', tol=epsilon) 192 W = Wsp 193 elif methodML == 'ORD': 194 # check on symmetry structure 195 if w.asymmetry(intrinsic=False) == []: 196 ww = symmetrize(w) 197 WW = np.array(ww.todense()) 198 evals = la.eigvalsh(WW) 199 W = WW 200 else: 201 W = w.full()[0] # need dense here 202 evals = la.eigvals(W) 203 res = minimize_scalar( 204 err_c_loglik_ord, 0.0, bounds=(-1.0, 1.0), 205 args=(self.n, self.y, ylag, self.x, 206 xlag, evals), method='bounded', 207 tol=epsilon) 208 else: 209 raise Exception("{0} is an unsupported method".format(method)) 210 211 self.lam = res.x 212 213 # compute full log-likelihood, including constants 214 ln2pi = np.log(2.0 * np.pi) 215 llik = -res.fun - self.n / 2.0 * ln2pi - self.n / 2.0 216 217 self.logll = llik 218 219 # b, residuals and predicted values 220 221 ys = self.y - self.lam * ylag 222 xs = self.x - self.lam * xlag 223 xsxs = np.dot(xs.T, xs) 224 xsxsi = np.linalg.inv(xsxs) 225 xsys = np.dot(xs.T, ys) 226 b = np.dot(xsxsi, xsys) 227 228 self.betas = np.vstack((b, self.lam)) 229 230 self.u = y - np.dot(self.x, b) 231 self.predy = self.y - self.u 232 233 # residual variance 234 235 self.e_filtered = self.u - self.lam * weights.lag_spatial(w, self.u) 236 self.sig2 = np.dot(self.e_filtered.T, self.e_filtered) / self.n 237 238 # variance-covariance matrix betas 239 240 varb = self.sig2 * xsxsi 241 242 # variance-covariance matrix lambda, sigma 243 244 a = -self.lam * W 245 spfill_diagonal(a, 1.0) 246 ai = spinv(a) 247 wai = spdot(W, ai) 248 tr1 = wai.diagonal().sum() 249 250 wai2 = spdot(wai, wai) 251 tr2 = wai2.diagonal().sum() 252 253 waiTwai = spdot(wai.T, wai) 254 tr3 = waiTwai.diagonal().sum() 255 256 v1 = np.vstack((tr2 + tr3, 257 tr1 / self.sig2)) 258 v2 = np.vstack((tr1 / self.sig2, 259 self.n / (2.0 * self.sig2 ** 2))) 260 261 v = np.hstack((v1, v2)) 262 263 self.vm1 = np.linalg.inv(v) 264 265 # create variance matrix for beta, lambda 266 vv = np.hstack((varb, np.zeros((self.k, 1)))) 267 vv1 = np.hstack( 268 (np.zeros((1, self.k)), self.vm1[0, 0] * np.ones((1, 1)))) 269 270 self.vm = np.vstack((vv, vv1)) 271 272 273 def get_x_lag(self, w, regimes_att): 274 if regimes_att: 275 xlag = weights.lag_spatial(w, regimes_att['x']) 276 xlag = REGI.Regimes_Frame.__init__(self, xlag, 277 regimes_att['regimes'], constant_regi=None, cols2regi=regimes_att['cols2regi'])[0] 278 xlag = xlag.toarray() 279 else: 280 xlag = weights.lag_spatial(w, self.x) 281 return xlag 282 283 284class ML_Error(BaseML_Error): 285 286 """ 287 ML estimation of the spatial error model with all results and diagnostics; 288 :cite:`Anselin1988` 289 290 Parameters 291 ---------- 292 y : array 293 nx1 array for dependent variable 294 x : array 295 Two dimensional array with n rows and one column for each 296 independent (exogenous) variable, excluding the constant 297 w : Sparse matrix 298 Spatial weights sparse matrix 299 method : string 300 if 'full', brute force calculation (full matrix expressions) 301 if 'ord', Ord eigenvalue method 302 if 'LU', LU sparse matrix decomposition 303 epsilon : float 304 tolerance criterion in mimimize_scalar function and inverse_product 305 vm : boolean 306 if True, include variance-covariance matrix in summary 307 results 308 name_y : string 309 Name of dependent variable for use in output 310 name_x : list of strings 311 Names of independent variables for use in output 312 name_w : string 313 Name of weights matrix for use in output 314 name_ds : string 315 Name of dataset for use in output 316 317 Attributes 318 ---------- 319 betas : array 320 (k+1)x1 array of estimated coefficients (rho first) 321 lam : float 322 estimate of spatial autoregressive coefficient 323 u : array 324 nx1 array of residuals 325 e_filtered : array 326 nx1 array of spatially filtered residuals 327 predy : array 328 nx1 array of predicted y values 329 n : integer 330 Number of observations 331 k : integer 332 Number of variables for which coefficients are estimated 333 (including the constant, excluding lambda) 334 y : array 335 nx1 array for dependent variable 336 x : array 337 Two dimensional array with n rows and one column for each 338 independent (exogenous) variable, including the constant 339 method : string 340 log Jacobian method 341 if 'full': brute force (full matrix computations) 342 epsilon : float 343 tolerance criterion used in minimize_scalar function and inverse_product 344 mean_y : float 345 Mean of dependent variable 346 std_y : float 347 Standard deviation of dependent variable 348 varb : array 349 Variance covariance matrix (k+1 x k+1) - includes var(lambda) 350 vm1 : array 351 variance covariance matrix for lambda, sigma (2 x 2) 352 sig2 : float 353 Sigma squared used in computations 354 logll : float 355 maximized log-likelihood (including constant terms) 356 pr2 : float 357 Pseudo R squared (squared correlation between y and ypred) 358 utu : float 359 Sum of squared residuals 360 std_err : array 361 1xk array of standard errors of the betas 362 z_stat : list of tuples 363 z statistic; each tuple contains the pair (statistic, 364 p-value), where each is a float 365 name_y : string 366 Name of dependent variable for use in output 367 name_x : list of strings 368 Names of independent variables for use in output 369 name_w : string 370 Name of weights matrix for use in output 371 name_ds : string 372 Name of dataset for use in output 373 title : string 374 Name of the regression method used 375 376 Examples 377 -------- 378 379 >>> import numpy as np 380 >>> import libpysal 381 >>> from libpysal.examples import load_example 382 >>> from libpysal.weights import Queen 383 >>> from spreg import ML_Error 384 >>> np.set_printoptions(suppress=True) #prevent scientific format 385 >>> south = load_example('South') 386 >>> db = libpysal.io.open(south.get_path("south.dbf"),'r') 387 >>> y_name = "HR90" 388 >>> y = np.array(db.by_col(y_name)) 389 >>> y.shape = (len(y),1) 390 >>> x_names = ["RD90","PS90","UE90","DV90"] 391 >>> x = np.array([db.by_col(var) for var in x_names]).T 392 >>> w = Queen.from_shapefile(south.get_path("south.shp")) 393 >>> w_name = "south_q.gal" 394 >>> w.transform = 'r' 395 >>> mlerr = ML_Error(y,x,w,name_y=y_name,name_x=x_names,\ 396 name_w=w_name,name_ds=ds_name) #doctest: +SKIP 397 >>> np.around(mlerr.betas, decimals=4) #doctest: +SKIP 398 array([[ 6.1492], 399 [ 4.4024], 400 [ 1.7784], 401 [-0.3781], 402 [ 0.4858], 403 [ 0.2991]]) 404 >>> "{0:.4f}".format(mlerr.lam) #doctest: +SKIP 405 '0.2991' 406 >>> "{0:.4f}".format(mlerr.mean_y) #doctest: +SKIP 407 '9.5493' 408 >>> "{0:.4f}".format(mlerr.std_y) #doctest: +SKIP 409 '7.0389' 410 >>> np.around(np.diag(mlerr.vm), decimals=4) #doctest: +SKIP 411 array([ 1.0648, 0.0555, 0.0454, 0.0061, 0.0148, 0.0014]) 412 >>> np.around(mlerr.sig2, decimals=4) #doctest: +SKIP 413 array([[ 32.4069]]) 414 >>> "{0:.4f}".format(mlerr.logll) #doctest: +SKIP 415 '-4471.4071' 416 >>> "{0:.4f}".format(mlerr.aic) #doctest: +SKIP 417 '8952.8141' 418 >>> "{0:.4f}".format(mlerr.schwarz) #doctest: +SKIP 419 '8979.0779' 420 >>> "{0:.4f}".format(mlerr.pr2) #doctest: +SKIP 421 '0.3058' 422 >>> "{0:.4f}".format(mlerr.utu) #doctest: +SKIP 423 '48534.9148' 424 >>> np.around(mlerr.std_err, decimals=4) #doctest: +SKIP 425 array([ 1.0319, 0.2355, 0.2132, 0.0784, 0.1217, 0.0378]) 426 >>> np.around(mlerr.z_stat, decimals=4) #doctest: +SKIP 427 array([[ 5.9593, 0. ], 428 [ 18.6902, 0. ], 429 [ 8.3422, 0. ], 430 [ -4.8233, 0. ], 431 [ 3.9913, 0.0001], 432 [ 7.9089, 0. ]]) 433 >>> mlerr.name_y #doctest: +SKIP 434 'HR90' 435 >>> mlerr.name_x #doctest: +SKIP 436 ['CONSTANT', 'RD90', 'PS90', 'UE90', 'DV90', 'lambda'] 437 >>> mlerr.name_w #doctest: +SKIP 438 'south_q.gal' 439 >>> mlerr.name_ds #doctest: +SKIP 440 'south.dbf' 441 >>> mlerr.title #doctest: +SKIP 442 'MAXIMUM LIKELIHOOD SPATIAL ERROR (METHOD = FULL)' 443 444 445 """ 446 447 def __init__(self, y, x, w, method='full', epsilon=0.0000001, 448 vm=False, name_y=None, name_x=None, 449 name_w=None, name_ds=None): 450 n = USER.check_arrays(y, x) 451 y = USER.check_y(y, n) 452 USER.check_weights(w, y, w_required=True) 453 x_constant,name_x,warn = USER.check_constant(x,name_x) 454 set_warn(self, warn) 455 method = method.upper() 456 BaseML_Error.__init__(self, y=y, x=x_constant, 457 w=w, method=method, epsilon=epsilon) 458 self.title = "MAXIMUM LIKELIHOOD SPATIAL ERROR" + \ 459 " (METHOD = " + method + ")" 460 self.name_ds = USER.set_name_ds(name_ds) 461 self.name_y = USER.set_name_y(name_y) 462 self.name_x = USER.set_name_x(name_x, x) 463 self.name_x.append('lambda') 464 self.name_w = USER.set_name_w(name_w, w) 465 self.aic = DIAG.akaike(reg=self) 466 self.schwarz = DIAG.schwarz(reg=self) 467 SUMMARY.ML_Error(reg=self, w=w, vm=vm, spat_diag=False) 468 469 470def err_c_loglik(lam, n, y, ylag, x, xlag, W): 471 # concentrated log-lik for error model, no constants, brute force 472 ys = y - lam * ylag 473 xs = x - lam * xlag 474 ysys = np.dot(ys.T, ys) 475 xsxs = np.dot(xs.T, xs) 476 xsxsi = np.linalg.inv(xsxs) 477 xsys = np.dot(xs.T, ys) 478 x1 = np.dot(xsxsi, xsys) 479 x2 = np.dot(xsys.T, x1) 480 ee = ysys - x2 481 sig2 = ee[0][0] / n 482 nlsig2 = (n / 2.0) * np.log(sig2) 483 a = -lam * W 484 np.fill_diagonal(a, 1.0) 485 jacob = np.log(np.linalg.det(a)) 486 # this is the negative of the concentrated log lik for minimization 487 clik = nlsig2 - jacob 488 return clik 489 490def err_c_loglik_sp(lam, n, y, ylag, x, xlag, I, Wsp): 491 # concentrated log-lik for error model, no constants, LU 492 if isinstance(lam, np.ndarray): 493 if lam.shape == (1,1): 494 lam = lam[0][0] #why does the interior value change? 495 ys = y - lam * ylag 496 xs = x - lam * xlag 497 ysys = np.dot(ys.T, ys) 498 xsxs = np.dot(xs.T, xs) 499 xsxsi = np.linalg.inv(xsxs) 500 xsys = np.dot(xs.T, ys) 501 x1 = np.dot(xsxsi, xsys) 502 x2 = np.dot(xsys.T, x1) 503 ee = ysys - x2 504 sig2 = ee[0][0] / n 505 nlsig2 = (n / 2.0) * np.log(sig2) 506 a = I - lam * Wsp 507 LU = SuperLU(a.tocsc()) 508 jacob = np.sum(np.log(np.abs(LU.U.diagonal()))) 509 # this is the negative of the concentrated log lik for minimization 510 clik = nlsig2 - jacob 511 return clik 512 513 514def err_c_loglik_ord(lam, n, y, ylag, x, xlag, evals): 515 # concentrated log-lik for error model, no constants, eigenvalues 516 ys = y - lam * ylag 517 xs = x - lam * xlag 518 ysys = np.dot(ys.T, ys) 519 xsxs = np.dot(xs.T, xs) 520 xsxsi = np.linalg.inv(xsxs) 521 xsys = np.dot(xs.T, ys) 522 x1 = np.dot(xsxsi, xsys) 523 x2 = np.dot(xsys.T, x1) 524 ee = ysys - x2 525 sig2 = ee[0][0] / n 526 nlsig2 = (n / 2.0) * np.log(sig2) 527 revals = lam * evals 528 jacob = np.log(1 - revals).sum() 529 if isinstance(jacob, complex): 530 jacob = jacob.real 531 # this is the negative of the concentrated log lik for minimization 532 clik = nlsig2 - jacob 533 return clik 534 535 536def _test(): 537 import doctest 538 start_suppress = np.get_printoptions()['suppress'] 539 np.set_printoptions(suppress=True) 540 doctest.testmod() 541 np.set_printoptions(suppress=start_suppress) 542