1# coding=utf-8 2""" 3 Wilsonian (1967) family of gravity-type spatial interaction models 4 5References 6---------- 7 8Fotheringham, A. S. and O'Kelly, M. E. (1989). Spatial Interaction Models: Formulations 9 and Applications. London: Kluwer Academic Publishers. 10 11Wilson, A. G. (1967). A statistical theory of spatial distribution models. 12 Transportation Research, 1, 253–269. 13 14""" 15 16__author__ = "Taylor Oshan tayoshan@gmail.com" 17 18from types import FunctionType 19import numpy as np 20from scipy import sparse as sp 21from spreg import user_output as User 22from spreg.utils import sphstack 23from spglm.utils import cache_readonly 24from .count_model import CountModel 25from .utils import sorensen, srmse, spcategorical 26 27 28class BaseGravity(CountModel): 29 """ 30 Base class to set up gravity-type spatial interaction models and dispatch 31 estimaton technqiues. 32 33 Parameters 34 ---------- 35 flows : array of integers 36 n x 1; observed flows between O origins and D destinations 37 origins : array of strings 38 n x 1; unique identifiers of origins of n flows 39 destinations : array of strings 40 n x 1; unique identifiers of destinations of n flows 41 cost : array 42 n x 1; cost to overcome separation between each origin and 43 destination associated with a flow; typically distance or time 44 cost_func : string or function that has scalar input and output 45 functional form of the cost function; 46 'exp' | 'pow' | custom function 47 o_vars : array (optional) 48 n x p; p attributes for each origin of n flows; default 49 is None 50 d_vars : array (optional) 51 n x p; p attributes for each destination of n flows; 52 default is None 53 constant : boolean 54 True to include intercept in model; True by default 55 framework : string 56 estimation technique; currently only 'GLM' is avaialble 57 Quasi : boolean 58 True to estimate QuasiPoisson model; should result in same 59 parameters as Poisson but with altered covariance; default 60 to true which estimates Poisson model 61 SF : array 62 n x 1; eigenvector spatial filter to include in the model; 63 default to None which does not include a filter; not yet 64 implemented 65 CD : array 66 n x 1; competing destination term that accounts for the 67 likelihood that alternative destinations are considered 68 along with each destination under consideration for every 69 OD pair; defaults to None which does not include a CD 70 term; not yet implemented 71 Lag : W object 72 spatial weight for n observations (OD pairs) used to 73 construct a spatial autoregressive model and estimator; 74 defaults to None which does not include an autoregressive 75 term; not yet implemented 76 77 78 Attributes 79 ---------- 80 f : array 81 n x 1; observed flows; dependent variable; y 82 n : integer 83 number of observations 84 k : integer 85 number of parameters 86 c : array 87 n x 1; cost to overcome separation between each origin and 88 destination associated with a flow; typically distance or time 89 cf : function 90 cost function; used to transform cost variable 91 ov : array 92 n x p(o); p attributes for each origin of n flows 93 dv : array 94 n x p(d); p attributes for each destination of n flows 95 constant : boolean 96 True to include intercept in model; True by default 97 y : array 98 n x 1; dependent variable used in estimation including any 99 transformations 100 X : array 101 n x k, design matrix used in estimation 102 params : array 103 n x k, k estimated beta coefficients; k = p(o) + p(d) + 1 104 yhat : array 105 n x 1, predicted value of y (i.e., fittedvalues) 106 cov_params : array 107 Variance covariance matrix (k x k) of betas 108 std_err : array 109 k x 1, standard errors of betas 110 pvalues : array 111 k x 1, two-tailed pvalues of parameters 112 tvalues : array 113 k x 1, the tvalues of the standard errors 114 deviance : float 115 value of the deviance function evalued at params; 116 see family.py for distribution-specific deviance 117 resid_dev : array 118 n x 1, residual deviance of model 119 llf : float 120 value of the loglikelihood function evalued at params; 121 see family.py for distribution-specific loglikelihoods 122 llnull : float 123 value of the loglikelihood function evaluated with only an 124 intercept; see family.py for distribution-specific 125 loglikelihoods 126 AIC : float 127 Akaike information criterion 128 D2 : float 129 percentage of explained deviance 130 adj_D2 : float 131 adjusted percentage of explained deviance 132 pseudo_R2 : float 133 McFadden's pseudo R2 (coefficient of determination) 134 adj_pseudoR2 : float 135 adjusted McFadden's pseudo R2 136 SRMSE : float 137 standardized root mean square error 138 SSI : float 139 Sorensen similarity index 140 results : object 141 full results from estimated model. May contain addtional 142 diagnostics 143 Example 144 ------- 145 >>> import numpy as np 146 >>> import libpysal 147 >>> from spint.gravity import BaseGravity 148 >>> db = libpysal.io.open(libpysal.examples.get_path('nyc_bikes_ct.csv')) 149 >>> cost = np.array(db.by_col('tripduration')).reshape((-1,1)) 150 >>> flows = np.array(db.by_col('count')).reshape((-1,1)) 151 >>> model = BaseGravity(flows, cost) 152 >>> model.params 153 array([17.84839637, -1.68325787]) 154 155 """ 156 157 def __init__( 158 self, 159 flows, 160 cost, 161 cost_func='pow', 162 o_vars=None, 163 d_vars=None, 164 origins=None, 165 destinations=None, 166 constant=True, 167 framework='GLM', 168 SF=None, 169 CD=None, 170 Lag=None, 171 Quasi=False): 172 n = User.check_arrays(flows, cost) 173 #User.check_y(flows, n) 174 self.n = n 175 self.f = flows 176 self.c = cost 177 self.ov = o_vars 178 self.dv = d_vars 179 if isinstance(cost_func, str): 180 if cost_func.lower() == 'pow': 181 self.cf = np.log 182 if (self.c == 0).any(): 183 raise ValueError( 184 "Zero values detected: cost function 'pow'" 185 "requires the logarithm of the cost variable which" 186 "is undefined at 0") 187 elif cost_func.lower() == 'exp': 188 self.cf = lambda x: x * 1.0 189 elif (isinstance(cost_func, FunctionType)) | (isinstance(cost_func, np.ufunc)): 190 self.cf = cost_func 191 else: 192 raise ValueError( 193 "cost_func must be 'exp', 'pow' or a valid " 194 " function that has a scalar as a input and output") 195 196 y = np.reshape(self.f, (-1, 1)) 197 if isinstance(self, Gravity): 198 X = np.empty((self.n, 0)) 199 else: 200 X = sp.csr_matrix((self.n, 1)) 201 if isinstance(self, Production) | isinstance(self, Doubly): 202 o_dummies = spcategorical(origins.flatten()) 203 if constant: 204 o_dummies = o_dummies[:, 1:] 205 X = sphstack(X, o_dummies, array_out=False) 206 if isinstance(self, Attraction) | isinstance(self, Doubly): 207 d_dummies = spcategorical(destinations.flatten()) 208 if constant | isinstance(self, Doubly): 209 d_dummies = d_dummies[:, 1:] 210 X = sphstack(X, d_dummies, array_out=False) 211 if self.ov is not None: 212 if isinstance(self, Gravity): 213 for each in range(self.ov.shape[1]): 214 if (self.ov[:, each] == 0).any(): 215 raise ValueError( 216 "Zero values detected in column %s " 217 "of origin variables, which are undefined for " 218 "Poisson log-linear spatial interaction models" % 219 each) 220 X = np.hstack( 221 (X, np.log(np.reshape(self.ov[:, each], (-1, 1))))) 222 else: 223 for each in range(self.ov.shape[1]): 224 if (self.ov[:, each] == 0).any(): 225 raise ValueError( 226 "Zero values detected in column %s " 227 "of origin variables, which are undefined for " 228 "Poisson log-linear spatial interaction models" % 229 each) 230 ov = sp.csr_matrix( 231 np.log(np.reshape(self.ov[:, each], ((-1, 1))))) 232 X = sphstack(X, ov, array_out=False) 233 if self.dv is not None: 234 if isinstance(self, Gravity): 235 for each in range(self.dv.shape[1]): 236 if (self.dv[:, each] == 0).any(): 237 raise ValueError( 238 "Zero values detected in column %s " 239 "of destination variables, which are undefined for " 240 "Poisson log-linear spatial interaction models" % 241 each) 242 X = np.hstack( 243 (X, np.log(np.reshape(self.dv[:, each], (-1, 1))))) 244 else: 245 for each in range(self.dv.shape[1]): 246 if (self.dv[:, each] == 0).any(): 247 raise ValueError( 248 "Zero values detected in column %s " 249 "of destination variables, which are undefined for " 250 "Poisson log-linear spatial interaction models" % 251 each) 252 dv = sp.csr_matrix( 253 np.log(np.reshape(self.dv[:, each], ((-1, 1))))) 254 X = sphstack(X, dv, array_out=False) 255 if isinstance(self, Gravity): 256 X = np.hstack((X, self.cf(np.reshape(self.c, (-1, 1))))) 257 else: 258 c = sp.csr_matrix(self.cf(np.reshape(self.c, (-1, 1)))) 259 X = sphstack(X, c, array_out=False) 260 X = X[:, 1:] # because empty array instantiated with extra column 261 if not isinstance(self, (Gravity, Production, Attraction, Doubly)): 262 X = self.cf(np.reshape(self.c, (-1, 1))) 263 if SF: 264 raise NotImplementedError( 265 "Spatial filter model not yet implemented") 266 if CD: 267 raise NotImplementedError( 268 "Competing destination model not yet implemented") 269 if Lag: 270 raise NotImplementedError( 271 "Spatial Lag autoregressive model not yet implemented") 272 273 CountModel.__init__(self, y, X, constant=constant) 274 if (framework.lower() == 'glm'): 275 if not Quasi: 276 results = self.fit(framework='glm') 277 else: 278 results = self.fit(framework='glm', Quasi=True) 279 else: 280 raise NotImplementedError('Only GLM is currently implemented') 281 282 self.params = results.params 283 self.yhat = results.yhat 284 self.cov_params = results.cov_params 285 self.std_err = results.std_err 286 self.pvalues = results.pvalues 287 self.tvalues = results.tvalues 288 self.deviance = results.deviance 289 self.resid_dev = results.resid_dev 290 self.llf = results.llf 291 self.llnull = results.llnull 292 self.AIC = results.AIC 293 self.k = results.k 294 self.D2 = results.D2 295 self.adj_D2 = results.adj_D2 296 self.pseudoR2 = results.pseudoR2 297 self.adj_pseudoR2 = results.adj_pseudoR2 298 self.results = results 299 self._cache = {} 300 301 @cache_readonly 302 def SSI(self): 303 return sorensen(self) 304 305 @cache_readonly 306 def SRMSE(self): 307 return srmse(self) 308 309 def reshape(self, array): 310 if isinstance(array, np.ndarray): 311 return array.reshape((-1, 1)) 312 elif isinstance(array, list): 313 return np.array(array).reshape((-1, 1)) 314 else: 315 raise TypeError( 316 "input must be an numpy array or list that can be coerced" 317 " into the dimensions n x 1") 318 319 320class Gravity(BaseGravity): 321 """ 322 Unconstrained (traditional gravity) gravity-type spatial interaction model 323 324 Parameters 325 ---------- 326 flows : array of integers 327 n x 1; observed flows between O origins and D destinations 328 cost : array 329 n x 1; cost to overcome separation between each origin and 330 destination associated with a flow; typically distance or time 331 cost_func : string or function that has scalar input and output 332 functional form of the cost function; 333 'exp' | 'pow' | custom function 334 o_vars : array (optional) 335 n x p; p attributes for each origin of n flows; default 336 is None 337 d_vars : array (optional) 338 n x p; p attributes for each destination of n flows; 339 default is None 340 constant : boolean 341 True to include intercept in model; True by default 342 framework : string 343 estimation technique; currently only 'GLM' is avaialble 344 Quasi : boolean 345 True to estimate QuasiPoisson model; should result in same 346 parameters as Poisson but with altered covariance; default 347 to true which estimates Poisson model 348 SF : array 349 n x 1; eigenvector spatial filter to include in the model; 350 default to None which does not include a filter; not yet 351 implemented 352 CD : array 353 n x 1; competing destination term that accounts for the 354 likelihood that alternative destinations are considered 355 along with each destination under consideration for every 356 OD pair; defaults to None which does not include a CD 357 term; not yet implemented 358 Lag : W object 359 spatial weight for n observations (OD pairs) used to 360 construct a spatial autoregressive model and estimator; 361 defaults to None which does not include an autoregressive 362 term; not yet implemented 363 364 Attributes 365 ---------- 366 f : array 367 n x 1; observed flows; dependent variable; y 368 n : integer 369 number of observations 370 k : integer 371 number of parameters 372 c : array 373 n x 1; cost to overcome separation between each origin and 374 destination associated with a flow; typically distance or time 375 cf : function 376 cost function; used to transform cost variable 377 ov : array 378 n x p(o); p attributes for each origin of n flows 379 dv : array 380 n x p(d); p attributes for each destination of n flows 381 constant : boolean 382 True to include intercept in model; True by default 383 y : array 384 n x 1; dependent variable used in estimation including any 385 transformations 386 X : array 387 n x k, design matrix used in estimation 388 params : array 389 n x k, k estimated beta coefficients; k = p(o) + p(d) + 1 390 yhat : array 391 n x 1, predicted value of y (i.e., fittedvalues) 392 cov_params : array 393 Variance covariance matrix (kxk) of betas 394 std_err : array 395 k x 1, standard errors of betas 396 pvalues : array 397 k x 1, two-tailed pvalues of parameters 398 tvalues : array 399 k x 1, the tvalues of the standard errors 400 deviance : float 401 value of the deviance function evalued at params; 402 see family.py for distribution-specific deviance 403 resid_dev : array 404 n x 1, residual deviance of model 405 llf : float 406 value of the loglikelihood function evalued at params; 407 see family.py for distribution-specific loglikelihoods 408 llnull : float 409 value of the loglikelihood function evaluated with only an 410 intercept; see family.py for distribution-specific 411 loglikelihoods 412 AIC : float 413 Akaike information criterion 414 D2 : float 415 percentage of explained deviance 416 adj_D2 : float 417 adjusted percentage of explained deviance 418 pseudo_R2 : float 419 McFadden's pseudo R2 (coefficient of determination) 420 adj_pseudoR2 : float 421 adjusted McFadden's pseudo R2 422 SRMSE : float 423 standardized root mean square error 424 SSI : float 425 Sorensen similarity index 426 results : object 427 Full results from estimated model. May contain addtional 428 diagnostics 429 Example 430 ------- 431 >>> import numpy as np 432 >>> import libpysal 433 >>> from spint.gravity import Gravity 434 >>> db = libpysal.io.open(libpysal.examples.get_path('nyc_bikes_ct.csv')) 435 >>> cost = np.array(db.by_col('tripduration')).reshape((-1,1)) 436 >>> flows = np.array(db.by_col('count')).reshape((-1,1)) 437 >>> o_cap = np.array(db.by_col('o_cap')).reshape((-1,1)) 438 >>> d_cap = np.array(db.by_col('d_cap')).reshape((-1,1)) 439 >>> model = Gravity(flows, o_cap, d_cap, cost, 'exp') 440 >>> model.params 441 array([ 3.80050153e+00, 5.54103854e-01, 3.94282921e-01, -2.27091686e-03]) 442 443 """ 444 445 def __init__(self, flows, o_vars, d_vars, cost, 446 cost_func, constant=True, framework='GLM', SF=None, CD=None, 447 Lag=None, Quasi=False): 448 self.f = np.reshape(flows, (-1, 1)) 449 if len(o_vars.shape) > 1: 450 p = o_vars.shape[1] 451 else: 452 p = 1 453 self.ov = np.reshape(o_vars, (-1, p)) 454 if len(d_vars.shape) > 1: 455 p = d_vars.shape[1] 456 else: 457 p = 1 458 self.dv = np.reshape(d_vars, (-1, p)) 459 self.c = np.reshape(cost, (-1, 1)) 460 #User.check_arrays(self.f, self.ov, self.dv, self.c) 461 462 BaseGravity.__init__( 463 self, 464 self.f, 465 self.c, 466 cost_func=cost_func, 467 o_vars=self.ov, 468 d_vars=self.dv, 469 constant=constant, 470 framework=framework, 471 SF=SF, 472 CD=CD, 473 Lag=Lag, 474 Quasi=Quasi) 475 476 def local(self, loc_index, locs): 477 """ 478 Calibrate local models for subsets of data from a single location to all 479 other locations 480 481 Parameters 482 ---------- 483 loc_index : n x 1 array of either origin or destination id label for 484 flows; must be explicitly provided for local version of 485 basic gravity model since these are not passed to the 486 global model. 487 488 locs : iterable of either origin or destination labels for which 489 to calibrate local models; must also be explicitly 490 provided since local gravity models can be calibrated from origins 491 or destinations. If all origins are also destinations and 492 a local model is desired for each location then use 493 np.unique(loc_index) 494 495 Returns 496 ------- 497 results : dict where keys are names of model outputs and diagnostics 498 and values are lists of location specific values. 499 """ 500 results = {} 501 covs = self.ov.shape[1] + self.dv.shape[1] + 1 502 results['AIC'] = [] 503 results['deviance'] = [] 504 results['pseudoR2'] = [] 505 results['adj_pseudoR2'] = [] 506 results['D2'] = [] 507 results['adj_D2'] = [] 508 results['SSI'] = [] 509 results['SRMSE'] = [] 510 for cov in range(covs): 511 results['param' + str(cov)] = [] 512 results['stde' + str(cov)] = [] 513 results['pvalue' + str(cov)] = [] 514 results['tvalue' + str(cov)] = [] 515 for loc in locs: 516 subset = loc_index == loc 517 f = self.reshape(self.f[subset]) 518 o_vars = self.ov[subset.reshape(self.ov.shape[0]), :] 519 d_vars = self.dv[subset.reshape(self.dv.shape[0]), :] 520 dij = self.reshape(self.c[subset]) 521 model = Gravity(f, o_vars, d_vars, dij, self.cf, 522 constant=False) 523 results['AIC'].append(model.AIC) 524 results['deviance'].append(model.deviance) 525 results['pseudoR2'].append(model.pseudoR2) 526 results['adj_pseudoR2'].append(model.adj_pseudoR2) 527 results['D2'].append(model.D2) 528 results['adj_D2'].append(model.adj_D2) 529 results['SSI'].append(model.SSI) 530 results['SRMSE'].append(model.SRMSE) 531 for cov in range(covs): 532 results['param' + str(cov)].append(model.params[cov]) 533 results['stde' + str(cov)].append(model.std_err[cov]) 534 results['pvalue' + str(cov)].append(model.pvalues[cov]) 535 results['tvalue' + str(cov)].append(model.tvalues[cov]) 536 return results 537 538 539class Production(BaseGravity): 540 """ 541 Production-constrained (origin-constrained) gravity-type spatial interaction model 542 543 Parameters 544 ---------- 545 flows : array of integers 546 n x 1; observed flows between O origins and D destinations 547 origins : array of strings 548 n x 1; unique identifiers of origins of n flows; when 549 there are many origins it will be faster to use integers 550 rather than strings for id labels. 551 cost : array 552 n x 1; cost to overcome separation between each origin and 553 destination associated with a flow; typically distance or time 554 cost_func : string or function that has scalar input and output 555 functional form of the cost function; 556 'exp' | 'pow' | custom function 557 d_vars : array (optional) 558 n x p; p attributes for each destination of n flows; 559 default is None 560 constant : boolean 561 True to include intercept in model; True by default 562 framework : string 563 estimation technique; currently only 'GLM' is avaialble 564 Quasi : boolean 565 True to estimate QuasiPoisson model; should result in same 566 parameters as Poisson but with altered covariance; default 567 to true which estimates Poisson model 568 SF : array 569 n x 1; eigenvector spatial filter to include in the model; 570 default to None which does not include a filter; not yet 571 implemented 572 CD : array 573 n x 1; competing destination term that accounts for the 574 likelihood that alternative destinations are considered 575 along with each destination under consideration for every 576 OD pair; defaults to None which does not include a CD 577 term; not yet implemented 578 Lag : W object 579 spatial weight for n observations (OD pairs) used to 580 construct a spatial autoregressive model and estimator; 581 defaults to None which does not include an autoregressive 582 term; not yet implemented 583 584 Attributes 585 ---------- 586 f : array 587 n x 1; observed flows; dependent variable; y 588 n : integer 589 number of observations 590 k : integer 591 number of parameters 592 c : array 593 n x 1; cost to overcome separation between each origin and 594 destination associated with a flow; typically distance or time 595 cf : function 596 cost function; used to transform cost variable 597 o : array 598 n x 1; index of origin id's 599 dv : array 600 n x p; p attributes for each destination of n flows 601 constant : boolean 602 True to include intercept in model; True by default 603 y : array 604 n x 1; dependent variable used in estimation including any 605 transformations 606 X : array 607 n x k, design matrix used in estimation 608 params : array 609 n x k, k estimated beta coefficients; k = # of origins + p + 1 610 yhat : array 611 n x 1, predicted value of y (i.e., fittedvalues) 612 cov_params : array 613 Variance covariance matrix (kxk) of betas 614 std_err : array 615 k x 1, standard errors of betas 616 pvalues : array 617 k x 1, two-tailed pvalues of parameters 618 tvalues : array 619 k x 1, the tvalues of the standard errors 620 deviance : float 621 value of the deviance function evalued at params; 622 see family.py for distribution-specific deviance 623 resid_dev : array 624 n x 1, residual deviance of model 625 llf : float 626 value of the loglikelihood function evalued at params; 627 see family.py for distribution-specific loglikelihoods 628 llnull : float 629 value of the loglikelihood function evaluated with only an 630 intercept; see family.py for distribution-specific 631 loglikelihoods 632 AIC : float 633 Akaike information criterion 634 D2 : float 635 percentage of explained deviance 636 adj_D2 : float 637 adjusted percentage of explained deviance 638 pseudo_R2 : float 639 McFadden's pseudo R2 (coefficient of determination) 640 adj_pseudoR2 : float 641 adjusted McFadden's pseudo R2 642 SRMSE : float 643 standardized root mean square error 644 SSI : float 645 Sorensen similarity index 646 results : object 647 Full results from estimated model. May contain addtional 648 diagnostics 649 Example 650 ------- 651 652 >>> import numpy as np 653 >>> import libpysal 654 >>> from spint.gravity import Production 655 >>> db = libpysal.io.open(libpysal.examples.get_path('nyc_bikes_ct.csv')) 656 >>> cost = np.array(db.by_col('tripduration')).reshape((-1,1)) 657 >>> flows = np.array(db.by_col('count')).reshape((-1,1)) 658 >>> o = np.array(db.by_col('o_tract')).reshape((-1,1)) 659 >>> d_cap = np.array(db.by_col('d_cap')).reshape((-1,1)) 660 >>> model = Production(flows, o, d_cap, cost, 'exp') 661 >>> model.params[-4:] 662 array([ 1.34721352, 0.96357345, 0.85535775, -0.00227444]) 663 664 """ 665 666 def __init__(self, flows, origins, d_vars, cost, cost_func, constant=True, 667 framework='GLM', SF=None, CD=None, Lag=None, Quasi=False): 668 self.constant = constant 669 self.f = self.reshape(flows) 670 self.o = self.reshape(origins) 671 672 try: 673 if d_vars.shape[1]: 674 p = d_vars.shape[1] 675 except BaseException: 676 p = 1 677 self.dv = np.reshape(d_vars, (-1, p)) 678 self.c = self.reshape(cost) 679 #User.check_arrays(self.f, self.o, self.dv, self.c) 680 681 BaseGravity.__init__( 682 self, 683 self.f, 684 self.c, 685 cost_func=cost_func, 686 d_vars=self.dv, 687 origins=self.o, 688 constant=constant, 689 framework=framework, 690 SF=SF, 691 CD=CD, 692 Lag=Lag, 693 Quasi=Quasi) 694 695 def local(self, locs=None): 696 """ 697 Calibrate local models for subsets of data from a single location to all 698 other locations 699 700 Parameters 701 ---------- 702 locs : iterable of location (origins) labels; default is 703 None which calibrates a local model for each origin 704 705 Returns 706 ------- 707 results : dict where keys are names of model outputs and diagnostics 708 and values are lists of location specific values 709 """ 710 results = {} 711 offset = 1 712 covs = self.dv.shape[1] + 1 713 results['AIC'] = [] 714 results['deviance'] = [] 715 results['pseudoR2'] = [] 716 results['adj_pseudoR2'] = [] 717 results['D2'] = [] 718 results['adj_D2'] = [] 719 results['SSI'] = [] 720 results['SRMSE'] = [] 721 for cov in range(covs): 722 results['param' + str(cov)] = [] 723 results['stde' + str(cov)] = [] 724 results['pvalue' + str(cov)] = [] 725 results['tvalue' + str(cov)] = [] 726 if locs is None: 727 locs = np.unique(self.o) 728 for loc in np.unique(locs): 729 subset = self.o == loc 730 f = self.reshape(self.f[subset]) 731 o = self.reshape(self.o[subset]) 732 d_vars = self.dv[subset.reshape(self.dv.shape[0]), :] 733 dij = self.reshape(self.c[subset]) 734 model = Production(f, o, d_vars, dij, self.cf, constant=False) 735 results['AIC'].append(model.AIC) 736 results['deviance'].append(model.deviance) 737 results['pseudoR2'].append(model.pseudoR2) 738 results['adj_pseudoR2'].append(model.adj_pseudoR2) 739 results['D2'].append(model.D2) 740 results['adj_D2'].append(model.adj_D2) 741 results['SSI'].append(model.SSI) 742 results['SRMSE'].append(model.SRMSE) 743 for cov in range(covs): 744 results['param' + str(cov)].append(model.params[offset + cov]) 745 results['stde' + str(cov)].append(model.std_err[offset + cov]) 746 results['pvalue' + 747 str(cov)].append(model.pvalues[offset + cov]) 748 results['tvalue' + 749 str(cov)].append(model.tvalues[offset + cov]) 750 return results 751 752 753class Attraction(BaseGravity): 754 """ 755 Attraction-constrained (destination-constrained) gravity-type spatial interaction model 756 757 Parameters 758 ---------- 759 flows : array of integers 760 n x 1; observed flows between O origins and D destinations 761 destinations : array of strings 762 n x 1; unique identifiers of destinations of n flows; when 763 there are many destinations it will be faster to use 764 integers over strings for id labels. 765 cost : array 766 n x 1; cost to overcome separation between each origin and 767 destination associated with a flow; typically distance or time 768 cost_func : string or function that has scalar input and output 769 functional form of the cost function; 770 'exp' | 'pow' | custom function 771 o_vars : array (optional) 772 n x p; p attributes for each origin of n flows; default 773 is None 774 constant : boolean 775 True to include intercept in model; True by default 776 y : array 777 n x 1; dependent variable used in estimation including any 778 transformations 779 X : array 780 n x k, design matrix used in estimation 781 framework : string 782 estimation technique; currently only 'GLM' is avaialble 783 Quasi : boolean 784 True to estimate QuasiPoisson model; should result in same 785 parameters as Poisson but with altered covariance; default 786 to true which estimates Poisson model 787 SF : array 788 n x 1; eigenvector spatial filter to include in the model; 789 default to None which does not include a filter; not yet 790 implemented 791 CD : array 792 n x 1; competing destination term that accounts for the 793 likelihood that alternative destinations are considered 794 along with each destination under consideration for every 795 OD pair; defaults to None which does not include a CD 796 term; not yet implemented 797 Lag : W object 798 spatial weight for n observations (OD pairs) used to 799 construct a spatial autoregressive model and estimator; 800 defaults to None which does not include an autoregressive 801 term; not yet implemented 802 803 Attributes 804 ---------- 805 f : array 806 n x 1; observed flows; dependent variable; y 807 n : integer 808 number of observations 809 k : integer 810 number of parameters 811 c : array 812 n x 1; cost to overcome separation between each origin and 813 destination associated with a flow; typically distance or time 814 cf : function 815 cost function; used to transform cost variable 816 d : array 817 n x 1; index of destination id's 818 ov : array 819 n x p; p attributes for each origin of n flows 820 constant : boolean 821 True to include intercept in model; True by default 822 params : array 823 n x k, k estimated beta coefficients; k = # of 824 destinations + p + 1 825 yhat : array 826 n x 1, predicted value of y (i.e., fittedvalues) 827 cov_params : array 828 Variance covariance matrix (kxk) of betas 829 std_err : array 830 k x 1, standard errors of betas 831 pvalues : array 832 k x 1, two-tailed pvalues of parameters 833 tvalues : array 834 k x 1, the tvalues of the standard errors 835 deviance : float 836 value of the deviance function evalued at params; 837 see family.py for distribution-specific deviance 838 resid_dev : array 839 n x 1, residual deviance of model 840 llf : float 841 value of the loglikelihood function evalued at params; 842 see family.py for distribution-specific loglikelihoods 843 llnull : float 844 value of the loglikelihood function evaluated with only an 845 intercept; see family.py for distribution-specific 846 loglikelihoods 847 AIC : float 848 Akaike information criterion 849 D2 : float 850 percentage of explained deviance 851 adj_D2 : float 852 adjusted percentage of explained deviance 853 pseudo_R2 : float 854 McFadden's pseudo R2 (coefficient of determination) 855 adj_pseudoR2 : float 856 adjusted McFadden's pseudo R2 857 SRMSE : float 858 standardized root mean square error 859 SSI : float 860 Sorensen similarity index 861 results : object 862 Full results from estimated model. May contain addtional 863 diagnostics 864 Example 865 ------- 866 >>> import numpy as np 867 >>> import libpysal 868 >>> from spint.gravity import Attraction 869 >>> nyc_bikes = libpysal.examples.load_example('nyc_bikes') 870 >>> db = libpysal.io.open(nyc_bikes.get_path('nyc_bikes_ct.csv')) 871 >>> cost = np.array(db.by_col('tripduration')).reshape((-1,1)) 872 >>> flows = np.array(db.by_col('count')).reshape((-1,1)) 873 >>> d = np.array(db.by_col('d_tract')).reshape((-1,1)) 874 >>> o_cap = np.array(db.by_col('o_cap')).reshape((-1,1)) 875 >>> model = Attraction(flows, d, o_cap, cost, 'exp') 876 >>> model.params[-4:] 877 array([ 1.21962276, 0.87634028, 0.88290909, -0.00229081]) 878 879 """ 880 881 def __init__(self, flows, destinations, o_vars, cost, cost_func, 882 constant=True, framework='GLM', SF=None, CD=None, Lag=None, 883 Quasi=False): 884 self.f = np.reshape(flows, (-1, 1)) 885 if len(o_vars.shape) > 1: 886 p = o_vars.shape[1] 887 else: 888 p = 1 889 self.ov = np.reshape(o_vars, (-1, p)) 890 self.d = np.reshape(destinations, (-1, 1)) 891 self.c = np.reshape(cost, (-1, 1)) 892 #User.check_arrays(self.f, self.d, self.ov, self.c) 893 894 BaseGravity.__init__( 895 self, 896 self.f, 897 self.c, 898 cost_func=cost_func, 899 o_vars=self.ov, 900 destinations=self.d, 901 constant=constant, 902 framework=framework, 903 SF=SF, 904 CD=CD, 905 Lag=Lag, 906 Quasi=Quasi) 907 908 def local(self, locs=None): 909 """ 910 Calibrate local models for subsets of data from a single location to all 911 other locations 912 913 Parameters 914 ---------- 915 locs : iterable of location (destinations) labels; default is 916 None which calibrates a local model for each destination 917 918 Returns 919 ------- 920 results : dict where keys are names of model outputs and diagnostics 921 and values are lists of location specific values 922 """ 923 results = {} 924 offset = 1 925 covs = self.ov.shape[1] + 1 926 results['AIC'] = [] 927 results['deviance'] = [] 928 results['pseudoR2'] = [] 929 results['adj_pseudoR2'] = [] 930 results['D2'] = [] 931 results['adj_D2'] = [] 932 results['SSI'] = [] 933 results['SRMSE'] = [] 934 for cov in range(covs): 935 results['param' + str(cov)] = [] 936 results['stde' + str(cov)] = [] 937 results['pvalue' + str(cov)] = [] 938 results['tvalue' + str(cov)] = [] 939 if locs is None: 940 locs = np.unique(self.d) 941 for loc in np.unique(locs): 942 subset = self.d == loc 943 f = self.reshape(self.f[subset]) 944 d = self.reshape(self.d[subset]) 945 o_vars = self.ov[subset.reshape(self.ov.shape[0]), :] 946 dij = self.reshape(self.c[subset]) 947 model = Attraction(f, d, o_vars, dij, self.cf, constant=False) 948 results['AIC'].append(model.AIC) 949 results['deviance'].append(model.deviance) 950 results['pseudoR2'].append(model.pseudoR2) 951 results['adj_pseudoR2'].append(model.adj_pseudoR2) 952 results['D2'].append(model.D2) 953 results['adj_D2'].append(model.adj_D2) 954 results['SSI'].append(model.SSI) 955 results['SRMSE'].append(model.SRMSE) 956 for cov in range(covs): 957 results['param' + str(cov)].append(model.params[offset + cov]) 958 results['stde' + str(cov)].append(model.std_err[offset + cov]) 959 results['pvalue' + 960 str(cov)].append(model.pvalues[offset + cov]) 961 results['tvalue' + 962 str(cov)].append(model.tvalues[offset + cov]) 963 return results 964 965 966class Doubly(BaseGravity): 967 """ 968 Doubly-constrained gravity-type spatial interaction model 969 970 Parameters 971 ---------- 972 flows : array of integers 973 n x 1; observed flows between O origins and D destinations 974 origins : array of strings 975 n x 1; unique identifiers of origins of n flows; when 976 there are many origins it will be faster to use integers 977 rather than strings for id labels. 978 destinations : array of strings 979 n x 1; unique identifiers of destinations of n flows; when 980 there are many destinations it will be faster to use 981 integers rather than strings for id labels 982 cost : array 983 n x 1; cost to overcome separation between each origin and 984 destination associated with a flow; typically distance or time 985 cost_func : string or function that has scalar input and output 986 functional form of the cost function; 987 'exp' | 'pow' | custom function 988 constant : boolean 989 True to include intercept in model; True by default 990 y : array 991 n x 1; dependent variable used in estimation including any 992 transformations 993 X : array 994 n x k, design matrix used in estimation 995 framework : string 996 estimation technique; currently only 'GLM' is avaialble 997 Quasi : boolean 998 True to estimate QuasiPoisson model; should result in same 999 parameters as Poisson but with altered covariance; default 1000 to true which estimates Poisson model 1001 SF : array 1002 n x 1; eigenvector spatial filter to include in the model; 1003 default to None which does not include a filter; not yet 1004 implemented 1005 CD : array 1006 n x 1; competing destination term that accounts for the 1007 likelihood that alternative destinations are considered 1008 along with each destination under consideration for every 1009 OD pair; defaults to None which does not include a CD 1010 term; not yet implemented 1011 Lag : W object 1012 spatial weight for n observations (OD pairs) used to 1013 construct a spatial autoregressive model and estimator; 1014 defaults to None which does not include an autoregressive 1015 term; not yet implemented 1016 1017 Attributes 1018 ---------- 1019 f : array 1020 n x 1; observed flows; dependent variable; y 1021 n : integer 1022 number of observations 1023 k : integer 1024 number of parameters 1025 c : array 1026 n x 1; cost to overcome separation between each origin and 1027 destination associated with a flow; typically distance or time 1028 cf : function 1029 cost function; used to transform cost variable 1030 o : array 1031 n x 1; index of origin id's 1032 d : array 1033 n x 1; index of destination id's 1034 constant : boolean 1035 True to include intercept in model; True by default 1036 params : array 1037 n x k, estimated beta coefficients; k = # of origins + # 1038 of destinations; the first x-1 values 1039 pertain to the x destinations (leaving out the first 1040 destination to avoid perfect collinearity; no fixed 1041 effect), the next x values pertain to the x origins, and the 1042 final value is the distance decay coefficient 1043 yhat : array 1044 n x 1, predicted value of y (i.e., fittedvalues) 1045 cov_params : array 1046 Variance covariance matrix (kxk) of betas 1047 std_err : array 1048 k x 1, standard errors of betas 1049 pvalues : array 1050 k x 1, two-tailed pvalues of parameters 1051 tvalues : array 1052 k x 1, the tvalues of the standard errors 1053 deviance : float 1054 value of the deviance function evalued at params; 1055 see family.py for distribution-specific deviance 1056 resid_dev : array 1057 n x 1, residual deviance of model 1058 llf : float 1059 value of the loglikelihood function evalued at params; 1060 see family.py for distribution-specific loglikelihoods 1061 llnull : float 1062 value of the loglikelihood function evaluated with only an 1063 intercept; see family.py for distribution-specific 1064 loglikelihoods 1065 AIC : float 1066 Akaike information criterion 1067 D2 : float 1068 percentage of explained deviance 1069 adj_D2 : float 1070 adjusted percentage of explained deviance 1071 pseudo_R2 : float 1072 McFadden's pseudo R2 (coefficient of determination) 1073 adj_pseudoR2 : float 1074 adjusted McFadden's pseudo R2 1075 SRMSE : float 1076 standardized root mean square error 1077 SSI : float 1078 Sorensen similarity index 1079 results : object 1080 Full results from estimated model. May contain addtional 1081 diagnostics 1082 Example 1083 ------- 1084 >>> import numpy as np 1085 >>> import libpysal 1086 >>> from spint.gravity import Doubly 1087 >>> db = libpysal.io.open(libpysal.examples.get_path('nyc_bikes_ct.csv')) 1088 >>> cost = np.array(db.by_col('tripduration')).reshape((-1,1)) 1089 >>> flows = np.array(db.by_col('count')).reshape((-1,1)) 1090 >>> d = np.array(db.by_col('d_tract')).reshape((-1,1)) 1091 >>> o = np.array(db.by_col('o_tract')).reshape((-1,1)) 1092 >>> model = Doubly(flows, o, d, cost, 'exp') 1093 >>> model.params[-1:] 1094 array([-0.00232112]) 1095 1096 """ 1097 1098 def __init__(self, flows, origins, destinations, cost, cost_func, 1099 constant=True, framework='GLM', SF=None, CD=None, Lag=None, 1100 Quasi=False): 1101 1102 self.f = np.reshape(flows, (-1, 1)) 1103 self.o = np.reshape(origins, (-1, 1)) 1104 self.d = np.reshape(destinations, (-1, 1)) 1105 self.c = np.reshape(cost, (-1, 1)) 1106 #User.check_arrays(self.f, self.o, self.d, self.c) 1107 1108 BaseGravity.__init__( 1109 self, 1110 self.f, 1111 self.c, 1112 cost_func=cost_func, 1113 origins=self.o, 1114 destinations=self.d, 1115 constant=constant, 1116 framework=framework, 1117 SF=SF, 1118 CD=CD, 1119 Lag=Lag, 1120 Quasi=Quasi) 1121 1122 def local(self, locs=None): 1123 """ 1124 **Not inmplemented for doubly-constrained models** Not possible due to 1125 insufficient degrees of freedom. 1126 1127 Calibrate local models for subsets of data from a single location to all 1128 other locations 1129 """ 1130 raise NotImplementedError( 1131 "Local models not possible for" 1132 " doubly-constrained model due to insufficient degrees of freedom.") 1133