1# coding=utf-8
2"""
3 Wilsonian (1967) family of gravity-type spatial interaction models
4
5References
6----------
7
8Fotheringham, A. S. and O'Kelly, M. E. (1989). Spatial Interaction Models: Formulations
9 and Applications. London: Kluwer Academic Publishers.
10
11Wilson, A. G. (1967). A statistical theory of spatial distribution models.
12 Transportation Research, 1, 253–269.
13
14"""
15
16__author__ = "Taylor Oshan tayoshan@gmail.com"
17
18from types import FunctionType
19import numpy as np
20from scipy import sparse as sp
21from spreg import user_output as User
22from spreg.utils import sphstack
23from spglm.utils import cache_readonly
24from .count_model import CountModel
25from .utils import sorensen, srmse, spcategorical
26
27
28class BaseGravity(CountModel):
29    """
30    Base class to set up gravity-type spatial interaction models and dispatch
31    estimaton technqiues.
32
33    Parameters
34    ----------
35    flows           : array of integers
36                      n x 1; observed flows between O origins and D destinations
37    origins         : array of strings
38                      n x 1; unique identifiers of origins of n flows
39    destinations    : array of strings
40                      n x 1; unique identifiers of destinations of n flows
41    cost            : array
42                      n x 1; cost to overcome separation between each origin and
43                      destination associated with a flow; typically distance or time
44    cost_func       : string or function that has scalar input and output
45                      functional form of the cost function;
46                      'exp' | 'pow' | custom function
47    o_vars          : array (optional)
48                      n x p; p attributes for each origin of  n flows; default
49                      is None
50    d_vars          : array (optional)
51                      n x p; p attributes for each destination of n flows;
52                      default is None
53    constant        : boolean
54                      True to include intercept in model; True by default
55    framework       : string
56                      estimation technique; currently only 'GLM' is avaialble
57    Quasi           : boolean
58                      True to estimate QuasiPoisson model; should result in same
59                      parameters as Poisson but with altered covariance; default
60                      to true which estimates Poisson model
61    SF              : array
62                      n x 1; eigenvector spatial filter to include in the model;
63                      default to None which does not include a filter; not yet
64                      implemented
65    CD              : array
66                      n x 1; competing destination term that accounts for the
67                      likelihood that alternative destinations are considered
68                      along with each destination under consideration for every
69                      OD pair; defaults to None which does not include a CD
70                      term; not yet implemented
71    Lag             : W object
72                      spatial weight for n observations (OD pairs) used to
73                      construct a spatial autoregressive model and estimator;
74                      defaults to None which does not include an autoregressive
75                      term; not yet implemented
76
77
78    Attributes
79    ----------
80    f               : array
81                      n x 1; observed flows; dependent variable; y
82    n               : integer
83                      number of observations
84    k               : integer
85                      number of parameters
86    c               : array
87                      n x 1; cost to overcome separation between each origin and
88                      destination associated with a flow; typically distance or time
89    cf              : function
90                      cost function; used to transform cost variable
91    ov              : array
92                      n x p(o); p attributes for each origin of n flows
93    dv              : array
94                      n x p(d); p attributes for each destination of n flows
95    constant        : boolean
96                      True to include intercept in model; True by default
97    y               : array
98                      n x 1; dependent variable used in estimation including any
99                      transformations
100    X               : array
101                      n x k, design matrix used in estimation
102    params          : array
103                      n x k, k estimated beta coefficients; k = p(o) + p(d) + 1
104    yhat            : array
105                      n x 1, predicted value of y (i.e., fittedvalues)
106    cov_params      : array
107                      Variance covariance matrix (k x k) of betas
108    std_err         : array
109                      k x 1, standard errors of betas
110    pvalues         : array
111                      k x 1, two-tailed pvalues of parameters
112    tvalues         : array
113                      k x 1, the tvalues of the standard errors
114    deviance        : float
115                      value of the deviance function evalued at params;
116                      see family.py for distribution-specific deviance
117    resid_dev       : array
118                      n x 1, residual deviance of model
119    llf             : float
120                      value of the loglikelihood function evalued at params;
121                      see family.py for distribution-specific loglikelihoods
122    llnull          : float
123                      value of the loglikelihood function evaluated with only an
124                      intercept; see family.py for distribution-specific
125                      loglikelihoods
126    AIC             : float
127                      Akaike information criterion
128    D2              : float
129                      percentage of explained deviance
130    adj_D2          : float
131                      adjusted percentage of explained deviance
132    pseudo_R2       : float
133                      McFadden's pseudo R2  (coefficient of determination)
134    adj_pseudoR2    : float
135                      adjusted McFadden's pseudo R2
136    SRMSE           : float
137                      standardized root mean square error
138    SSI             : float
139                      Sorensen similarity index
140    results         : object
141                      full results from estimated model. May contain addtional
142                      diagnostics
143    Example
144    -------
145    >>> import numpy as np
146    >>> import libpysal
147    >>> from spint.gravity import BaseGravity
148    >>> db = libpysal.io.open(libpysal.examples.get_path('nyc_bikes_ct.csv'))
149    >>> cost = np.array(db.by_col('tripduration')).reshape((-1,1))
150    >>> flows = np.array(db.by_col('count')).reshape((-1,1))
151    >>> model = BaseGravity(flows, cost)
152    >>> model.params
153    array([17.84839637, -1.68325787])
154
155    """
156
157    def __init__(
158            self,
159            flows,
160            cost,
161            cost_func='pow',
162            o_vars=None,
163            d_vars=None,
164            origins=None,
165            destinations=None,
166            constant=True,
167            framework='GLM',
168            SF=None,
169            CD=None,
170            Lag=None,
171            Quasi=False):
172        n = User.check_arrays(flows, cost)
173        #User.check_y(flows, n)
174        self.n = n
175        self.f = flows
176        self.c = cost
177        self.ov = o_vars
178        self.dv = d_vars
179        if isinstance(cost_func, str):
180            if cost_func.lower() == 'pow':
181                self.cf = np.log
182                if (self.c == 0).any():
183                    raise ValueError(
184                        "Zero values detected: cost function 'pow'"
185                        "requires the logarithm of the cost variable which"
186                        "is undefined at 0")
187            elif cost_func.lower() == 'exp':
188                self.cf = lambda x: x * 1.0
189        elif (isinstance(cost_func, FunctionType)) | (isinstance(cost_func, np.ufunc)):
190            self.cf = cost_func
191        else:
192            raise ValueError(
193                "cost_func must be 'exp', 'pow' or a valid "
194                " function that has a scalar as a input and output")
195
196        y = np.reshape(self.f, (-1, 1))
197        if isinstance(self, Gravity):
198            X = np.empty((self.n, 0))
199        else:
200            X = sp.csr_matrix((self.n, 1))
201        if isinstance(self, Production) | isinstance(self, Doubly):
202            o_dummies = spcategorical(origins.flatten())
203            if constant:
204                o_dummies = o_dummies[:, 1:]
205            X = sphstack(X, o_dummies, array_out=False)
206        if isinstance(self, Attraction) | isinstance(self, Doubly):
207            d_dummies = spcategorical(destinations.flatten())
208            if constant | isinstance(self, Doubly):
209                d_dummies = d_dummies[:, 1:]
210            X = sphstack(X, d_dummies, array_out=False)
211        if self.ov is not None:
212            if isinstance(self, Gravity):
213                for each in range(self.ov.shape[1]):
214                    if (self.ov[:, each] == 0).any():
215                        raise ValueError(
216                            "Zero values detected in column %s "
217                            "of origin variables, which are undefined for "
218                            "Poisson log-linear spatial interaction models" %
219                            each)
220                    X = np.hstack(
221                        (X, np.log(np.reshape(self.ov[:, each], (-1, 1)))))
222            else:
223                for each in range(self.ov.shape[1]):
224                    if (self.ov[:, each] == 0).any():
225                        raise ValueError(
226                            "Zero values detected in column %s "
227                            "of origin variables, which are undefined for "
228                            "Poisson log-linear spatial interaction models" %
229                            each)
230                    ov = sp.csr_matrix(
231                        np.log(np.reshape(self.ov[:, each], ((-1, 1)))))
232                    X = sphstack(X, ov, array_out=False)
233        if self.dv is not None:
234            if isinstance(self, Gravity):
235                for each in range(self.dv.shape[1]):
236                    if (self.dv[:, each] == 0).any():
237                        raise ValueError(
238                            "Zero values detected in column %s "
239                            "of destination variables, which are undefined for "
240                            "Poisson log-linear spatial interaction models" %
241                            each)
242                    X = np.hstack(
243                        (X, np.log(np.reshape(self.dv[:, each], (-1, 1)))))
244            else:
245                for each in range(self.dv.shape[1]):
246                    if (self.dv[:, each] == 0).any():
247                        raise ValueError(
248                            "Zero values detected in column %s "
249                            "of destination variables, which are undefined for "
250                            "Poisson log-linear spatial interaction models" %
251                            each)
252                    dv = sp.csr_matrix(
253                        np.log(np.reshape(self.dv[:, each], ((-1, 1)))))
254                    X = sphstack(X, dv, array_out=False)
255        if isinstance(self, Gravity):
256            X = np.hstack((X, self.cf(np.reshape(self.c, (-1, 1)))))
257        else:
258            c = sp.csr_matrix(self.cf(np.reshape(self.c, (-1, 1))))
259            X = sphstack(X, c, array_out=False)
260            X = X[:, 1:]  # because empty array instantiated with extra column
261        if not isinstance(self, (Gravity, Production, Attraction, Doubly)):
262            X = self.cf(np.reshape(self.c, (-1, 1)))
263        if SF:
264            raise NotImplementedError(
265                "Spatial filter model not yet implemented")
266        if CD:
267            raise NotImplementedError(
268                "Competing destination model not yet implemented")
269        if Lag:
270            raise NotImplementedError(
271                "Spatial Lag autoregressive model not yet implemented")
272
273        CountModel.__init__(self, y, X, constant=constant)
274        if (framework.lower() == 'glm'):
275            if not Quasi:
276                results = self.fit(framework='glm')
277            else:
278                results = self.fit(framework='glm', Quasi=True)
279        else:
280            raise NotImplementedError('Only GLM is currently implemented')
281
282        self.params = results.params
283        self.yhat = results.yhat
284        self.cov_params = results.cov_params
285        self.std_err = results.std_err
286        self.pvalues = results.pvalues
287        self.tvalues = results.tvalues
288        self.deviance = results.deviance
289        self.resid_dev = results.resid_dev
290        self.llf = results.llf
291        self.llnull = results.llnull
292        self.AIC = results.AIC
293        self.k = results.k
294        self.D2 = results.D2
295        self.adj_D2 = results.adj_D2
296        self.pseudoR2 = results.pseudoR2
297        self.adj_pseudoR2 = results.adj_pseudoR2
298        self.results = results
299        self._cache = {}
300
301    @cache_readonly
302    def SSI(self):
303        return sorensen(self)
304
305    @cache_readonly
306    def SRMSE(self):
307        return srmse(self)
308
309    def reshape(self, array):
310        if isinstance(array, np.ndarray):
311            return array.reshape((-1, 1))
312        elif isinstance(array, list):
313            return np.array(array).reshape((-1, 1))
314        else:
315            raise TypeError(
316                "input must be an numpy array or list that can be coerced"
317                " into the dimensions n x 1")
318
319
320class Gravity(BaseGravity):
321    """
322    Unconstrained (traditional gravity) gravity-type spatial interaction model
323
324    Parameters
325    ----------
326    flows           : array of integers
327                      n x 1; observed flows between O origins and D destinations
328    cost            : array
329                      n x 1; cost to overcome separation between each origin and
330                      destination associated with a flow; typically distance or time
331    cost_func       : string or function that has scalar input and output
332                      functional form of the cost function;
333                      'exp' | 'pow' | custom function
334    o_vars          : array (optional)
335                      n x p; p attributes for each origin of  n flows; default
336                      is None
337    d_vars          : array (optional)
338                      n x p; p attributes for each destination of n flows;
339                      default is None
340    constant        : boolean
341                      True to include intercept in model; True by default
342    framework       : string
343                      estimation technique; currently only 'GLM' is avaialble
344    Quasi           : boolean
345                      True to estimate QuasiPoisson model; should result in same
346                      parameters as Poisson but with altered covariance; default
347                      to true which estimates Poisson model
348    SF              : array
349                      n x 1; eigenvector spatial filter to include in the model;
350                      default to None which does not include a filter; not yet
351                      implemented
352    CD              : array
353                      n x 1; competing destination term that accounts for the
354                      likelihood that alternative destinations are considered
355                      along with each destination under consideration for every
356                      OD pair; defaults to None which does not include a CD
357                      term; not yet implemented
358    Lag             : W object
359                      spatial weight for n observations (OD pairs) used to
360                      construct a spatial autoregressive model and estimator;
361                      defaults to None which does not include an autoregressive
362                      term; not yet implemented
363
364    Attributes
365    ----------
366    f               : array
367                      n x 1; observed flows; dependent variable; y
368    n               : integer
369                      number of observations
370    k               : integer
371                      number of parameters
372    c               : array
373                      n x 1; cost to overcome separation between each origin and
374                      destination associated with a flow; typically distance or time
375    cf              : function
376                      cost function; used to transform cost variable
377    ov              : array
378                      n x p(o); p attributes for each origin of n flows
379    dv              : array
380                      n x p(d); p attributes for each destination of n flows
381    constant        : boolean
382                      True to include intercept in model; True by default
383    y               : array
384                      n x 1; dependent variable used in estimation including any
385                      transformations
386    X               : array
387                      n x k, design matrix used in estimation
388    params          : array
389                      n x k, k estimated beta coefficients; k = p(o) + p(d) + 1
390    yhat            : array
391                      n x 1, predicted value of y (i.e., fittedvalues)
392    cov_params      : array
393                      Variance covariance matrix (kxk) of betas
394    std_err         : array
395                      k x 1, standard errors of betas
396    pvalues         : array
397                      k x 1, two-tailed pvalues of parameters
398    tvalues         : array
399                      k x 1, the tvalues of the standard errors
400    deviance        : float
401                      value of the deviance function evalued at params;
402                      see family.py for distribution-specific deviance
403    resid_dev       : array
404                      n x 1, residual deviance of model
405    llf             : float
406                      value of the loglikelihood function evalued at params;
407                      see family.py for distribution-specific loglikelihoods
408    llnull          : float
409                      value of the loglikelihood function evaluated with only an
410                      intercept; see family.py for distribution-specific
411                      loglikelihoods
412    AIC             : float
413                      Akaike information criterion
414    D2              : float
415                      percentage of explained deviance
416    adj_D2          : float
417                      adjusted percentage of explained deviance
418    pseudo_R2       : float
419                      McFadden's pseudo R2  (coefficient of determination)
420    adj_pseudoR2    : float
421                      adjusted McFadden's pseudo R2
422    SRMSE           : float
423                      standardized root mean square error
424    SSI             : float
425                      Sorensen similarity index
426    results         : object
427                      Full results from estimated model. May contain addtional
428                      diagnostics
429    Example
430    -------
431    >>> import numpy as np
432    >>> import libpysal
433    >>> from spint.gravity import Gravity
434    >>> db = libpysal.io.open(libpysal.examples.get_path('nyc_bikes_ct.csv'))
435    >>> cost = np.array(db.by_col('tripduration')).reshape((-1,1))
436    >>> flows = np.array(db.by_col('count')).reshape((-1,1))
437    >>> o_cap = np.array(db.by_col('o_cap')).reshape((-1,1))
438    >>> d_cap = np.array(db.by_col('d_cap')).reshape((-1,1))
439    >>> model = Gravity(flows, o_cap, d_cap, cost, 'exp')
440    >>> model.params
441    array([ 3.80050153e+00,  5.54103854e-01,  3.94282921e-01, -2.27091686e-03])
442
443    """
444
445    def __init__(self, flows, o_vars, d_vars, cost,
446                 cost_func, constant=True, framework='GLM', SF=None, CD=None,
447                 Lag=None, Quasi=False):
448        self.f = np.reshape(flows, (-1, 1))
449        if len(o_vars.shape) > 1:
450            p = o_vars.shape[1]
451        else:
452            p = 1
453        self.ov = np.reshape(o_vars, (-1, p))
454        if len(d_vars.shape) > 1:
455            p = d_vars.shape[1]
456        else:
457            p = 1
458        self.dv = np.reshape(d_vars, (-1, p))
459        self.c = np.reshape(cost, (-1, 1))
460        #User.check_arrays(self.f, self.ov, self.dv, self.c)
461
462        BaseGravity.__init__(
463            self,
464            self.f,
465            self.c,
466            cost_func=cost_func,
467            o_vars=self.ov,
468            d_vars=self.dv,
469            constant=constant,
470            framework=framework,
471            SF=SF,
472            CD=CD,
473            Lag=Lag,
474            Quasi=Quasi)
475
476    def local(self, loc_index, locs):
477        """
478        Calibrate local models for subsets of data from a single location to all
479        other locations
480
481        Parameters
482        ----------
483        loc_index   : n x 1 array of either origin or destination id label for
484                      flows; must be explicitly provided for local version of
485                      basic gravity model since these are not passed to the
486                      global model.
487
488        locs        : iterable of either origin or destination labels for which
489                      to calibrate local models; must also be explicitly
490                      provided since local gravity models can be calibrated from origins
491                      or destinations. If all origins are also destinations and
492                      a local model is desired for each location then use
493                      np.unique(loc_index)
494
495        Returns
496        -------
497        results     : dict where keys are names of model outputs and diagnostics
498                      and values are lists of location specific values.
499        """
500        results = {}
501        covs = self.ov.shape[1] + self.dv.shape[1] + 1
502        results['AIC'] = []
503        results['deviance'] = []
504        results['pseudoR2'] = []
505        results['adj_pseudoR2'] = []
506        results['D2'] = []
507        results['adj_D2'] = []
508        results['SSI'] = []
509        results['SRMSE'] = []
510        for cov in range(covs):
511            results['param' + str(cov)] = []
512            results['stde' + str(cov)] = []
513            results['pvalue' + str(cov)] = []
514            results['tvalue' + str(cov)] = []
515        for loc in locs:
516            subset = loc_index == loc
517            f = self.reshape(self.f[subset])
518            o_vars = self.ov[subset.reshape(self.ov.shape[0]), :]
519            d_vars = self.dv[subset.reshape(self.dv.shape[0]), :]
520            dij = self.reshape(self.c[subset])
521            model = Gravity(f, o_vars, d_vars, dij, self.cf,
522                            constant=False)
523            results['AIC'].append(model.AIC)
524            results['deviance'].append(model.deviance)
525            results['pseudoR2'].append(model.pseudoR2)
526            results['adj_pseudoR2'].append(model.adj_pseudoR2)
527            results['D2'].append(model.D2)
528            results['adj_D2'].append(model.adj_D2)
529            results['SSI'].append(model.SSI)
530            results['SRMSE'].append(model.SRMSE)
531            for cov in range(covs):
532                results['param' + str(cov)].append(model.params[cov])
533                results['stde' + str(cov)].append(model.std_err[cov])
534                results['pvalue' + str(cov)].append(model.pvalues[cov])
535                results['tvalue' + str(cov)].append(model.tvalues[cov])
536        return results
537
538
539class Production(BaseGravity):
540    """
541    Production-constrained (origin-constrained) gravity-type spatial interaction model
542
543    Parameters
544    ----------
545    flows           : array of integers
546                      n x 1; observed flows between O origins and D destinations
547    origins         : array of strings
548                      n x 1; unique identifiers of origins of n flows; when
549                      there are many origins it will be faster to use integers
550                      rather than strings for id labels.
551    cost            : array
552                      n x 1; cost to overcome separation between each origin and
553                      destination associated with a flow; typically distance or time
554    cost_func       : string or function that has scalar input and output
555                      functional form of the cost function;
556                      'exp' | 'pow' | custom function
557    d_vars          : array (optional)
558                      n x p; p attributes for each destination of n flows;
559                      default is None
560    constant        : boolean
561                      True to include intercept in model; True by default
562    framework       : string
563                      estimation technique; currently only 'GLM' is avaialble
564    Quasi           : boolean
565                      True to estimate QuasiPoisson model; should result in same
566                      parameters as Poisson but with altered covariance; default
567                      to true which estimates Poisson model
568    SF              : array
569                      n x 1; eigenvector spatial filter to include in the model;
570                      default to None which does not include a filter; not yet
571                      implemented
572    CD              : array
573                      n x 1; competing destination term that accounts for the
574                      likelihood that alternative destinations are considered
575                      along with each destination under consideration for every
576                      OD pair; defaults to None which does not include a CD
577                      term; not yet implemented
578    Lag             : W object
579                      spatial weight for n observations (OD pairs) used to
580                      construct a spatial autoregressive model and estimator;
581                      defaults to None which does not include an autoregressive
582                      term; not yet implemented
583
584    Attributes
585    ----------
586    f               : array
587                      n x 1; observed flows; dependent variable; y
588    n               : integer
589                      number of observations
590    k               : integer
591                      number of parameters
592    c               : array
593                      n x 1; cost to overcome separation between each origin and
594                      destination associated with a flow; typically distance or time
595    cf              : function
596                      cost function; used to transform cost variable
597    o               : array
598                      n x 1; index of origin id's
599    dv              : array
600                      n x p; p attributes for each destination of n flows
601    constant        : boolean
602                      True to include intercept in model; True by default
603    y               : array
604                      n x 1; dependent variable used in estimation including any
605                      transformations
606    X               : array
607                      n x k, design matrix used in estimation
608    params          : array
609                      n x k, k estimated beta coefficients; k = # of origins + p + 1
610    yhat            : array
611                      n x 1, predicted value of y (i.e., fittedvalues)
612    cov_params      : array
613                      Variance covariance matrix (kxk) of betas
614    std_err         : array
615                      k x 1, standard errors of betas
616    pvalues         : array
617                      k x 1, two-tailed pvalues of parameters
618    tvalues         : array
619                      k x 1, the tvalues of the standard errors
620    deviance        : float
621                      value of the deviance function evalued at params;
622                      see family.py for distribution-specific deviance
623    resid_dev       : array
624                      n x 1, residual deviance of model
625    llf             : float
626                      value of the loglikelihood function evalued at params;
627                      see family.py for distribution-specific loglikelihoods
628    llnull          : float
629                      value of the loglikelihood function evaluated with only an
630                      intercept; see family.py for distribution-specific
631                      loglikelihoods
632    AIC             : float
633                      Akaike information criterion
634    D2              : float
635                      percentage of explained deviance
636    adj_D2          : float
637                      adjusted percentage of explained deviance
638    pseudo_R2       : float
639                      McFadden's pseudo R2  (coefficient of determination)
640    adj_pseudoR2    : float
641                      adjusted McFadden's pseudo R2
642    SRMSE           : float
643                      standardized root mean square error
644    SSI             : float
645                      Sorensen similarity index
646    results         : object
647                      Full results from estimated model. May contain addtional
648                      diagnostics
649    Example
650    -------
651
652    >>> import numpy as np
653    >>> import libpysal
654    >>> from spint.gravity import Production
655    >>> db = libpysal.io.open(libpysal.examples.get_path('nyc_bikes_ct.csv'))
656    >>> cost = np.array(db.by_col('tripduration')).reshape((-1,1))
657    >>> flows = np.array(db.by_col('count')).reshape((-1,1))
658    >>> o = np.array(db.by_col('o_tract')).reshape((-1,1))
659    >>> d_cap = np.array(db.by_col('d_cap')).reshape((-1,1))
660    >>> model = Production(flows, o, d_cap, cost, 'exp')
661    >>> model.params[-4:]
662    array([ 1.34721352,  0.96357345,  0.85535775, -0.00227444])
663
664    """
665
666    def __init__(self, flows, origins, d_vars, cost, cost_func, constant=True,
667                 framework='GLM', SF=None, CD=None, Lag=None, Quasi=False):
668        self.constant = constant
669        self.f = self.reshape(flows)
670        self.o = self.reshape(origins)
671
672        try:
673            if d_vars.shape[1]:
674                p = d_vars.shape[1]
675        except BaseException:
676            p = 1
677        self.dv = np.reshape(d_vars, (-1, p))
678        self.c = self.reshape(cost)
679        #User.check_arrays(self.f, self.o, self.dv, self.c)
680
681        BaseGravity.__init__(
682            self,
683            self.f,
684            self.c,
685            cost_func=cost_func,
686            d_vars=self.dv,
687            origins=self.o,
688            constant=constant,
689            framework=framework,
690            SF=SF,
691            CD=CD,
692            Lag=Lag,
693            Quasi=Quasi)
694
695    def local(self, locs=None):
696        """
697        Calibrate local models for subsets of data from a single location to all
698        other locations
699
700        Parameters
701        ----------
702        locs        : iterable of location (origins) labels; default is
703                      None which calibrates a local model for each origin
704
705        Returns
706        -------
707        results     : dict where keys are names of model outputs and diagnostics
708                      and values are lists of location specific values
709        """
710        results = {}
711        offset = 1
712        covs = self.dv.shape[1] + 1
713        results['AIC'] = []
714        results['deviance'] = []
715        results['pseudoR2'] = []
716        results['adj_pseudoR2'] = []
717        results['D2'] = []
718        results['adj_D2'] = []
719        results['SSI'] = []
720        results['SRMSE'] = []
721        for cov in range(covs):
722            results['param' + str(cov)] = []
723            results['stde' + str(cov)] = []
724            results['pvalue' + str(cov)] = []
725            results['tvalue' + str(cov)] = []
726        if locs is None:
727            locs = np.unique(self.o)
728        for loc in np.unique(locs):
729            subset = self.o == loc
730            f = self.reshape(self.f[subset])
731            o = self.reshape(self.o[subset])
732            d_vars = self.dv[subset.reshape(self.dv.shape[0]), :]
733            dij = self.reshape(self.c[subset])
734            model = Production(f, o, d_vars, dij, self.cf, constant=False)
735            results['AIC'].append(model.AIC)
736            results['deviance'].append(model.deviance)
737            results['pseudoR2'].append(model.pseudoR2)
738            results['adj_pseudoR2'].append(model.adj_pseudoR2)
739            results['D2'].append(model.D2)
740            results['adj_D2'].append(model.adj_D2)
741            results['SSI'].append(model.SSI)
742            results['SRMSE'].append(model.SRMSE)
743            for cov in range(covs):
744                results['param' + str(cov)].append(model.params[offset + cov])
745                results['stde' + str(cov)].append(model.std_err[offset + cov])
746                results['pvalue' +
747                        str(cov)].append(model.pvalues[offset + cov])
748                results['tvalue' +
749                        str(cov)].append(model.tvalues[offset + cov])
750        return results
751
752
753class Attraction(BaseGravity):
754    """
755    Attraction-constrained (destination-constrained) gravity-type spatial interaction model
756
757    Parameters
758    ----------
759    flows           : array of integers
760                      n x 1; observed flows between O origins and D destinations
761    destinations    : array of strings
762                      n x 1; unique identifiers of destinations of n flows; when
763                      there are many destinations it will be faster to use
764                      integers over strings for id labels.
765    cost            : array
766                      n x 1; cost to overcome separation between each origin and
767                      destination associated with a flow; typically distance or time
768    cost_func       : string or function that has scalar input and output
769                      functional form of the cost function;
770                      'exp' | 'pow' | custom function
771    o_vars          : array (optional)
772                      n x p; p attributes for each origin of  n flows; default
773                      is None
774    constant        : boolean
775                      True to include intercept in model; True by default
776    y               : array
777                      n x 1; dependent variable used in estimation including any
778                      transformations
779    X               : array
780                      n x k, design matrix used in estimation
781    framework       : string
782                      estimation technique; currently only 'GLM' is avaialble
783    Quasi           : boolean
784                      True to estimate QuasiPoisson model; should result in same
785                      parameters as Poisson but with altered covariance; default
786                      to true which estimates Poisson model
787    SF              : array
788                      n x 1; eigenvector spatial filter to include in the model;
789                      default to None which does not include a filter; not yet
790                      implemented
791    CD              : array
792                      n x 1; competing destination term that accounts for the
793                      likelihood that alternative destinations are considered
794                      along with each destination under consideration for every
795                      OD pair; defaults to None which does not include a CD
796                      term; not yet implemented
797    Lag             : W object
798                      spatial weight for n observations (OD pairs) used to
799                      construct a spatial autoregressive model and estimator;
800                      defaults to None which does not include an autoregressive
801                      term; not yet implemented
802
803    Attributes
804    ----------
805    f               : array
806                      n x 1; observed flows; dependent variable; y
807    n               : integer
808                      number of observations
809    k               : integer
810                      number of parameters
811    c               : array
812                      n x 1; cost to overcome separation between each origin and
813                      destination associated with a flow; typically distance or time
814    cf              : function
815                      cost function; used to transform cost variable
816    d               : array
817                      n x 1; index of destination id's
818    ov              : array
819                      n x p; p attributes for each origin of n flows
820    constant        : boolean
821                      True to include intercept in model; True by default
822    params          : array
823                      n x k, k estimated beta coefficients; k = # of
824                      destinations + p + 1
825    yhat            : array
826                      n x 1, predicted value of y (i.e., fittedvalues)
827    cov_params      : array
828                      Variance covariance matrix (kxk) of betas
829    std_err         : array
830                      k x 1, standard errors of betas
831    pvalues         : array
832                      k x 1, two-tailed pvalues of parameters
833    tvalues         : array
834                      k x 1, the tvalues of the standard errors
835    deviance        : float
836                      value of the deviance function evalued at params;
837                      see family.py for distribution-specific deviance
838    resid_dev       : array
839                      n x 1, residual deviance of model
840    llf             : float
841                      value of the loglikelihood function evalued at params;
842                      see family.py for distribution-specific loglikelihoods
843    llnull          : float
844                      value of the loglikelihood function evaluated with only an
845                      intercept; see family.py for distribution-specific
846                      loglikelihoods
847    AIC             : float
848                      Akaike information criterion
849    D2              : float
850                      percentage of explained deviance
851    adj_D2          : float
852                      adjusted percentage of explained deviance
853    pseudo_R2       : float
854                      McFadden's pseudo R2  (coefficient of determination)
855    adj_pseudoR2    : float
856                      adjusted McFadden's pseudo R2
857    SRMSE           : float
858                      standardized root mean square error
859    SSI             : float
860                      Sorensen similarity index
861    results         : object
862                      Full results from estimated model. May contain addtional
863                      diagnostics
864    Example
865    -------
866    >>> import numpy as np
867    >>> import libpysal
868    >>> from spint.gravity import Attraction
869    >>> nyc_bikes = libpysal.examples.load_example('nyc_bikes')
870    >>> db = libpysal.io.open(nyc_bikes.get_path('nyc_bikes_ct.csv'))
871    >>> cost = np.array(db.by_col('tripduration')).reshape((-1,1))
872    >>> flows = np.array(db.by_col('count')).reshape((-1,1))
873    >>> d = np.array(db.by_col('d_tract')).reshape((-1,1))
874    >>> o_cap = np.array(db.by_col('o_cap')).reshape((-1,1))
875    >>> model = Attraction(flows, d, o_cap, cost, 'exp')
876    >>> model.params[-4:]
877    array([ 1.21962276,  0.87634028,  0.88290909, -0.00229081])
878
879    """
880
881    def __init__(self, flows, destinations, o_vars, cost, cost_func,
882                 constant=True, framework='GLM', SF=None, CD=None, Lag=None,
883                 Quasi=False):
884        self.f = np.reshape(flows, (-1, 1))
885        if len(o_vars.shape) > 1:
886            p = o_vars.shape[1]
887        else:
888            p = 1
889        self.ov = np.reshape(o_vars, (-1, p))
890        self.d = np.reshape(destinations, (-1, 1))
891        self.c = np.reshape(cost, (-1, 1))
892        #User.check_arrays(self.f, self.d, self.ov, self.c)
893
894        BaseGravity.__init__(
895            self,
896            self.f,
897            self.c,
898            cost_func=cost_func,
899            o_vars=self.ov,
900            destinations=self.d,
901            constant=constant,
902            framework=framework,
903            SF=SF,
904            CD=CD,
905            Lag=Lag,
906            Quasi=Quasi)
907
908    def local(self, locs=None):
909        """
910        Calibrate local models for subsets of data from a single location to all
911        other locations
912
913        Parameters
914        ----------
915        locs        : iterable of location (destinations) labels; default is
916                      None which calibrates a local model for each destination
917
918        Returns
919        -------
920        results     : dict where keys are names of model outputs and diagnostics
921                      and values are lists of location specific values
922        """
923        results = {}
924        offset = 1
925        covs = self.ov.shape[1] + 1
926        results['AIC'] = []
927        results['deviance'] = []
928        results['pseudoR2'] = []
929        results['adj_pseudoR2'] = []
930        results['D2'] = []
931        results['adj_D2'] = []
932        results['SSI'] = []
933        results['SRMSE'] = []
934        for cov in range(covs):
935            results['param' + str(cov)] = []
936            results['stde' + str(cov)] = []
937            results['pvalue' + str(cov)] = []
938            results['tvalue' + str(cov)] = []
939        if locs is None:
940            locs = np.unique(self.d)
941        for loc in np.unique(locs):
942            subset = self.d == loc
943            f = self.reshape(self.f[subset])
944            d = self.reshape(self.d[subset])
945            o_vars = self.ov[subset.reshape(self.ov.shape[0]), :]
946            dij = self.reshape(self.c[subset])
947            model = Attraction(f, d, o_vars, dij, self.cf, constant=False)
948            results['AIC'].append(model.AIC)
949            results['deviance'].append(model.deviance)
950            results['pseudoR2'].append(model.pseudoR2)
951            results['adj_pseudoR2'].append(model.adj_pseudoR2)
952            results['D2'].append(model.D2)
953            results['adj_D2'].append(model.adj_D2)
954            results['SSI'].append(model.SSI)
955            results['SRMSE'].append(model.SRMSE)
956            for cov in range(covs):
957                results['param' + str(cov)].append(model.params[offset + cov])
958                results['stde' + str(cov)].append(model.std_err[offset + cov])
959                results['pvalue' +
960                        str(cov)].append(model.pvalues[offset + cov])
961                results['tvalue' +
962                        str(cov)].append(model.tvalues[offset + cov])
963        return results
964
965
966class Doubly(BaseGravity):
967    """
968    Doubly-constrained gravity-type spatial interaction model
969
970    Parameters
971    ----------
972    flows           : array of integers
973                      n x 1; observed flows between O origins and D destinations
974    origins         : array of strings
975                      n x 1; unique identifiers of origins of n flows; when
976                      there are many origins it will be faster to use integers
977                      rather than strings for id labels.
978    destinations    : array of strings
979                      n x 1; unique identifiers of destinations of n flows; when
980                      there are many destinations it will be faster to use
981                      integers rather than strings for id labels
982    cost            : array
983                      n x 1; cost to overcome separation between each origin and
984                      destination associated with a flow; typically distance or time
985    cost_func       : string or function that has scalar input and output
986                      functional form of the cost function;
987                      'exp' | 'pow' | custom function
988    constant        : boolean
989                      True to include intercept in model; True by default
990    y               : array
991                      n x 1; dependent variable used in estimation including any
992                      transformations
993    X               : array
994                      n x k, design matrix used in estimation
995    framework       : string
996                      estimation technique; currently only 'GLM' is avaialble
997    Quasi           : boolean
998                      True to estimate QuasiPoisson model; should result in same
999                      parameters as Poisson but with altered covariance; default
1000                      to true which estimates Poisson model
1001    SF              : array
1002                      n x 1; eigenvector spatial filter to include in the model;
1003                      default to None which does not include a filter; not yet
1004                      implemented
1005    CD              : array
1006                      n x 1; competing destination term that accounts for the
1007                      likelihood that alternative destinations are considered
1008                      along with each destination under consideration for every
1009                      OD pair; defaults to None which does not include a CD
1010                      term; not yet implemented
1011    Lag             : W object
1012                      spatial weight for n observations (OD pairs) used to
1013                      construct a spatial autoregressive model and estimator;
1014                      defaults to None which does not include an autoregressive
1015                      term; not yet implemented
1016
1017    Attributes
1018    ----------
1019    f               : array
1020                      n x 1; observed flows; dependent variable; y
1021    n               : integer
1022                      number of observations
1023    k               : integer
1024                      number of parameters
1025    c               : array
1026                      n x 1; cost to overcome separation between each origin and
1027                      destination associated with a flow; typically distance or time
1028    cf              : function
1029                      cost function; used to transform cost variable
1030    o               : array
1031                      n x 1; index of origin id's
1032    d               : array
1033                      n x 1; index of destination id's
1034    constant        : boolean
1035                      True to include intercept in model; True by default
1036    params          : array
1037                      n x k, estimated beta coefficients; k = # of origins + #
1038                      of destinations; the first x-1 values
1039                      pertain to the x destinations (leaving out the first
1040                      destination to avoid perfect collinearity; no fixed
1041                      effect), the next x values pertain to the x origins, and the
1042                      final value is the distance decay coefficient
1043    yhat            : array
1044                      n x 1, predicted value of y (i.e., fittedvalues)
1045    cov_params      : array
1046                      Variance covariance matrix (kxk) of betas
1047    std_err         : array
1048                      k x 1, standard errors of betas
1049    pvalues         : array
1050                      k x 1, two-tailed pvalues of parameters
1051    tvalues         : array
1052                      k x 1, the tvalues of the standard errors
1053    deviance        : float
1054                      value of the deviance function evalued at params;
1055                      see family.py for distribution-specific deviance
1056    resid_dev       : array
1057                      n x 1, residual deviance of model
1058    llf             : float
1059                      value of the loglikelihood function evalued at params;
1060                      see family.py for distribution-specific loglikelihoods
1061    llnull          : float
1062                      value of the loglikelihood function evaluated with only an
1063                      intercept; see family.py for distribution-specific
1064                      loglikelihoods
1065    AIC             : float
1066                      Akaike information criterion
1067    D2              : float
1068                      percentage of explained deviance
1069    adj_D2          : float
1070                      adjusted percentage of explained deviance
1071    pseudo_R2       : float
1072                      McFadden's pseudo R2  (coefficient of determination)
1073    adj_pseudoR2    : float
1074                      adjusted McFadden's pseudo R2
1075    SRMSE           : float
1076                      standardized root mean square error
1077    SSI             : float
1078                      Sorensen similarity index
1079    results         : object
1080                      Full results from estimated model. May contain addtional
1081                      diagnostics
1082    Example
1083    -------
1084    >>> import numpy as np
1085    >>> import libpysal
1086    >>> from spint.gravity import Doubly
1087    >>> db = libpysal.io.open(libpysal.examples.get_path('nyc_bikes_ct.csv'))
1088    >>> cost = np.array(db.by_col('tripduration')).reshape((-1,1))
1089    >>> flows = np.array(db.by_col('count')).reshape((-1,1))
1090    >>> d = np.array(db.by_col('d_tract')).reshape((-1,1))
1091    >>> o = np.array(db.by_col('o_tract')).reshape((-1,1))
1092    >>> model = Doubly(flows, o, d, cost, 'exp')
1093    >>> model.params[-1:]
1094    array([-0.00232112])
1095
1096    """
1097
1098    def __init__(self, flows, origins, destinations, cost, cost_func,
1099                 constant=True, framework='GLM', SF=None, CD=None, Lag=None,
1100                 Quasi=False):
1101
1102        self.f = np.reshape(flows, (-1, 1))
1103        self.o = np.reshape(origins, (-1, 1))
1104        self.d = np.reshape(destinations, (-1, 1))
1105        self.c = np.reshape(cost, (-1, 1))
1106        #User.check_arrays(self.f, self.o, self.d, self.c)
1107
1108        BaseGravity.__init__(
1109            self,
1110            self.f,
1111            self.c,
1112            cost_func=cost_func,
1113            origins=self.o,
1114            destinations=self.d,
1115            constant=constant,
1116            framework=framework,
1117            SF=SF,
1118            CD=CD,
1119            Lag=Lag,
1120            Quasi=Quasi)
1121
1122    def local(self, locs=None):
1123        """
1124        **Not inmplemented for doubly-constrained models** Not possible due to
1125        insufficient degrees of freedom.
1126
1127        Calibrate local models for subsets of data from a single location to all
1128        other locations
1129        """
1130        raise NotImplementedError(
1131            "Local models not possible for"
1132            " doubly-constrained model due to insufficient degrees of freedom.")
1133