1"""
2Weights.
3"""
4__author__ = "Sergio J. Rey <srey@asu.edu>"
5
6import copy
7from os.path import basename as BASENAME
8import math
9import warnings
10import numpy as np
11import scipy.sparse
12from scipy.sparse.csgraph import connected_components
13
14# from .util import full, WSP2W resolve import cycle by
15# forcing these into methods
16from . import adjtools
17from ..io.fileio import FileIO as popen
18
19__all__ = ["W", "WSP"]
20
21
22class W(object):
23    """
24    Spatial weights class. Class attributes are described by their
25    docstrings. to view, use the ``help`` function.
26
27    Parameters
28    ----------
29
30    neighbors : dict
31        Key is region ID, value is a list of neighbor IDS.
32        For example, ``{'a':['b'],'b':['a','c'],'c':['b']}``.
33    weights : dict
34       Key is region ID, value is a list of edge weights.
35       If not supplied all edge weights are assumed to have a weight of 1.
36       For example, ``{'a':[0.5],'b':[0.5,1.5],'c':[1.5]}``.
37    id_order : list
38       An ordered list of ids, defines the order of observations when
39       iterating over ``W`` if not set, lexicographical ordering is used
40       to iterate and the ``id_order_set`` property will return ``False``.
41       This can be set after creation by setting the ``id_order`` property.
42    silence_warnings : bool
43       By default ``libpysal`` will print a warning if the dataset contains
44       any disconnected components or islands. To silence this warning set this
45       parameter to ``True``.
46    ids : list
47        Values to use for keys of the neighbors and weights ``dict`` objects.
48
49    Attributes
50    ----------
51
52    asymmetries
53    cardinalities
54    component_labels
55    diagW2
56    diagWtW
57    diagWtW_WW
58    histogram
59    id2i
60    id_order
61    id_order_set
62    islands
63    max_neighbors
64    mean_neighbors
65    min_neighbors
66    n
67    n_components
68    neighbor_offsets
69    nonzero
70    pct_nonzero
71    s0
72    s1
73    s2
74    s2array
75    sd
76    sparse
77    trcW2
78    trcWtW
79    trcWtW_WW
80    transform
81
82    Examples
83    --------
84
85    >>> from libpysal.weights import W
86    >>> neighbors = {0: [3, 1], 1: [0, 4, 2], 2: [1, 5], 3: [0, 6, 4], 4: [1, 3, 7, 5], 5: [2, 4, 8], 6: [3, 7], 7: [4, 6, 8], 8: [5, 7]}
87    >>> weights = {0: [1, 1], 1: [1, 1, 1], 2: [1, 1], 3: [1, 1, 1], 4: [1, 1, 1, 1], 5: [1, 1, 1], 6: [1, 1], 7: [1, 1, 1], 8: [1, 1]}
88    >>> w = W(neighbors, weights)
89    >>> "%.3f"%w.pct_nonzero
90    '29.630'
91
92    Read from external `.gal file <https://geodacenter.github.io/workbook/4a_contig_weights/lab4a.html#gal-weights-file>`_.
93
94    >>> import libpysal
95    >>> w = libpysal.io.open(libpysal.examples.get_path("stl.gal")).read()
96    >>> w.n
97    78
98    >>> "%.3f"%w.pct_nonzero
99    '6.542'
100
101    Set weights implicitly.
102
103    >>> neighbors = {0: [3, 1], 1: [0, 4, 2], 2: [1, 5], 3: [0, 6, 4], 4: [1, 3, 7, 5], 5: [2, 4, 8], 6: [3, 7], 7: [4, 6, 8], 8: [5, 7]}
104    >>> w = W(neighbors)
105    >>> round(w.pct_nonzero,3)
106    29.63
107    >>> from libpysal.weights import lat2W
108    >>> w = lat2W(100, 100)
109    >>> w.trcW2
110    39600.0
111    >>> w.trcWtW
112    39600.0
113    >>> w.transform='r'
114    >>> round(w.trcW2, 3)
115    2530.722
116    >>> round(w.trcWtW, 3)
117    2533.667
118
119    Cardinality Histogram:
120
121    >>> w.histogram
122    [(2, 4), (3, 392), (4, 9604)]
123
124    Disconnected observations (islands):
125
126    >>> from libpysal.weights import W
127    >>> w = W({1:[0],0:[1],2:[], 3:[]})
128
129    UserWarning: The weights matrix is not fully connected:
130    There are 3 disconnected components.
131    There are 2 islands with ids: 2, 3.
132
133    """
134
135    def __init__(
136        self, neighbors, weights=None, id_order=None, silence_warnings=False, ids=None
137    ):
138        self.silence_warnings = silence_warnings
139        self.transformations = {}
140        self.neighbors = neighbors
141        if not weights:
142            weights = {}
143            for key in neighbors:
144                weights[key] = [1.0] * len(neighbors[key])
145        self.weights = weights
146        self.transformations["O"] = self.weights.copy()  # original weights
147        self.transform = "O"
148        if id_order is None:
149            self._id_order = list(self.neighbors.keys())
150            self._id_order.sort()
151            self._id_order_set = False
152        else:
153            self._id_order = id_order
154            self._id_order_set = True
155        self._reset()
156        self._n = len(self.weights)
157        if not self.silence_warnings and self.n_components > 1:
158            message = (
159                "The weights matrix is not fully connected: "
160                "\n There are %d disconnected components." % self.n_components
161            )
162            ni = len(self.islands)
163            if ni == 1:
164                message = message + "\n There is 1 island with id: " "%s." % (
165                    str(self.islands[0])
166                )
167            elif ni > 1:
168                message = message + "\n There are %d islands with ids: %s." % (
169                    ni,
170                    ", ".join(str(island) for island in self.islands),
171                )
172            warnings.warn(message)
173
174    def _reset(self):
175        """Reset properties."""
176        self._cache = {}
177
178    def to_file(self, path="", format=None):
179        """
180        Write a weights to a file. The format is guessed automatically
181        from the path, but can be overridden with the format argument.
182
183        See libpysal.io.FileIO for more information.
184
185        Arguments
186        ---------
187        path    :   string
188                    location to save the file
189        format  :   string
190                    string denoting the format to write the weights to.
191
192
193        Returns
194        -------
195        None
196        """
197        f = popen(dataPath=path, mode="w", dataFormat=format)
198        f.write(self)
199        f.close()
200
201    @classmethod
202    def from_file(cls, path="", format=None):
203        """
204        Read a weights file into a W object.
205
206        Arguments
207        ---------
208        path    :   string
209                    location to save the file
210        format  :   string
211                    string denoting the format to write the weights to.
212
213        Returns
214        -------
215        W object
216        """
217        f = popen(dataPath=path, mode="r", dataFormat=format)
218        w = f.read()
219        f.close()
220        return w
221
222    @classmethod
223    def from_shapefile(cls, *args, **kwargs):
224        # we could also just "do the right thing," but I think it'd make sense to
225        # try and get people to use `Rook.from_shapefile(shapefile)` rather than
226        # W.from_shapefile(shapefile, type=`rook`), otherwise we'd need to build
227        # a type dispatch table. Generic W should be for stuff we don't know
228        # anything about.
229        raise NotImplementedError(
230            "Use type-specific constructors, like Rook,"
231            " Queen, DistanceBand, or Kernel"
232        )
233
234    @classmethod
235    def from_WSP(cls, WSP, silence_warnings=True):
236        return WSP2W(WSP, silence_warnings=silence_warnings)
237
238    @classmethod
239    def from_adjlist(
240        cls, adjlist, focal_col="focal", neighbor_col="neighbor", weight_col=None
241    ):
242        """
243        Return an adjacency list representation of a weights object.
244
245        Parameters
246        ----------
247
248        adjlist : pandas.DataFrame
249            Adjacency list with a minimum of two columns.
250        focal_col : str
251            Name of the column with the "source" node ids.
252        neighbor_col : str
253            Name of the column with the "destination" node ids.
254        weight_col : str
255            Name of the column with the weight information. If not provided and
256            the dataframe has no column named "weight" then all weights
257            are assumed to be 1.
258        """
259        if weight_col is None:
260            weight_col = "weight"
261        try_weightcol = getattr(adjlist, weight_col)
262        if try_weightcol is None:
263            adjlist = adjlist.copy(deep=True)
264            adjlist["weight"] = 1
265        all_ids = set(adjlist[focal_col].tolist())
266        all_ids |= set(adjlist[neighbor_col].tolist())
267        grouper = adjlist.groupby(focal_col)
268        neighbors = grouper[neighbor_col].apply(list).to_dict()
269        weights = grouper[weight_col].apply(list).to_dict()
270        neighbors.update({k: [] for k in all_ids.difference(list(neighbors.keys()))})
271        weights.update({k: [] for k in all_ids.difference(list(weights.keys()))})
272        return cls(neighbors=neighbors, weights=weights)
273
274    def to_adjlist(
275        self,
276        remove_symmetric=False,
277        focal_col="focal",
278        neighbor_col="neighbor",
279        weight_col="weight",
280    ):
281        """
282        Compute an adjacency list representation of a weights object.
283
284        Parameters
285        ----------
286        remove_symmetric : bool
287            Whether or not to remove symmetric entries. If the ``W``
288            is symmetric, a standard directed adjacency list will contain
289            both the forward and backward links by default because adjacency
290            lists are a directed graph representation. If this is ``True``,
291            a ``W`` created from this adjacency list **MAY NOT BE THE SAME**
292            as the original ``W``. If you would like to consider (1,2) and
293            (2,1) as distinct links, leave this as ``False``.
294        focal_col : str
295            Name of the column in which to store "source" node ids.
296        neighbor_col : str
297            Name of the column in which to store "destination" node ids.
298        weight_col : str
299            Name of the column in which to store weight information.
300        """
301        try:
302            import pandas as pd
303        except ImportError:
304            raise ImportError("pandas must be installed to use this method")
305        n_islands = len(self.islands)
306        if n_islands > 0 and (not self.silence_warnings):
307            warnings.warn(
308                "{} islands in this weights matrix. Conversion to an "
309                "adjacency list will drop these observations!"
310            )
311        adjlist = pd.DataFrame(
312            ((idx, n, w) for idx, neighb in self for n, w in list(neighb.items())),
313            columns=("focal", "neighbor", "weight"),
314        )
315        return adjtools.filter_adjlist(adjlist) if remove_symmetric else adjlist
316
317    def to_networkx(self):
318        """Convert a weights object to a ``networkx`` graph.
319
320        Returns
321        -------
322        A ``networkx`` graph representation of the ``W`` object.
323        """
324        try:
325            import networkx as nx
326        except ImportError:
327            raise ImportError("NetworkX is required to use this function.")
328        G = nx.DiGraph() if len(self.asymmetries) > 0 else nx.Graph()
329        return nx.from_scipy_sparse_matrix(self.sparse, create_using=G)
330
331    @classmethod
332    def from_networkx(cls, graph, weight_col="weight"):
333        """Convert a ``networkx`` graph to a PySAL ``W`` object.
334
335        Parameters
336        ----------
337        graph : networkx.Graph
338            The graph to convert to a ``W``.
339        weight_col : string
340            If the graph is labeled, this should be the name of the field
341            to use as the weight for the ``W``.
342
343        Returns
344        -------
345        w : libpysal.weights.W
346            A ``W`` object containing the same graph as the ``networkx`` graph.
347        """
348        try:
349            import networkx as nx
350        except ImportError:
351            raise ImportError("NetworkX is required to use this function.")
352        sparse_matrix = nx.to_scipy_sparse_matrix(graph)
353        w = WSP(sparse_matrix).to_W()
354        return w
355
356    @property
357    def sparse(self):
358        """Sparse matrix object. For any matrix manipulations required for w,
359        ``w.sparse`` should be used. This is based on ``scipy.sparse``.
360        """
361        if "sparse" not in self._cache:
362            self._sparse = self._build_sparse()
363            self._cache["sparse"] = self._sparse
364        return self._sparse
365
366    @property
367    def n_components(self):
368        """Store whether the adjacency matrix is fully connected.
369        """
370        if "n_components" not in self._cache:
371            self._n_components, self._component_labels = connected_components(
372                self.sparse
373            )
374            self._cache["n_components"] = self._n_components
375            self._cache["component_labels"] = self._component_labels
376        return self._n_components
377
378    @property
379    def component_labels(self):
380        """Store the graph component in which each observation falls.
381        """
382        if "component_labels" not in self._cache:
383            self._n_components, self._component_labels = connected_components(
384                self.sparse
385            )
386            self._cache["n_components"] = self._n_components
387            self._cache["component_labels"] = self._component_labels
388        return self._component_labels
389
390    def _build_sparse(self):
391        """Construct the sparse attribute.
392        """
393
394        row = []
395        col = []
396        data = []
397        id2i = self.id2i
398        for i, neigh_list in list(self.neighbor_offsets.items()):
399            card = self.cardinalities[i]
400            row.extend([id2i[i]] * card)
401            col.extend(neigh_list)
402            data.extend(self.weights[i])
403        row = np.array(row)
404        col = np.array(col)
405        data = np.array(data)
406        s = scipy.sparse.csr_matrix((data, (row, col)), shape=(self.n, self.n))
407        return s
408
409    @property
410    def id2i(self):
411        """Dictionary where the key is an ID and the value is that ID's
412        index in ``W.id_order``.
413        """
414        if "id2i" not in self._cache:
415            self._id2i = {}
416            for i, id_i in enumerate(self._id_order):
417                self._id2i[id_i] = i
418            self._id2i = self._id2i
419            self._cache["id2i"] = self._id2i
420        return self._id2i
421
422    @property
423    def n(self):
424        """Number of units.
425        """
426        if "n" not in self._cache:
427            self._n = len(self.neighbors)
428            self._cache["n"] = self._n
429        return self._n
430
431    @property
432    def s0(self):
433        r"""``s0`` is defined as
434
435        .. math::
436
437               s0=\sum_i \sum_j w_{i,j}
438
439        """
440        if "s0" not in self._cache:
441            self._s0 = self.sparse.sum()
442            self._cache["s0"] = self._s0
443        return self._s0
444
445    @property
446    def s1(self):
447        r"""``s1`` is defined as
448
449        .. math::
450
451               s1=1/2 \sum_i \sum_j \Big(w_{i,j} + w_{j,i}\Big)^2
452
453        """
454        if "s1" not in self._cache:
455            t = self.sparse.transpose()
456            t = t + self.sparse
457            t2 = t.multiply(t)  # element-wise square
458            self._s1 = t2.sum() / 2.0
459            self._cache["s1"] = self._s1
460        return self._s1
461
462    @property
463    def s2array(self):
464        """Individual elements comprising ``s2``.
465
466        See Also
467        --------
468        s2
469
470        """
471        if "s2array" not in self._cache:
472            s = self.sparse
473            self._s2array = np.array(s.sum(1) + s.sum(0).transpose()) ** 2
474            self._cache["s2array"] = self._s2array
475        return self._s2array
476
477    @property
478    def s2(self):
479        r"""``s2`` is defined as
480
481        .. math::
482
483                s2=\sum_j \Big(\sum_i w_{i,j} + \sum_i w_{j,i}\Big)^2
484
485        """
486        if "s2" not in self._cache:
487            self._s2 = self.s2array.sum()
488            self._cache["s2"] = self._s2
489        return self._s2
490
491    @property
492    def trcW2(self):
493        """Trace of :math:`WW`.
494
495        See Also
496        --------
497        diagW2
498
499        """
500        if "trcW2" not in self._cache:
501            self._trcW2 = self.diagW2.sum()
502            self._cache["trcw2"] = self._trcW2
503        return self._trcW2
504
505    @property
506    def diagW2(self):
507        """Diagonal of :math:`WW`.
508
509        See Also
510        --------
511        trcW2
512
513        """
514        if "diagw2" not in self._cache:
515            self._diagW2 = (self.sparse * self.sparse).diagonal()
516            self._cache["diagW2"] = self._diagW2
517        return self._diagW2
518
519    @property
520    def diagWtW(self):
521        """Diagonal of :math:`W^{'}W`.
522
523        See Also
524        --------
525        trcWtW
526
527        """
528        if "diagWtW" not in self._cache:
529            self._diagWtW = (self.sparse.transpose() * self.sparse).diagonal()
530            self._cache["diagWtW"] = self._diagWtW
531        return self._diagWtW
532
533    @property
534    def trcWtW(self):
535        """Trace of :math:`W^{'}W`.
536
537        See Also
538        --------
539        diagWtW
540
541        """
542        if "trcWtW" not in self._cache:
543            self._trcWtW = self.diagWtW.sum()
544            self._cache["trcWtW"] = self._trcWtW
545        return self._trcWtW
546
547    @property
548    def diagWtW_WW(self):
549        """Diagonal of :math:`W^{'}W + WW`.
550        """
551        if "diagWtW_WW" not in self._cache:
552            wt = self.sparse.transpose()
553            w = self.sparse
554            self._diagWtW_WW = (wt * w + w * w).diagonal()
555            self._cache["diagWtW_WW"] = self._diagWtW_WW
556        return self._diagWtW_WW
557
558    @property
559    def trcWtW_WW(self):
560        """Trace of :math:`W^{'}W + WW`.
561        """
562        if "trcWtW_WW" not in self._cache:
563            self._trcWtW_WW = self.diagWtW_WW.sum()
564            self._cache["trcWtW_WW"] = self._trcWtW_WW
565        return self._trcWtW_WW
566
567    @property
568    def pct_nonzero(self):
569        """Percentage of nonzero weights.
570        """
571        if "pct_nonzero" not in self._cache:
572            self._pct_nonzero = 100.0 * self.sparse.nnz / (1.0 * self._n ** 2)
573            self._cache["pct_nonzero"] = self._pct_nonzero
574        return self._pct_nonzero
575
576    @property
577    def cardinalities(self):
578        """Number of neighbors for each observation.
579        """
580        if "cardinalities" not in self._cache:
581            c = {}
582            for i in self._id_order:
583                c[i] = len(self.neighbors[i])
584            self._cardinalities = c
585            self._cache["cardinalities"] = self._cardinalities
586        return self._cardinalities
587
588    @property
589    def max_neighbors(self):
590        """Largest number of neighbors.
591        """
592        if "max_neighbors" not in self._cache:
593            self._max_neighbors = max(self.cardinalities.values())
594            self._cache["max_neighbors"] = self._max_neighbors
595        return self._max_neighbors
596
597    @property
598    def mean_neighbors(self):
599        """Average number of neighbors.
600        """
601        if "mean_neighbors" not in self._cache:
602            self._mean_neighbors = np.mean(list(self.cardinalities.values()))
603            self._cache["mean_neighbors"] = self._mean_neighbors
604        return self._mean_neighbors
605
606    @property
607    def min_neighbors(self):
608        """Minimum number of neighbors.
609        """
610        if "min_neighbors" not in self._cache:
611            self._min_neighbors = min(self.cardinalities.values())
612            self._cache["min_neighbors"] = self._min_neighbors
613        return self._min_neighbors
614
615    @property
616    def nonzero(self):
617        """Number of nonzero weights.
618        """
619        if "nonzero" not in self._cache:
620            self._nonzero = self.sparse.nnz
621            self._cache["nonzero"] = self._nonzero
622        return self._nonzero
623
624    @property
625    def sd(self):
626        """Standard deviation of number of neighbors.
627        """
628        if "sd" not in self._cache:
629            self._sd = np.std(list(self.cardinalities.values()))
630            self._cache["sd"] = self._sd
631        return self._sd
632
633    @property
634    def asymmetries(self):
635        """List of id pairs with asymmetric weights.
636        """
637        if "asymmetries" not in self._cache:
638            self._asymmetries = self.asymmetry()
639            self._cache["asymmetries"] = self._asymmetries
640        return self._asymmetries
641
642    @property
643    def islands(self):
644        """List of ids without any neighbors.
645        """
646        if "islands" not in self._cache:
647            self._islands = [i for i, c in list(self.cardinalities.items()) if c == 0]
648            self._cache["islands"] = self._islands
649        return self._islands
650
651    @property
652    def histogram(self):
653        """Cardinality histogram as a dictionary where key is the id and
654        value is the number of neighbors for that unit.
655        """
656        if "histogram" not in self._cache:
657            ct, bin = np.histogram(
658                list(self.cardinalities.values()),
659                list(range(self.min_neighbors, self.max_neighbors + 2)),
660            )
661            self._histogram = list(zip(bin, ct))
662            self._cache["histogram"] = self._histogram
663        return self._histogram
664
665    def __getitem__(self, key):
666        """Allow a dictionary like interaction with the weights class.
667
668        Examples
669        --------
670        >>> from libpysal.weights import lat2W
671        >>> w = lat2W()
672
673        >>> w[0] == dict({1: 1.0, 5: 1.0})
674        True
675        """
676        return dict(list(zip(self.neighbors[key], self.weights[key])))
677
678    def __iter__(self):
679        """
680        Support iteration over weights.
681
682        Examples
683        --------
684        >>> from libpysal.weights import lat2W
685        >>> w=lat2W(3,3)
686        >>> for i,wi in enumerate(w):
687        ...     print(i,wi[0])
688        ...
689        0 0
690        1 1
691        2 2
692        3 3
693        4 4
694        5 5
695        6 6
696        7 7
697        8 8
698        >>>
699        """
700        for i in self._id_order:
701            yield i, dict(list(zip(self.neighbors[i], self.weights[i])))
702
703    def remap_ids(self, new_ids):
704        """
705        In place modification throughout ``W`` of id values from
706        ``w.id_order`` to ``new_ids`` in all.
707
708        Parameters
709        ----------
710
711        new_ids : list, numpy.ndarray
712            Aligned list of new ids to be inserted. Note that first
713            element of ``new_ids`` will replace first element of
714            ``w.id_order``, second element of ``new_ids`` replaces second
715            element of ``w.id_order`` and so on.
716
717        Examples
718        --------
719
720        >>> from libpysal.weights import lat2W
721        >>> w = lat2W(3, 3)
722        >>> w.id_order
723        [0, 1, 2, 3, 4, 5, 6, 7, 8]
724        >>> w.neighbors[0]
725        [3, 1]
726        >>> new_ids = ['id%i'%id for id in w.id_order]
727        >>> _ = w.remap_ids(new_ids)
728        >>> w.id_order
729        ['id0', 'id1', 'id2', 'id3', 'id4', 'id5', 'id6', 'id7', 'id8']
730        >>> w.neighbors['id0']
731        ['id3', 'id1']
732        """
733
734        old_ids = self._id_order
735        if len(old_ids) != len(new_ids):
736            raise Exception(
737                "W.remap_ids: length of `old_ids` does not match \
738            that of new_ids"
739            )
740        if len(set(new_ids)) != len(new_ids):
741            raise Exception("W.remap_ids: list `new_ids` contains duplicates")
742        else:
743            new_neighbors = {}
744            new_weights = {}
745            old_transformations = self.transformations["O"].copy()
746            new_transformations = {}
747            for o, n in zip(old_ids, new_ids):
748                o_neighbors = self.neighbors[o]
749                o_weights = self.weights[o]
750                n_neighbors = [new_ids[old_ids.index(j)] for j in o_neighbors]
751                new_neighbors[n] = n_neighbors
752                new_weights[n] = o_weights[:]
753                new_transformations[n] = old_transformations[o]
754            self.neighbors = new_neighbors
755            self.weights = new_weights
756            self.transformations["O"] = new_transformations
757
758            id_order = [self._id_order.index(o) for o in old_ids]
759            for i, id_ in enumerate(id_order):
760                self.id_order[id_] = new_ids[i]
761
762            self._reset()
763
764    def __set_id_order(self, ordered_ids):
765        """Set the iteration order in w. ``W`` can be iterated over. On
766        construction the iteration order is set to the lexicographic order of
767        the keys in the ``w.weights`` dictionary. If a specific order
768        is required it can be set with this method.
769
770        Parameters
771        ----------
772
773        ordered_ids : sequence
774            Identifiers for observations in specified order.
775
776        Notes
777        -----
778
779        The ``ordered_ids`` parameter is checked against the ids implied
780        by the keys in ``w.weights``. If they are not equivalent sets an
781        exception is raised and the iteration order is not changed.
782
783        Examples
784        --------
785
786        >>> from libpysal.weights import lat2W
787        >>> w=lat2W(3,3)
788        >>> for i,wi in enumerate(w):
789        ...     print(i, wi[0])
790        ...
791        0 0
792        1 1
793        2 2
794        3 3
795        4 4
796        5 5
797        6 6
798        7 7
799        8 8
800        >>> w.id_order
801        [0, 1, 2, 3, 4, 5, 6, 7, 8]
802        >>> w.id_order=range(8,-1,-1)
803        >>> list(w.id_order)
804        [8, 7, 6, 5, 4, 3, 2, 1, 0]
805        >>> for i,w_i in enumerate(w):
806        ...     print(i,w_i[0])
807        ...
808        0 8
809        1 7
810        2 6
811        3 5
812        4 4
813        5 3
814        6 2
815        7 1
816        8 0
817
818        """
819
820        if set(self._id_order) == set(ordered_ids):
821            self._id_order = ordered_ids
822            self._id_order_set = True
823            self._reset()
824        else:
825            raise Exception("ordered_ids do not align with W ids")
826
827    def __get_id_order(self):
828        """Returns the ids for the observations in the order in which they
829        would be encountered if iterating over the weights.
830        """
831        return self._id_order
832
833    id_order = property(__get_id_order, __set_id_order)
834
835    @property
836    def id_order_set(self):
837        """ Returns ``True`` if user has set ``id_order``, ``False`` if not.
838
839        Examples
840        --------
841        >>> from libpysal.weights import lat2W
842        >>> w=lat2W()
843        >>> w.id_order_set
844        True
845        """
846        return self._id_order_set
847
848    @property
849    def neighbor_offsets(self):
850        """
851        Given the current ``id_order``, ``neighbor_offsets[id]`` is the
852        offsets of the id's neighbors in ``id_order``.
853
854        Returns
855        -------
856        neighbor_list : list
857            Offsets of the id's neighbors in ``id_order``.
858
859        Examples
860        --------
861        >>> from libpysal.weights import W
862        >>> neighbors={'c': ['b'], 'b': ['c', 'a'], 'a': ['b']}
863        >>> weights ={'c': [1.0], 'b': [1.0, 1.0], 'a': [1.0]}
864        >>> w=W(neighbors,weights)
865        >>> w.id_order = ['a','b','c']
866        >>> w.neighbor_offsets['b']
867        [2, 0]
868        >>> w.id_order = ['b','a','c']
869        >>> w.neighbor_offsets['b']
870        [2, 1]
871        """
872
873        if "neighbors_0" not in self._cache:
874            self.__neighbors_0 = {}
875            id2i = self.id2i
876            for j, neigh_list in list(self.neighbors.items()):
877                self.__neighbors_0[j] = [id2i[neigh] for neigh in neigh_list]
878            self._cache["neighbors_0"] = self.__neighbors_0
879
880        neighbor_list = self.__neighbors_0
881
882        return neighbor_list
883
884    def get_transform(self):
885        """Getter for transform property.
886
887        Returns
888        -------
889        transformation : str, None
890            Valid transformation value. See the ``transform``
891            parameters in ``set_transform()`` for a detailed description.
892
893        Examples
894        --------
895        >>> from libpysal.weights import lat2W
896        >>> w=lat2W()
897        >>> w.weights[0]
898        [1.0, 1.0]
899        >>> w.transform
900        'O'
901        >>> w.transform='r'
902        >>> w.weights[0]
903        [0.5, 0.5]
904        >>> w.transform='b'
905        >>> w.weights[0]
906        [1.0, 1.0]
907
908        See also
909        --------
910        set_transform
911
912        """
913
914        return self._transform
915
916    def set_transform(self, value="B"):
917        """Transformations of weights.
918
919        Parameters
920        ----------
921        transform : str
922            This parameter is not case sensitive. The following are
923            valid transformations.
924
925            * **B** -- Binary
926            * **R** -- Row-standardization (global sum :math:`=n`)
927            * **D** -- Double-standardization (global sum :math:`=1`)
928            * **V** -- Variance stabilizing
929            * **O** -- Restore original transformation (from instantiation)
930
931        Notes
932        -----
933
934        Transformations are applied only to the value of the weights at
935        instantiation. Chaining of transformations cannot be done on a ``W``
936        instance.
937
938
939        Examples
940        --------
941        >>> from libpysal.weights import lat2W
942        >>> w=lat2W()
943        >>> w.weights[0]
944        [1.0, 1.0]
945        >>> w.transform
946        'O'
947        >>> w.transform='r'
948        >>> w.weights[0]
949        [0.5, 0.5]
950        >>> w.transform='b'
951        >>> w.weights[0]
952        [1.0, 1.0]
953        """
954        value = value.upper()
955        self._transform = value
956        if value in self.transformations:
957            self.weights = self.transformations[value]
958            self._reset()
959        else:
960            if value == "R":
961                # row standardized weights
962                weights = {}
963                self.weights = self.transformations["O"]
964                for i in self.weights:
965                    wijs = self.weights[i]
966                    row_sum = sum(wijs) * 1.0
967                    if row_sum == 0.0:
968                        if not self.silence_warnings:
969                            print(("WARNING: ", i, " is an island (no neighbors)"))
970                    weights[i] = [wij / row_sum for wij in wijs]
971                weights = weights
972                self.transformations[value] = weights
973                self.weights = weights
974                self._reset()
975            elif value == "D":
976                # doubly-standardized weights
977                # update current chars before doing global sum
978                self._reset()
979                s0 = self.s0
980                ws = 1.0 / s0
981                weights = {}
982                self.weights = self.transformations["O"]
983                for i in self.weights:
984                    wijs = self.weights[i]
985                    weights[i] = [wij * ws for wij in wijs]
986                weights = weights
987                self.transformations[value] = weights
988                self.weights = weights
989                self._reset()
990            elif value == "B":
991                # binary transformation
992                weights = {}
993                self.weights = self.transformations["O"]
994                for i in self.weights:
995                    wijs = self.weights[i]
996                    weights[i] = [1.0 for wij in wijs]
997                weights = weights
998                self.transformations[value] = weights
999                self.weights = weights
1000                self._reset()
1001            elif value == "V":
1002                # variance stabilizing
1003                weights = {}
1004                q = {}
1005                k = self.cardinalities
1006                s = {}
1007                Q = 0.0
1008                self.weights = self.transformations["O"]
1009                for i in self.weights:
1010                    wijs = self.weights[i]
1011                    q[i] = math.sqrt(sum([wij * wij for wij in wijs]))
1012                    s[i] = [wij / q[i] for wij in wijs]
1013                    Q += sum([si for si in s[i]])
1014                nQ = self.n / Q
1015                for i in self.weights:
1016                    weights[i] = [w * nQ for w in s[i]]
1017                weights = weights
1018                self.transformations[value] = weights
1019                self.weights = weights
1020                self._reset()
1021            elif value == "O":
1022                # put weights back to original transformation
1023                weights = {}
1024                original = self.transformations[value]
1025                self.weights = original
1026                self._reset()
1027            else:
1028                raise Exception("unsupported weights transformation")
1029
1030    transform = property(get_transform, set_transform)
1031
1032    def asymmetry(self, intrinsic=True):
1033        r"""
1034        Asymmetry check.
1035
1036        Parameters
1037        ----------
1038        intrinsic : bool
1039            Default is ``True``. Intrinsic symmetry is defined as
1040
1041            .. math::
1042
1043                w_{i,j} == w_{j,i}
1044
1045            If ``intrinsic`` is ``False`` symmetry is defined as
1046
1047            .. math::
1048
1049                i \in N_j \ \& \ j \in N_i
1050
1051            where :math:`N_j` is the set of neighbors for :math:`j`.
1052
1053        Returns
1054        -------
1055        asymmetries : list
1056            Empty if no asymmetries are found if asymmetries, then a
1057            ``list`` of ``(i,j)`` tuples is returned.
1058
1059        Examples
1060        --------
1061
1062        >>> from libpysal.weights import lat2W
1063        >>> w=lat2W(3,3)
1064        >>> w.asymmetry()
1065        []
1066        >>> w.transform='r'
1067        >>> w.asymmetry()
1068        [(0, 1), (0, 3), (1, 0), (1, 2), (1, 4), (2, 1), (2, 5), (3, 0), (3, 4), (3, 6), (4, 1), (4, 3), (4, 5), (4, 7), (5, 2), (5, 4), (5, 8), (6, 3), (6, 7), (7, 4), (7, 6), (7, 8), (8, 5), (8, 7)]
1069        >>> result = w.asymmetry(intrinsic=False)
1070        >>> result
1071        []
1072        >>> neighbors={0:[1,2,3], 1:[1,2,3], 2:[0,1], 3:[0,1]}
1073        >>> weights={0:[1,1,1], 1:[1,1,1], 2:[1,1], 3:[1,1]}
1074        >>> w=W(neighbors,weights)
1075        >>> w.asymmetry()
1076        [(0, 1), (1, 0)]
1077        """
1078
1079        if intrinsic:
1080            wd = self.sparse.transpose() - self.sparse
1081        else:
1082            transform = self.transform
1083            self.transform = "b"
1084            wd = self.sparse.transpose() - self.sparse
1085            self.transform = transform
1086
1087        ids = np.nonzero(wd)
1088        if len(ids[0]) == 0:
1089            return []
1090        else:
1091            ijs = list(zip(ids[0], ids[1]))
1092            ijs.sort()
1093            return ijs
1094
1095    def symmetrize(self, inplace=False):
1096        """Construct a symmetric KNN weight. This ensures that the neighbors
1097        of each focal observation consider the focal observation itself as
1098        a neighbor. This returns a generic ``W`` object, since the object is no
1099        longer guaranteed to have ``k`` neighbors for each observation.
1100        """
1101        if not inplace:
1102            neighbors = copy.deepcopy(self.neighbors)
1103            weights = copy.deepcopy(self.weights)
1104            out_W = W(neighbors, weights, id_order=self.id_order)
1105            out_W.symmetrize(inplace=True)
1106            return out_W
1107        else:
1108            for focal, fneighbs in list(self.neighbors.items()):
1109                for j, neighbor in enumerate(fneighbs):
1110                    neighb_neighbors = self.neighbors[neighbor]
1111                    if focal not in neighb_neighbors:
1112                        self.neighbors[neighbor].append(focal)
1113                        self.weights[neighbor].append(self.weights[focal][j])
1114            self._cache = dict()
1115            return
1116
1117    def full(self):
1118        """Generate a full ``numpy.ndarray``.
1119
1120        Parameters
1121        ----------
1122        self : libpysal.weights.W
1123            spatial weights object
1124
1125        Returns
1126        -------
1127        (fullw, keys) : tuple
1128            The first element being the full ``numpy.ndarray`` and second
1129            element keys being the ids associated with each row in the array.
1130
1131        Examples
1132        --------
1133        >>> from libpysal.weights import W, full
1134        >>> neighbors = {'first':['second'],'second':['first','third'],'third':['second']}
1135        >>> weights = {'first':[1],'second':[1,1],'third':[1]}
1136        >>> w = W(neighbors, weights)
1137        >>> wf, ids = full(w)
1138        >>> wf
1139        array([[0., 1., 0.],
1140               [1., 0., 1.],
1141               [0., 1., 0.]])
1142        >>> ids
1143        ['first', 'second', 'third']
1144        """
1145        wfull = np.zeros([self.n, self.n], dtype=float)
1146        keys = list(self.neighbors.keys())
1147        if self.id_order:
1148            keys = self.id_order
1149        for i, key in enumerate(keys):
1150            n_i = self.neighbors[key]
1151            w_i = self.weights[key]
1152            for j, wij in zip(n_i, w_i):
1153                c = keys.index(j)
1154                wfull[i, c] = wij
1155        return (wfull, keys)
1156
1157    def to_WSP(self):
1158        """Generate a ``WSP`` object.
1159
1160        Returns
1161        -------
1162
1163        implicit : libpysal.weights.WSP
1164            Thin ``W`` class
1165
1166        Examples
1167        --------
1168        >>> from libpysal.weights import W, WSP
1169        >>> neighbors={'first':['second'],'second':['first','third'],'third':['second']}
1170        >>> weights={'first':[1],'second':[1,1],'third':[1]}
1171        >>> w=W(neighbors,weights)
1172        >>> wsp=w.to_WSP()
1173        >>> isinstance(wsp, WSP)
1174        True
1175        >>> wsp.n
1176        3
1177        >>> wsp.s0
1178        4
1179
1180        See also
1181        --------
1182        WSP
1183
1184        """
1185        return WSP(self.sparse, self._id_order)
1186
1187    def set_shapefile(self, shapefile, idVariable=None, full=False):
1188        """
1189        Adding metadata for writing headers of ``.gal`` and ``.gwt`` files.
1190
1191        Parameters
1192        ----------
1193        shapefile : str
1194            The shapefile name used to construct weights.
1195        idVariable : str
1196            The name of the attribute in the shapefile to associate
1197            with ids in the weights.
1198        full : bool
1199            Write out the entire path for a shapefile (``True``) or
1200            only the base of the shapefile without extension (``False``).
1201            Default is ``True``.
1202        """
1203
1204        if full:
1205            self._shpName = shapefile
1206        else:
1207            self._shpName = BASENAME(shapefile).split(".")[0]
1208
1209        self._varName = idVariable
1210
1211    def plot(
1212        self, gdf, indexed_on=None, ax=None, color="k", node_kws=None, edge_kws=None
1213    ):
1214        """Plot spatial weights objects. **Requires** ``matplotlib``, and
1215        implicitly requires a ``geopandas.GeoDataFrame`` as input.
1216
1217        Parameters
1218        ----------
1219        gdf : geopandas.GeoDataFrame
1220            The original shapes whose topological relations are modelled in ``W``.
1221        indexed_on : str
1222            Column of ``geopandas.GeoDataFrame`` that the weights object uses
1223            as an index. Default is ``None``, so the index of the
1224            ``geopandas.GeoDataFrame`` is used.
1225        ax : matplotlib.axes.Axes
1226            Axis on which to plot the weights. Default is ``None``, so
1227            plots on the current figure.
1228        color : str
1229            ``matplotlib`` color string, will color both nodes and edges
1230            the same by default.
1231        node_kws : dict
1232            Keyword arguments dictionary to send to ``pyplot.scatter``,
1233            which provides fine-grained control over the aesthetics
1234            of the nodes in the plot.
1235        edge_kws : dict
1236            Keyword arguments dictionary to send to ``pyplot.plot``,
1237            which provides fine-grained control over the aesthetics
1238            of the edges in the plot.
1239
1240        Returns
1241        -------
1242        f : matplotlib.figure.Figure
1243            Figure on which the plot is made.
1244        ax : matplotlib.axes.Axes
1245            Axis on which the plot is made.
1246
1247        Notes
1248        -----
1249        If you'd like to overlay the actual shapes from the
1250        ``geopandas.GeoDataFrame``, call ``gdf.plot(ax=ax)`` after this.
1251        To plot underneath, adjust the z-order of the plot as follows:
1252        ``gdf.plot(ax=ax,zorder=0)``.
1253
1254        Examples
1255        --------
1256
1257        >>> from libpysal.weights import Queen
1258        >>> import libpysal as lp
1259        >>> import geopandas
1260        >>> gdf = geopandas.read_file(lp.examples.get_path("columbus.shp"))
1261        >>> weights = Queen.from_dataframe(gdf)
1262        >>> tmp = weights.plot(gdf, color='firebrickred', node_kws=dict(marker='*', color='k'))
1263        """
1264        try:
1265            import matplotlib.pyplot as plt
1266        except ImportError:
1267            raise ImportError(
1268                "W.plot depends on matplotlib.pyplot, and this was"
1269                "not able to be imported. \nInstall matplotlib to"
1270                "plot spatial weights."
1271            )
1272        if ax is None:
1273            f = plt.figure()
1274            ax = plt.gca()
1275        else:
1276            f = plt.gcf()
1277        if node_kws is not None:
1278            if "color" not in node_kws:
1279                node_kws["color"] = color
1280        else:
1281            node_kws = dict(color=color)
1282        if edge_kws is not None:
1283            if "color" not in edge_kws:
1284                edge_kws["color"] = color
1285        else:
1286            edge_kws = dict(color=color)
1287
1288        for idx, neighbors in self:
1289            if idx in self.islands:
1290                continue
1291            if indexed_on is not None:
1292                neighbors = gdf[gdf[indexed_on].isin(neighbors)].index.tolist()
1293                idx = gdf[gdf[indexed_on] == idx].index.tolist()[0]
1294            centroids = gdf.loc[neighbors].centroid.apply(lambda p: (p.x, p.y))
1295            centroids = np.vstack(centroids.values)
1296            focal = np.hstack(gdf.loc[idx].geometry.centroid.xy)
1297            seen = set()
1298            for nidx, neighbor in zip(neighbors, centroids):
1299                if (idx, nidx) in seen:
1300                    continue
1301                ax.plot(*list(zip(focal, neighbor)), marker=None, **edge_kws)
1302                seen.update((idx, nidx))
1303                seen.update((nidx, idx))
1304        ax.scatter(
1305            gdf.centroid.apply(lambda p: p.x),
1306            gdf.centroid.apply(lambda p: p.y),
1307            **node_kws
1308        )
1309        return f, ax
1310
1311
1312class WSP(object):
1313    """Thin ``W`` class for ``spreg``.
1314
1315    Parameters
1316    ----------
1317
1318    sparse : scipy.sparse.{matrix-type}
1319        NxN object from ``scipy.sparse``
1320
1321    Attributes
1322    ----------
1323
1324    n           : int
1325                  description
1326    s0          : float
1327                  description
1328    trcWtW_WW   : float
1329                  description
1330
1331    Examples
1332    --------
1333
1334    From GAL information
1335
1336    >>> import scipy.sparse
1337    >>> from libpysal.weights import WSP
1338    >>> rows = [0, 1, 1, 2, 2, 3]
1339    >>> cols = [1, 0, 2, 1, 3, 3]
1340    >>> weights =  [1, 0.75, 0.25, 0.9, 0.1, 1]
1341    >>> sparse = scipy.sparse.csr_matrix((weights, (rows, cols)), shape=(4,4))
1342    >>> w = WSP(sparse)
1343    >>> w.s0
1344    4.0
1345    >>> w.trcWtW_WW
1346    6.395
1347    >>> w.n
1348    4
1349
1350    """
1351
1352    def __init__(self, sparse, id_order=None, index=None):
1353        if not scipy.sparse.issparse(sparse):
1354            raise ValueError("must pass a scipy sparse object")
1355        rows, cols = sparse.shape
1356        if rows != cols:
1357            raise ValueError("Weights object must be square")
1358        self.sparse = sparse.tocsr()
1359        self.n = sparse.shape[0]
1360        self._cache = {}
1361        if id_order:
1362            if len(id_order) != self.n:
1363                raise ValueError(
1364                    "Number of values in id_order must match shape of sparse"
1365                )
1366            else:
1367                self._id_order = id_order
1368                self._cache["id_order"] = self._id_order
1369        # temp addition of index attribute
1370        import pandas as pd  # will be removed after refactoring is done
1371        if index is not None:
1372            if not isinstance(index, (pd.Index, pd.MultiIndex, pd.RangeIndex)):
1373                raise TypeError("index must be an instance of pandas.Index dtype")
1374            if len(index) != self.n:
1375                raise ValueError(
1376                    "Number of values in index must match shape of sparse"
1377                )
1378        else:
1379            index = pd.RangeIndex(self.n)
1380        self.index = index
1381
1382    @property
1383    def id_order(self):
1384        """An ordered list of ids, assumed to match the ordering in ``sparse``.
1385        """
1386        # Temporary solution until the refactoring is finished
1387        if "id_order" not in self._cache:
1388            if hasattr(self, "index"):
1389                self._id_order = self.index.tolist()
1390            else:
1391                self._id_order = list(range(self.n))
1392            self._cache["id_order"] = self._id_order
1393        return self._id_order
1394
1395    @property
1396    def s0(self):
1397        r"""``s0`` is defined as:
1398
1399        .. math::
1400
1401               s0=\sum_i \sum_j w_{i,j}
1402
1403        """
1404        if "s0" not in self._cache:
1405            self._s0 = self.sparse.sum()
1406            self._cache["s0"] = self._s0
1407        return self._s0
1408
1409    @property
1410    def trcWtW_WW(self):
1411        """Trace of :math:`W^{'}W + WW`.
1412        """
1413        if "trcWtW_WW" not in self._cache:
1414            self._trcWtW_WW = self.diagWtW_WW.sum()
1415            self._cache["trcWtW_WW"] = self._trcWtW_WW
1416        return self._trcWtW_WW
1417
1418    @property
1419    def diagWtW_WW(self):
1420        """Diagonal of :math:`W^{'}W + WW`.
1421        """
1422        if "diagWtW_WW" not in self._cache:
1423            wt = self.sparse.transpose()
1424            w = self.sparse
1425            self._diagWtW_WW = (wt * w + w * w).diagonal()
1426            self._cache["diagWtW_WW"] = self._diagWtW_WW
1427        return self._diagWtW_WW
1428
1429    @classmethod
1430    def from_W(cls, W):
1431        """Constructs a ``WSP`` object from the ``W``'s sparse matrix.
1432
1433        Parameters
1434        ----------
1435        W : libpysal.weights.W
1436            A PySAL weights object with a sparse form and ids.
1437
1438        Returns
1439        -------
1440        A ``WSP`` instance.
1441        """
1442        return cls(W.sparse, id_order=W.id_order)
1443
1444    def to_W(self, silence_warnings=False):
1445        """
1446        Convert a pysal WSP object (thin weights matrix) to a pysal W object.
1447
1448        Parameters
1449        ----------
1450        self : WSP
1451            PySAL sparse weights object.
1452        silence_warnings : bool
1453            Switch to ``True`` to turn off print statements for every
1454            observation with islands. Default is ``False``, which does
1455            not silence warnings.
1456
1457        Returns
1458        -------
1459        w : W
1460            PySAL weights object.
1461
1462        Examples
1463        --------
1464        >>> from libpysal.weights import lat2SW, WSP, WSP2W
1465
1466        Build a 10x10 ``scipy.sparse`` matrix for a rectangular 2x5
1467        region of cells (rook contiguity), then construct a ``libpysal``
1468        sparse weights object (``self``).
1469
1470        >>> sp = lat2SW(2, 5)
1471        >>> self = WSP(sp)
1472        >>> self.n
1473        10
1474        >>> print(self.sparse[0].todense())
1475        [[0 1 0 0 0 1 0 0 0 0]]
1476
1477        Convert this sparse weights object to a standard PySAL weights object.
1478
1479        >>> w = WSP2W(self)
1480        >>> w.n
1481        10
1482        >>> print(w.full()[0][0])
1483        [0. 1. 0. 0. 0. 1. 0. 0. 0. 0.]
1484
1485        """
1486
1487        indices = list(self.sparse.indices)
1488        data = list(self.sparse.data)
1489        indptr = list(self.sparse.indptr)
1490        id_order = self.id_order
1491        if id_order:
1492            # replace indices with user IDs
1493            indices = [id_order[i] for i in indices]
1494        else:
1495            id_order = list(range(self.n))
1496        neighbors, weights = {}, {}
1497        start = indptr[0]
1498        for i in range(self.n):
1499            oid = id_order[i]
1500            end = indptr[i + 1]
1501            neighbors[oid] = indices[start:end]
1502            weights[oid] = data[start:end]
1503            start = end
1504        ids = copy.copy(self.id_order)
1505        w = W(neighbors, weights, ids, silence_warnings=silence_warnings)
1506        w._sparse = copy.deepcopy(self.sparse)
1507        w._cache["sparse"] = w._sparse
1508        return w
1509