1from collections import defaultdict, OrderedDict
2from itertools import islice
3import copy, os, pickle, warnings
4
5import esda
6import numpy
7
8from .analysis import GlobalAutoK
9from . import util
10from libpysal import cg, examples, weights
11from libpysal.common import requires
12
13try:
14    from libpysal import open
15except ImportError:
16    import libpysal
17
18    open = libpysal.io.open
19
20
21__all__ = ["Network", "PointPattern", "GlobalAutoK"]
22
23SAME_SEGMENT = (-0.1, -0.1)
24
25
26dep_msg = (
27    "The next major release of pysal/spaghetti (2.0.0) will "
28    "drop support for all ``libpysal.cg`` geometries. This change "
29    "is a first step in refactoring ``spaghetti`` that is "
30    "expected to result in dramatically reduced runtimes for "
31    "network instantiation and operations. Users currently "
32    "requiring network and point pattern input as ``libpysal.cg`` "
33    "geometries should prepare for this simply by converting "
34    "to ``shapely`` geometries."
35    )
36warnings.warn(f"{dep_msg}", FutureWarning)
37
38
39class Network:
40    """Spatially-constrained network representation and analytical
41    functionality. Naming conventions are as follows, (1) arcs and
42    vertices for the full network object, and (2) edges and nodes for
43    the simplified graph-theoretic object. The term 'link' is used to
44    refer to a network arc or a graph edge.
45
46    Parameters
47    ----------
48    in_data : {str, iterable (list, tuple, numpy.ndarray), libpysal.cg.Chain, geopandas.GeoDataFrame}
49        The input geographic data. Either (1) a path to a shapefile
50        (str); (2) an iterable containing ``libpysal.cg.Chain``
51        objects; (3) a single ``libpysal.cg.Chain``; or
52        (4) a ``geopandas.GeoDataFrame``.
53    vertex_sig : int
54        Round the x and y coordinates of all vertices to ``vertex_sig``
55        significant digits (combined significant digits on the left and
56        right of the decimal place). Default is 11. Set to ``None`` for
57        no rounding.
58    unique_arcs : bool
59        If ``True`` (default), keep only unique arcs (i.e., prune
60        out any duplicated arcs). If ``False`` keep all segments.
61    extractgraph : bool
62        If ``True``, extract a graph-theoretic object with no degree 2
63        nodes. Default is ``True``.
64    w_components : bool
65        Set to ``False`` to not record connected components from a
66        ``libpysal.weights.W`` object. Default is ``True``.
67    weightings : {dict, bool}
68        If dict, lists of weightings for each arc. If bool,
69        ``True`` flags ``self.arc_lengths`` as the weightings,
70        ``False`` sets no weightings. Default is ``False``.
71    weights_kws : dict
72        Keyword arguments for ``libpysal.weights.W``.
73    vertex_atol : {int, None}
74        Precision for vertex absolute tolerance. Round vertex coordinates to
75        ``vertex_atol`` decimal places. Default is ``None``. **ONLY** change
76        the default when there are known issues with digitization.
77
78    Attributes
79    ----------
80    adjacencylist : list
81        List of lists storing vertex adjacency.
82    vertex_coords : dict
83        Keys are vertex IDs and values are :math:`(x,y)` coordinates of the vertices.
84    vertex_list : list
85        List of vertex IDs.
86    vertices : dict
87        Keys are tuples of vertex coords and values are the vertex ID.
88    arcs : list
89        List of arcs, where each arc is a sorted tuple
90        of vertex IDs.
91    arc_lengths : dict
92        Keys are tuples of sorted vertex IDs representing an arc and
93        values are the length.
94    pointpatterns : dict
95        Keys are a string name of the pattern and values are
96        ``PointPattern`` class instances.
97    distance_matrix : numpy.ndarray
98        All network vertices (non-observations) distance matrix. Distances
99        between vertices in disparate components are recorded as ``inf``
100        by default.
101    network_trees : dict
102        Keys are the vertex IDs (``int``). Values are dictionaries
103        with the keys being the IDs of the destination vertex
104        and values being lists of vertices along the shortest path.
105        If the destination vertex is a) the origin or b)
106        unreachable (disparate component) it is listed as itself being the
107        neighbor.
108    edges : list
109        Tuples of graph edge IDs.
110    edge_lengths : dict
111        Keys are the graph edge IDs (``tuple``). Values are the
112        graph edge length (``float``).
113    non_articulation_points : list
114        All vertices with degree 2 that are not in an isolated
115        island ring (loop) component.
116    w_network : libpysal.weights.W
117        Weights object created from the network arcs.
118    network_n_components : int
119        Count of connected components in the network.
120    network_fully_connected : bool
121        ``True`` if the network representation is a single connected
122        component, otherwise ``False``.
123    network_component_labels : numpy.ndarray
124        Component labels for network arcs.
125    network_component2arc : dict
126        Lookup in the form {int: list} for arcs comprising network
127        connected components keyed by component labels with arcs in
128        a list as values.
129    network_component_lengths : dict
130        Length of each network component (keyed by component label).
131    network_longest_component : int
132        The ID of the longest component in the network. This is not
133        necessarily equal to ``network_largest_component``.
134    network_component_vertices : dict
135        Lookup in the form {int: list} for vertices comprising network
136        connected components keyed by component labels with vertices in
137        a list as values.
138    network_component_vertex_count : dict
139        The number of vertices in each network component
140        (keyed by component label).
141    network_largest_component : int
142        The ID of the largest component in the network. Within ``spaghetti``
143        the largest component is the one with the most vertices. This is not
144        necessarily equal to ``network_longest_component``.
145    network_component_is_ring : dict
146        Lookup in the form {int: bool} keyed by component labels with values
147        as ``True`` if the component is a closed ring, otherwise ``False``.
148    w_graph : libpysal.weights.W
149        Weights object created from the graph edges.
150    graph_n_components : int
151        Count of connected components in the network.
152    graph_fully_connected : bool
153        ``True`` if the graph representation is a single connected
154        component, otherwise ``False``.
155    graph_component_labels : numpy.ndarray
156        Component labels for graph edges.
157    graph_component2edge : dict
158        Lookup in the form {int: list} for edges comprising graph connected
159        components keyed by component labels with edges in a list
160        as values.
161    graph_component_lengths : dict
162        Length of each graph component (keyed by component label).
163    graph_longest_component : int
164        The ID of the longest component in the graph. This is not
165        necessarily equal to ``graph_largest_component``.
166    graph_component_vertices : dict
167        Lookup in the form {int: list} for vertices comprising graph
168        connected components keyed by component labels with vertices in
169        a list as values.
170    graph_component_vertex_count : dict
171        The number of vertices in each graph component
172        (keyed by component label).
173    graph_largest_component : int
174        The ID of the largest component in the graph. Within ``spaghetti``
175        the largest component is the one with the most vertices. This is not
176        necessarily equal to ``graph_longest_component``.
177    graph_component_is_ring : dict
178        Lookup in the form {int: bool} keyed by component labels with values as
179        ``True`` if the component is a closed ring, otherwise ``False``.
180
181    Notes
182    -----
183
184    **Important**: The core procedure for generating network representations is
185    performed within the ``_extractnetwork()`` method. Here it is important to note
186    that a ``spaghetti.Network`` instance is built up from the individual,
187    constituent euclidean units of each line segment object. Therefore, the resulting
188    network structure will generally have (1) more vertices and links than may expected,
189    and, (2) many degree-2 vertices, which differs from a truly graph-theoretic object.
190    This is demonstrated in the
191    `Caveats Tutorial <https://pysal.org/spaghetti/notebooks/caveats.html#4.-Understanding-network-generation>`_.
192
193    See :cite:`Cliff1981`, :cite:`Tansel1983a`,
194    :cite:`AhujaRavindraK`, :cite:`Labbe1995`,
195    :cite:`Kuby2009`, :cite:`Barthelemy2011`,
196    :cite:`daskin2013`, :cite:`Okabe2012`,
197    :cite:`Ducruet2014`, :cite:`Weber2016`, for more in-depth discussion on
198    spatial networks, graph theory, and location along networks.
199    For related network-centric software see
200    `Snkit <https://github.com/tomalrussell/snkit>`_ :cite:`tom_russell_2019_3379659`,
201    `SANET <http://sanet.csis.u-tokyo.ac.jp>`_ :cite:`Okabe2006a`,
202    `NetworkX <https://networkx.github.io>`_ :cite:`Hagberg2008`,
203    `Pandana <http://udst.github.io/pandana/>`_ :cite:`Foti2012`,
204    and `OSMnx <https://osmnx.readthedocs.io/en/stable/>`_ :cite:`Boeing2017`.
205
206    Examples
207    --------
208
209    Create an instance of a network.
210
211    >>> import spaghetti
212    >>> from libpysal import examples
213    >>> streets_file = examples.get_path("streets.shp")
214    >>> ntw = spaghetti.Network(in_data=streets_file)
215
216    Fetch the number connected components in the network.
217
218    >>> ntw.network_n_components
219    1
220
221    Unique component labels in the network.
222
223    >>> import numpy
224    >>> list(numpy.unique(ntw.network_component_labels))
225    [0]
226
227    Show whether each component of the network is an isolated ring (or not).
228
229    >>> ntw.network_component_is_ring
230    {0: False}
231
232    Show how many network arcs are associated with the component.
233
234    >>> arcs = len(ntw.network_component2arc[ntw.network_component_labels[0]])
235    >>> arcs
236    303
237
238    Do the same as above, but for the graph-theoretic representation
239    of the network object.
240
241    >>> ntw.graph_n_components
242    1
243    >>> list(numpy.unique(ntw.graph_component_labels))
244    [0]
245    >>> ntw.graph_component_is_ring
246    {0: False}
247    >>> edges = len(ntw.graph_component2edge[ntw.graph_component_labels[0]])
248    >>> edges
249    179
250
251    The number of arcs in the network is always greater than or equal
252    to the number of edges in the graph-theoretic representation.
253
254    >>> arcs >= edges
255    True
256
257    Snap point observations to the network with attribute information.
258
259    >>> crimes_file = examples.get_path("crimes.shp")
260    >>> ntw.snapobservations(crimes_file, "crimes", attribute=True)
261
262    And without attribute information.
263
264    >>> schools_file = examples.get_path("schools.shp")
265    >>> ntw.snapobservations(schools_file, "schools", attribute=False)
266
267    Show the point patterns associated with the network.
268
269    >>> ntw.pointpatterns.keys()
270    dict_keys(['crimes', 'schools'])
271
272    """
273
274    def __init__(
275        self,
276        in_data=None,
277        vertex_sig=11,
278        unique_arcs=True,
279        extractgraph=True,
280        w_components=True,
281        weightings=False,
282        weights_kws=dict(),
283        vertex_atol=None,
284    ):
285
286        # do this when creating a clean network instance from a
287        # shapefile or a geopandas.GeoDataFrame, otherwise a shell
288        # network instance is created (see `split_arcs()` method)
289        if in_data is not None:
290
291            # set parameters as attributes
292            self.in_data = in_data
293            self.vertex_sig = vertex_sig
294            self.vertex_atol = vertex_atol
295            self.unique_arcs = unique_arcs
296
297            self.adjacencylist = defaultdict(list)
298            self.vertices = {}
299
300            # initialize network arcs and arc_lengths
301            self.arcs = []
302            self.arc_lengths = {}
303
304            # initialize pointpatterns
305            self.pointpatterns = {}
306
307            # spatial representation of the network
308            self._extractnetwork()
309            self.arcs = sorted(self.arcs)
310            self.vertex_coords = dict((v, k) for k, v in self.vertices.items())
311
312            # extract connected components
313            if w_components:
314                as_graph = False
315                network_weightings = False
316
317                if weightings == True:
318                    # set network arc weights to length if weights are
319                    # desired, but no other input in given
320                    weightings = self.arc_lengths
321                    network_weightings = True
322
323                # extract contiguity weights from libpysal
324                self.w_network = self.contiguityweights(
325                    graph=as_graph,
326                    weightings=weightings,
327                    weights_kws=weights_kws,
328                )
329                # identify connected components from the `w_network`
330                self.identify_components(self.w_network, graph=as_graph)
331
332            # extract the graph -- repeat similar as above
333            # for extracting the network
334            if extractgraph:
335                self.extractgraph()
336
337                if w_components:
338                    as_graph = True
339
340                    if network_weightings:
341                        weightings = self.edge_lengths
342
343                    self.w_graph = self.contiguityweights(
344                        graph=as_graph,
345                        weightings=weightings,
346                        weights_kws=weights_kws,
347                    )
348                    self.identify_components(self.w_graph, graph=as_graph)
349
350            # sorted list of vertex IDs
351            self.vertex_list = sorted(self.vertices.values())
352
353    def _round_sig(self, v):
354        """Used internally to round the vertex to a set number of
355        significant digits. If ``sig`` is set to 4, then the following
356        are some possible results for a coordinate are as follows.
357        (1) 0.0xxxx, (2) 0.xxxx, (3) x.xxx, (4) xx.xx,
358        (5) xxx.x, (6) xxxx.0, (7) xxxx0.0
359
360        Parameters
361        ----------
362        v : tuple
363            Coordinate (x,y) of the vertex.
364
365        """
366
367        # set the number of significant digits
368        sig = self.vertex_sig
369
370        # simply return vertex (x,y) coordinates
371        if sig is None:
372            return v
373
374        # for each coordinate in a coordinate pair
375        # if the coordinate location is (0.0) simply return zero
376        # else -- (1) take the absolute value of `val`; (2) take the
377        # base 10 log for [1]; (3) take the floor of [2]; (4) convert
378        # [3] into a negative integer; (5) add `sig - 1` to [4];
379        # (6) round `val` by [5]
380        out_v = [
381            val
382            if val == 0
383            else round(val, -int(numpy.floor(numpy.log10(numpy.fabs(val)))) + (sig - 1))
384            for val in v
385        ]
386
387        if self.vertex_atol:
388            out_v = [round(v, self.vertex_atol) for v in out_v]
389
390        return tuple(out_v)
391
392    def identify_components(self, w, graph=False):
393        """Identify connected component information from a
394        ``libpysal.weights.W`` object
395
396        Parameters
397        ----------
398        w : libpysal.weights.W
399            Weights object created from the network segments (either
400            raw or graph-theoretic).
401        graph : bool
402            Flag for a raw network (``False``) or graph-theoretic network
403            (``True``). Default is ``False``.
404
405        """
406
407        # flag network (arcs) or graph (edges)
408        if graph:
409            links = self.edges
410            obj_type = "graph_"
411        else:
412            links = self.arcs
413            obj_type = "network_"
414
415        # connected component count and labels
416        n_components = w.n_components
417        component_labels = w.component_labels
418
419        # is the network a single, fully-connected component?
420        if n_components == 1:
421            fully_connected = True
422        else:
423            fully_connected = False
424
425        # link to component lookup
426        link2component = dict(zip(links, component_labels))
427
428        # component ID lookups: links, lengths, vertices, vertex counts
429        component2link = {}
430        component_lengths = {}
431        component_vertices = {}
432        component_vertex_count = {}
433        cp_labs_ = set(w.component_labels)
434        l2c_ = link2component.items()
435        for cpl in cp_labs_:
436            component2link[cpl] = sorted([k for k, v in l2c_ if v == cpl])
437            c2l_ = component2link[cpl]
438            arclens_ = self.arc_lengths.items()
439            component_lengths[cpl] = sum([v for k, v in arclens_ if k in c2l_])
440            component_vertices[cpl] = list(set([v for l in c2l_ for v in l]))
441            component_vertex_count[cpl] = len(component_vertices[cpl])
442
443        # longest and largest components
444        longest_component = max(component_lengths, key=component_lengths.get)
445        largest_component = max(component_vertex_count, key=component_vertex_count.get)
446
447        # component to ring lookup
448        component_is_ring = {}
449        adj_ = self.adjacencylist.items()
450        for comp, verts in component_vertices.items():
451            component_is_ring[comp] = False
452            _2neighs = [len(neighs) == 2 for v, neighs in adj_ if v in verts]
453            if all(_2neighs):
454                component_is_ring[comp] = True
455
456        # attribute label name depends on object type
457        if graph:
458            c2l_attr_name = "component2edge"
459        else:
460            c2l_attr_name = "component2arc"
461
462        # set all new variables into list
463        extracted_attrs = [
464            ["fully_connected", fully_connected],
465            ["n_components", n_components],
466            ["component_labels", component_labels],
467            [c2l_attr_name, component2link],
468            ["component_lengths", component_lengths],
469            ["component_vertices", component_vertices],
470            ["component_vertex_count", component_vertex_count],
471            ["longest_component", longest_component],
472            ["largest_component", largest_component],
473            ["component_is_ring", component_is_ring],
474        ]
475
476        # iterate over list and set attribute with
477        # either "network" or "graph" extension
478        for (attr_str, attr) in extracted_attrs:
479            setattr(self, obj_type + attr_str, attr)
480
481    def _extractnetwork(self):
482        """Used internally to extract a network."""
483
484        # initialize vertex count
485        vertex_count = 0
486
487        # determine input network data type
488        in_dtype = str(type(self.in_data)).split("'")[1]
489        is_libpysal_chains = False
490        supported_iterables = ["list", "tuple", "numpy.ndarray"]
491        # type error message
492        msg = "'%s' not supported for network instantiation."
493
494        # set appropriate geometries
495        if in_dtype == "str":
496            shps = open(self.in_data)
497        elif in_dtype in supported_iterables:
498            shps = self.in_data
499            shp_type = str(type(shps[0])).split("'")[1]
500            if shp_type == "libpysal.cg.shapes.Chain":
501                is_libpysal_chains = True
502            else:
503                raise TypeError(msg % shp_type)
504        elif in_dtype == "libpysal.cg.shapes.Chain":
505            shps = [self.in_data]
506            is_libpysal_chains = True
507        elif in_dtype == "geopandas.geodataframe.GeoDataFrame":
508            shps = self.in_data.geometry
509        else:
510            raise TypeError(msg % in_dtype)
511
512        # iterate over each record of the network lines
513        for shp in shps:
514
515            # if the segments are native pysal geometries
516            if is_libpysal_chains:
517                vertices = shp.vertices
518            else:
519                # fetch all vertices between euclidean segments
520                # in the line record -- these vertices are
521                # coordinates in an (x, y) tuple.
522                vertices = weights._contW_lists._get_verts(shp)
523
524            # iterate over each vertex (v)
525            for i, v in enumerate(vertices[:-1]):
526
527                # -- For vertex 1
528                # adjust precision -- this was originally
529                # implemented to handle high-precision
530                # network network vertices
531                v = self._round_sig(v)
532
533                # when the vertex already exists in lookup
534                # set it as the current `vid`
535                try:
536                    vid = self.vertices[v]
537                # when the vertex is not present in the lookup
538                # add it and adjust vertex count
539                except KeyError:
540                    self.vertices[v] = vid = vertex_count
541                    vertex_count += 1
542
543                # -- For vertex 2
544                # repeat the steps above for vertex 1
545                v2 = self._round_sig(vertices[i + 1])
546                try:
547                    nvid = self.vertices[v2]
548                except KeyError:
549                    self.vertices[v2] = nvid = vertex_count
550                    vertex_count += 1
551
552                # records vertex 1 and vertex 2 adjacency
553                self.adjacencylist[vid].append(nvid)
554                self.adjacencylist[nvid].append(vid)
555
556                # Sort the edges so that mono-directional
557                # keys can be stored.
558                arc_vertices = sorted([vid, nvid])
559                arc = tuple(arc_vertices)
560
561                # record the euclidean arc within the network
562                self.arcs.append(arc)
563
564                # record length
565                length = util.compute_length(v, vertices[i + 1])
566                self.arc_lengths[arc] = length
567
568        if self.unique_arcs:
569            # Remove duplicate edges and duplicate adjacent nodes.
570            self.arcs = list(set(self.arcs))
571            for k, v in self.adjacencylist.items():
572                self.adjacencylist[k] = list(set(v))
573
574    def extractgraph(self):
575        """Using the existing network representation, create a
576        graph-theoretic representation by removing all vertices with a
577        neighbor incidence of two (non-articulation points). That is, we
578        assume these vertices are bridges between vertices with higher
579        or lower incidence.
580        """
581
582        # initialize edges and edge_lengths
583        self.edges = []
584        self.edge_lengths = {}
585
586        # find all vertices with degree 2 that are not in an isolated
587        # island ring (loop) component. These are non-articulation
588        # points on the graph representation
589        non_articulation_points = self._yield_napts()
590        # retain non_articulation_points as an attribute
591        self.non_articulation_points = list(non_articulation_points)
592
593        # start with a copy of the spatial representation and
594        # iteratively remove edges deemed to be segments
595        self.edges = copy.deepcopy(self.arcs)
596        self.edge_lengths = copy.deepcopy(self.arc_lengths)
597
598        # mapping all the 'network arcs' contained within a single
599        # 'graph represented' edge
600        self.arcs_to_edges = {}
601
602        # build up bridges "rooted" on the initial
603        # non-articulation points
604        bridge_roots = []
605
606        # iterate over all vertices that are not contained within
607        # isolated loops that have a degree of 2
608        for s in non_articulation_points:
609
610            # initialize bridge with an articulation point
611            bridge = [s]
612
613            # fetch all vertices adjacent to point `s`
614            # that are also degree 2
615            neighbors = self._yieldneighbor(s, non_articulation_points, bridge)
616            while neighbors:
617
618                # extract the current node in `neighbors`
619                cnode = neighbors.pop()
620                # remove it from `non_articulation_points`
621                non_articulation_points.remove(cnode)
622                # add it to bridge
623                bridge.append(cnode)
624                # fetch neighbors for the current node
625                newneighbors = self._yieldneighbor(
626                    cnode, non_articulation_points, bridge
627                )
628                # add the new neighbors back into `neighbors`
629                neighbors += newneighbors
630
631            # once all potential neighbors are exhausted add the
632            # current bridge of non-articulation points to the
633            # list of rooted bridges
634            bridge_roots.append(bridge)
635
636        # iterate over the list of newly created rooted bridges
637        for bridge in bridge_roots:
638
639            # if the vertex is only one non-articulation
640            # point in the bridge
641            if len(bridge) == 1:
642
643                # that the singular element of the bridge
644                n = self.adjacencylist[bridge[0]]
645                # and create a new graph edge from it
646                new_edge = tuple(sorted([n[0], n[1]]))
647
648                # identify the arcs to be removed
649                e1 = tuple(sorted([bridge[0], n[0]]))
650                e2 = tuple(sorted([bridge[0], n[1]]))
651
652                # remove the network arcs (spatial) from the
653                # graph-theoretic representation
654                self.edges.remove(e1)
655                self.edges.remove(e2)
656
657                # remove the former network arc lengths from the
658                # graph edge lengths lookup
659                length_e1 = self.edge_lengths[e1]
660                length_e2 = self.edge_lengths[e2]
661                self.edge_lengths.pop(e1, None)
662                self.edge_lengths.pop(e2, None)
663
664                # and add the new edge length in their place
665                self.edge_lengths[new_edge] = length_e1 + length_e2
666
667                # update the pointers
668                self.arcs_to_edges[e1] = new_edge
669                self.arcs_to_edges[e2] = new_edge
670
671            # if there are more than one vertices in the bridge
672            else:
673                cumulative_length = 0
674                start_end = {}
675
676                # initialize a redundant set of bridge edges
677                redundant = set([])
678
679                # iterate over the current bridge
680                for b in bridge:
681                    # iterate over each node in the bridge
682                    for n in self.adjacencylist[b]:
683                        # start the bridge with this node
684                        if n not in bridge:
685                            start_end[b] = n
686                        # or create a redundant edge with the current
687                        # node and `b`
688                        else:
689                            redundant.add(tuple(sorted([b, n])))
690
691                # initialize a new graph edge
692                new_edge = tuple(sorted(start_end.values()))
693
694                # add start_end redundant edge
695                for k, v in start_end.items():
696                    redundant.add(tuple(sorted([k, v])))
697
698                # remove all redundant network arcs while
699                # adjusting the graph edge lengths lookup
700                # and the edges_to_arcs lookup
701                for r in redundant:
702                    self.edges.remove(r)
703                    cumulative_length += self.edge_lengths[r]
704                    self.edge_lengths.pop(r, None)
705                    self.arcs_to_edges[r] = new_edge
706
707                # finally, add the new cumulative edge length
708                self.edge_lengths[new_edge] = cumulative_length
709
710            # add the updated graph edge
711            self.edges.append(new_edge)
712
713        # converted the graph edges into a sorted set to prune out
714        # duplicate graph edges created during simplification
715        self.edges = sorted(set(self.edges))
716
717    def _yield_napts(self):
718        """Find all nodes with degree 2 that are not in an isolated
719        island ring (loop) component. These are non-articulation
720        points on the graph representation.
721
722        Returns
723        -------
724        napts : list
725            Non-articulation points on a graph representation.
726
727        """
728
729        # non-articulation points
730        napts = set()
731
732        # network vertices remaining to evaluate
733        unvisted = set(self.vertices.values())
734
735        while unvisted:
736
737            # iterate over each component
738            for component_id, ring in self.network_component_is_ring.items():
739
740                # evaluate for non-articulation points
741                napts, unvisted = self._evaluate_napts(
742                    napts, unvisted, component_id, ring
743                )
744
745        # convert set of non-articulation points into list
746        napts = list(napts)
747
748        return napts
749
750    def _evaluate_napts(self, napts, unvisited, component_id, ring):
751        """Evaluate one connected component in a network for
752        non-articulation points (``napts``) and return an updated set of
753        ``napts`` and unvisted vertices.
754
755        Parameters
756        ----------
757        napts : set
758            Non-articulation points (``napts``) in the network. The
759            ``napts`` here do not include those within an isolated
760            loop island.
761        unvisited : set
762            Vertices left to evaluate in the network.
763        component_id : int
764            ID for the network connected component for the
765            current iteration of the algorithm.
766        ring : bool
767            Network component is isolated island loop ``True`` or
768            not ``False``.
769
770        Returns
771        -------
772        napts : set
773            Updated ``napts`` object.
774        unvisited : set
775            Updated ``napts`` object.
776
777        """
778
779        # iterate over each `edge` of the `component`
780        for component in self.network_component2arc[component_id]:
781
782            # each `component` has two vertices
783            for vertex in component:
784
785                # if `component` is not an isolated island
786                # and `vertex` has exactly 2 neighbors,
787                # add `vertex` to `napts`
788                if not ring:
789                    if len(self.adjacencylist[vertex]) == 2:
790                        napts.add(vertex)
791
792                # remove `vertex` from `unvisited` if
793                # it is still in the set else move along to
794                # the next iteration
795                try:
796                    unvisited.remove(vertex)
797                except KeyError:
798                    pass
799
800        return napts, unvisited
801
802    def _yieldneighbor(self, vtx, arc_vertices, bridge):
803        """Used internally, this method traverses a bridge arc
804        to find the source and destination nodes.
805
806        Parameters
807        ----------
808        vtx : int
809            The vertex ID.
810        arc_vertices : list
811            All non-articulation points (``napts``) in the network.
812            These are referred to as degree-2 vertices.
813        bridge : list
814            Inital bridge list containing only ``vtx``.
815
816        Returns
817        -------
818        nodes : list
819            Vertices to keep (articulation points). These elements are
820            referred to as nodes.
821
822        """
823
824        # instantiate empty lis to fill with network articulation
825        # points (nodes with a degree of 1 [endpoints] or greater
826        # than 2 [intersections])
827        nodes = []
828
829        # get all nodes adjacent to `vtx` that are not in the
830        # set of 'bridge' vertices
831        for i in self.adjacencylist[vtx]:
832
833            if i in arc_vertices and i not in bridge:
834                nodes.append(i)
835
836        return nodes
837
838    def contiguityweights(
839        self, graph=True, weightings=None, from_split=False, weights_kws=dict()
840    ):
841        """Create a contiguity-based ``libpysal.weights.W`` object.
842
843        Parameters
844        ----------
845        graph : bool
846            Controls whether the ``libpysal.weights.W`` is generated
847            using the spatial representation (``False``) or the graph
848            representation (``True``). Default is ``True``.
849        weightings : {dict, None}
850            Dictionary of lists of weightings for each arc/edge. Default is ``None``.
851        from_split : bool
852            Flag for whether the method is being called from within
853            ``split_arcs()`` (``True``) or not (``False``). Default is ``False``.
854        weights_kws : dict
855            Keyword arguments for ``libpysal.weights.W``.
856
857        Returns
858        -------
859         W : libpysal.weights.W
860            A ``W`` representing the binary adjacency of the network.
861
862        See also
863        --------
864
865        libpysal.weights.W
866
867        Examples
868        --------
869
870        Instantiate a network.
871
872        >>> import spaghetti
873        >>> from libpysal import examples
874        >>> import numpy
875        >>> ntw = spaghetti.Network(examples.get_path("streets.shp"))
876
877        Snap point observations to the network with attribute information.
878
879        >>> ntw.snapobservations(
880        ...     examples.get_path("crimes.shp"), "crimes", attribute=True
881        ... )
882
883        Find counts per network arc.
884
885        >>> counts = ntw.count_per_link(
886        ...     ntw.pointpatterns["crimes"].obs_to_arc, graph=False
887        ... )
888        >>> counts[(50, 165)]
889        4
890
891        Create a contiguity-based ``W`` object.
892
893        >>> w = ntw.contiguityweights(graph=False)
894        >>> w.n, w.n_components
895        (303, 1)
896
897        Notes
898        -----
899
900        See :cite:`pysal2007` for more details.
901
902        """
903
904        # instantiate OrderedDict to record network link
905        # adjacency which will be keyed by the link ID (a tuple)
906        # with values being lists of tuples (contiguous links)
907        neighbors = OrderedDict()
908
909        # flag network (arcs) or graph (edges)
910        if graph:
911            links = self.edges
912        else:
913            links = self.arcs
914
915        # if weightings are desired instantiate a dictionary
916        # other ignore weightings
917        if weightings:
918            _weights = {}
919        else:
920            _weights = None
921
922        # iterate over all links until all possibilities
923        # for network link adjacency are exhausted
924        working = True
925        while working:
926
927            # for each network link (1)
928            for key in links:
929
930                # instantiate a slot in the OrderedDict
931                neighbors[key] = []
932
933                if weightings:
934                    _weights[key] = []
935
936                # for each network link (2)
937                for neigh in links:
938                    # skip if comparing link to itself
939                    if key == neigh:
940                        continue
941
942                    # if link(1) and link(2) share any vertex
943                    # update neighbors adjacency
944                    if (
945                        key[0] == neigh[0]
946                        or key[0] == neigh[1]
947                        or key[1] == neigh[0]
948                        or key[1] == neigh[1]
949                    ):
950                        neighbors[key].append(neigh)
951
952                        # and add weights if desired
953                        if weightings:
954                            _weights[key].append(weightings[neigh])
955
956                    # break condition
957                    # -- everything is sorted, so we know when we have
958                    # stepped beyond a possible neighbor
959                    if key[1] > neigh[1]:
960                        working = False
961
962            if len(links) == 1 or from_split:
963                working = False
964
965        # call libpysal for `W` instance
966        weights_kws["weights"] = _weights
967        w = weights.W(neighbors, **weights_kws)
968
969        return w
970
971    def distancebandweights(self, threshold, n_processes=1, gen_tree=False):
972        """Create distance-based weights.
973
974        Parameters
975        ----------
976        threshold : float
977            Distance threshold value.
978        n_processes : {int, str}
979            Specify the number of cores to utilize. Default is 1 core.
980            Use ``"all"`` to request all available cores.
981            Specify the exact number of cores with an integer.
982        gen_tree : bool
983            Rebuild shortest path with ``True``, or skip with ``False``.
984            Default is ``False``.
985
986        Returns
987        -------
988        w : libpysal.weights.W
989            A ``W`` object representing the binary adjacency of
990            the network.
991
992        Notes
993        -----
994
995        See :cite:`AnselinRey2014` and :cite:`rey_open_2015` for more details
996        regarding spatial weights.
997
998        See also
999        --------
1000
1001        libpysal.weights.W
1002
1003        Examples
1004        --------
1005
1006        Instantiate an instance of a network.
1007
1008        >>> import spaghetti
1009        >>> from libpysal import examples
1010        >>> streets_file = examples.get_path("streets.shp")
1011        >>> ntw = spaghetti.Network(in_data=streets_file)
1012
1013        Create a contiguity-based ``W`` object based on network distance, ``500``
1014        `US feet in this case <https://github.com/pysal/libpysal/blob/master/libpysal/examples/geodanet/streets.prj>`_.
1015
1016        >>> w = ntw.distancebandweights(threshold=500)
1017
1018        Show the number of units in the ``W`` object.
1019
1020        >>> w.n
1021        230
1022
1023        There are ``8`` units with ``3`` neighbors in the ``W`` object.
1024
1025        >>> w.histogram[-1]
1026        (8, 3)
1027
1028        """
1029
1030        # if the a vertex-to-vertex network distance matrix is
1031        # not present in the `network.Network` object; calculate
1032        # one at this point
1033        if not hasattr(self, "distance_matrix"):
1034            self.full_distance_matrix(n_processes, gen_tree=gen_tree)
1035
1036        # identify all network vertices which are within the
1037        # `threshold` parameter
1038        neighbor_query = numpy.where(self.distance_matrix < threshold)
1039
1040        # create an instance for recording neighbors which
1041        # inserts a new key if not present in object
1042        neighbors = defaultdict(list)
1043
1044        # iterate over neighbors within the `threshold`
1045        # and record all network vertices as neighbors
1046        # if the vertex is not being compared to itself
1047        for i, n in enumerate(neighbor_query[0]):
1048            neigh = neighbor_query[1][i]
1049            if n != neigh:
1050                neighbors[n].append(neigh)
1051
1052        # call libpysal for `W` instance
1053        w = weights.W(neighbors)
1054
1055        return w
1056
1057    def snapobservations(self, in_data, name, idvariable=None, attribute=False):
1058        """Snap a point pattern shapefile to a network object. The
1059        point pattern is stored in the ``network.pointpattern``
1060        attribute of the network object.
1061
1062        Parameters
1063        ----------
1064        in_data : {geopandas.GeoDataFrame, str}
1065            The input geographic data. Either (1) a path to a
1066            shapefile (str); or (2) a ``geopandas.GeoDataFrame``.
1067        name : str
1068            Name to be assigned to the point dataset.
1069        idvariable : str
1070            Column name to be used as the ID variable.
1071        attribute : bool
1072            Defines whether attributes should be extracted. ``True`` for
1073            attribute extraction. ``False`` for no attribute extraction.
1074            Default is ``False``.
1075
1076        Notes
1077        -----
1078
1079        See :cite:`doi:10.1111/gean.12211` for a detailed discussion on
1080        the modeling consequences of snapping points to spatial networks.
1081
1082        Examples
1083        --------
1084
1085        Instantiate a network.
1086
1087        >>> import spaghetti
1088        >>> from libpysal import examples
1089        >>> streets_file = examples.get_path("streets.shp")
1090        >>> ntw = spaghetti.Network(in_data=streets_file)
1091
1092        Snap observations to the network.
1093
1094        >>> pt_str = "crimes"
1095        >>> in_data = examples.get_path(pt_str+".shp")
1096        >>> ntw.snapobservations(in_data, pt_str, attribute=True)
1097
1098        Isolate the number of points in the dataset.
1099
1100        >>> ntw.pointpatterns[pt_str].npoints
1101        287
1102
1103        """
1104
1105        # create attribute of `pointpattern` but instantiating a
1106        # `network.PointPattern` class
1107        self.pointpatterns[name] = PointPattern(
1108            in_data=in_data, idvariable=idvariable, attribute=attribute
1109        )
1110
1111        # allocate the point observations to the nework
1112        self._snap_to_link(self.pointpatterns[name])
1113
1114    def compute_distance_to_vertices(self, x, y, arc):
1115        """Given an observation on a network arc, return the distance
1116        to the two vertices that bound that end.
1117
1118        Parameters
1119        ----------
1120        x : float
1121            The x-coordinate of the snapped point.
1122        y : float
1123            The y-coordinate of the snapped point.
1124        arc : tuple
1125            The (vtx0, vtx1) representation of the network arc.
1126
1127        Returns
1128        -------
1129        d1 : float
1130            The distance to vtx0. Always the vertex with the lesser ID.
1131        d2 : float
1132            The distance to vtx1. Always the vertex with the greater ID.
1133
1134        """
1135
1136        # distance to vertex 1
1137        d1 = util.compute_length((x, y), self.vertex_coords[arc[0]])
1138
1139        # distance to vertex 2
1140        d2 = util.compute_length((x, y), self.vertex_coords[arc[1]])
1141
1142        return d1, d2
1143
1144    def compute_snap_dist(self, pattern, idx):
1145        """Given an observation snapped to a network arc, calculate the
1146        distance from the original location to the snapped location.
1147
1148        Parameters
1149        -----------
1150        pattern : spaghetti.PointPattern
1151            The point pattern object.
1152        idx : int
1153            The point ID.
1154
1155        Returns
1156        -------
1157        dist : float
1158            The euclidean distance from original location to the snapped
1159            location.
1160
1161        """
1162
1163        # set of original (x,y) point coordinates
1164        loc = pattern.points[idx]["coordinates"]
1165
1166        # set of snapped (x,y) point coordinate
1167        snp = pattern.snapped_coordinates[idx]
1168
1169        # distance from the original location to
1170        # the snapped location along the network
1171        dist = util.compute_length(loc, snp)
1172
1173        return dist
1174
1175    def _snap_to_link(self, pointpattern):
1176        """Used internally to snap point observations to network arcs.
1177
1178        Parameters
1179        -----------
1180        pointpattern : spaghetti.PointPattern
1181            The point pattern object.
1182
1183        Returns
1184        -------
1185        obs_to_arc : dict
1186            Dictionary with arcs as keys and lists of points as values.
1187        arc_to_obs : dict
1188            Dictionary with point IDs as keys and arc tuples as values.
1189        dist_to_vertex : dict
1190            Dictionary with point IDs as keys and values as dictionaries
1191            with keys for vertex IDs and values as distances from point
1192            to vertex.
1193        dist_snapped : dict
1194            Dictionary with point IDs as keys and distance from point
1195            to the network arc that it is snapped.
1196
1197        """
1198
1199        # instantiate observations snapped coordinates lookup
1200        pointpattern.snapped_coordinates = {}
1201
1202        # record throw-away arcs (pysal.cg.Chain) enumerator
1203        arcs_ = []
1204
1205        # snapped(point)-to-arc lookup
1206        s2a = {}
1207
1208        # iterate over network arc IDs
1209        for arc in self.arcs:
1210
1211            # record the start and end of the arc
1212            head = self.vertex_coords[arc[0]]
1213            tail = self.vertex_coords[arc[1]]
1214
1215            # create a pysal.cg.Chain object of the arc
1216            # and add it to the arcs enumerator
1217            arcs_.append(util._chain_constr(None, [head, tail]))
1218
1219            # add the arc into the snapped(point)-to-arc lookup
1220            s2a[(head, tail)] = arc
1221
1222        # instantiate crosswalks
1223        points = {}  # point ID to coordinates lookup
1224        obs_to_arc = {}  # observations to arcs lookup
1225        dist_to_vertex = {}  # distance to vertices lookup
1226        dist_snapped = {}  # snapped distance lookup
1227
1228        # fetch and records point coordinates keyed by ID
1229        for point_idx, point in pointpattern.points.items():
1230            points[point_idx] = point["coordinates"]
1231
1232        # snap point observations to the network
1233        snapped = util.snap_points_to_links(points, arcs_)
1234
1235        # record obs_to_arc, dist_to_vertex, and dist_snapped
1236        # -- iterate over the snapped observation points
1237        for point_idx, snap_info in snapped.items():
1238
1239            # fetch the x and y coordinate
1240            x, y = snap_info[1].tolist()
1241
1242            # look up the arc from snapped(point)-to-arc
1243            arc = s2a[tuple(snap_info[0])]
1244
1245            # add the arc key to observations to arcs lookup
1246            if arc not in obs_to_arc:
1247                obs_to_arc[arc] = {}
1248
1249            # add the (x,y) coordinates of the original observation
1250            # point location to the observations to arcs lookup
1251            obs_to_arc[arc][point_idx] = (x, y)
1252
1253            # add the (x,y) coordinates of the snapped observation
1254            # point location to the snapped coordinates lookup
1255            pointpattern.snapped_coordinates[point_idx] = (x, y)
1256
1257            # calculate the distance to the left and right vertex
1258            # along the network link from the snapped point location
1259            d1, d2 = self.compute_distance_to_vertices(x, y, arc)
1260
1261            # record the distances in the distance to vertices lookup
1262            dist_to_vertex[point_idx] = {arc[0]: d1, arc[1]: d2}
1263
1264            # record the snapped distance
1265            dist_snapped[point_idx] = self.compute_snap_dist(pointpattern, point_idx)
1266
1267        # instantiate observations to network vertex lookup
1268        obs_to_vertex = defaultdict(list)
1269
1270        # iterate over the observations to arcs lookup
1271        for k, v in obs_to_arc.items():
1272
1273            # record the left and right vertex ids
1274            keys = v.keys()
1275            obs_to_vertex[k[0]] = keys
1276            obs_to_vertex[k[1]] = keys
1277
1278        # iterate over components and assign observations
1279        component_to_obs = {}
1280        for comp, _arcids in self.network_component2arc.items():
1281            component_to_obs[comp] = []
1282            for lk, odict in obs_to_arc.items():
1283                if lk in _arcids:
1284                    component_to_obs[comp].extend(list(odict.keys()))
1285
1286        # set crosswalks as attributes of the `pointpattern` class
1287        pointpattern.obs_to_arc = obs_to_arc
1288        pointpattern.component_to_obs = component_to_obs
1289        pointpattern.dist_to_vertex = dist_to_vertex
1290        pointpattern.dist_snapped = dist_snapped
1291        pointpattern.obs_to_vertex = list(obs_to_vertex)
1292
1293    def count_per_link(self, obs_on, graph=False):
1294        """Compute the counts per arc or edge (link).
1295
1296        Parameters
1297        ----------
1298        obs_on : dict
1299            Dictionary of observations on the network.
1300            Either in the form ``{(<LINK>):{<POINT_ID>:(<COORDS>)}}``
1301            or ``{<LINK>:[(<COORD>),(<COORD>)]}``.
1302        graph : bool
1303            Count observations on graph edges (``True``) or
1304            network arcs (``False``). Default is ``False``.
1305
1306        Returns
1307        -------
1308        counts : dict
1309            Counts per network link in the form ``{(<LINK>):<COUNT>}``.
1310
1311        Examples
1312        --------
1313
1314        Note that this passes the ``obs_to_arc`` or ``obs_to_edge`` attribute
1315        of a point pattern snapped to the network.
1316
1317        >>> import spaghetti
1318        >>> from libpysal import examples
1319        >>> ntw = spaghetti.Network(examples.get_path("streets.shp"))
1320
1321        Snap observations to the network.
1322
1323        >>> ntw.snapobservations(
1324        ...     examples.get_path("crimes.shp"), "crimes", attribute=True
1325        ... )
1326
1327        >>> counts = ntw.count_per_link(
1328        ...     ntw.pointpatterns["crimes"].obs_to_arc, graph=False
1329        ... )
1330        >>> counts[(140, 142)]
1331        10
1332
1333        >>> s = sum([v for v in list(counts.values())])
1334        >>> s
1335        287
1336
1337        """
1338
1339        # instantiate observation counts by link lookup
1340        counts = {}
1341
1342        # graph-theoretic object of nodes and edges
1343        if graph:
1344
1345            # iterate the links-to-observations lookup
1346            for key, observations in obs_on.items():
1347
1348                # isolate observation count for the link
1349                cnt = len(observations)
1350
1351                # extract link (edges) key
1352                if key in self.arcs_to_edges.keys():
1353                    key = self.arcs_to_edges[key]
1354
1355                # either add to current count or a dictionary
1356                # entry or create new dictionary entry
1357                try:
1358                    counts[key] += cnt
1359                except KeyError:
1360                    counts[key] = cnt
1361
1362        # network object of arcs and vertices
1363        else:
1364
1365            # simplified version of the above process
1366            for key in obs_on.keys():
1367                counts[key] = len(obs_on[key])
1368
1369        return counts
1370
1371    def _newpoint_coords(self, arc, distance):
1372        """Used internally to compute new point coordinates during snapping."""
1373
1374        # extract coordinates for vertex 1 of arc
1375        x1 = self.vertex_coords[arc[0]][0]
1376        y1 = self.vertex_coords[arc[0]][1]
1377
1378        # extract coordinates for vertex 2 of arc
1379        x2 = self.vertex_coords[arc[1]][0]
1380        y2 = self.vertex_coords[arc[1]][1]
1381
1382        # if the network arc is vertical set the (x) coordinate
1383        # and proceed to calculating the (y) coordinate
1384        if x1 == x2:
1385            x0 = x1
1386
1387            # if the vertical direction is positive from
1388            # vertex 1 to vertex 2 on the euclidean plane
1389            if y1 < y2:
1390                y0 = y1 + distance
1391
1392            # if the vertical direction is negative from
1393            # vertex 1 to vertex 2 on the euclidean plane
1394            # -- this shouldn't happen due to vertex sorting in
1395            # -- self._extractnetwork() and self.extractgraph()
1396            elif y1 > y2:
1397                y0 = y2 + distance
1398
1399            # otherwise the link is zero-length
1400            # -- this should never happen
1401            else:
1402                y0 = y1
1403
1404            return x0, y0
1405
1406        # calculate the slope of the arc, `m`
1407        m = (y2 - y1) / (x2 - x1)
1408
1409        # if the horizontal direction is negative from
1410        # vertex 1 to vertex 2 on the euclidean plane
1411        if x1 > x2:
1412            x0 = x1 - distance / numpy.sqrt(1 + m ** 2)
1413
1414        # if the horizontal direction is positive from
1415        # vertex 1 to vertex 2 on the euclidean plane
1416        elif x1 < x2:
1417            x0 = x1 + distance / numpy.sqrt(1 + m ** 2)
1418
1419        # calculate the (y) coordinate
1420        y0 = m * (x0 - x1) + y1
1421
1422        # the new (x,y) coordinates for the snapped observation
1423        return x0, y0
1424
1425    def simulate_observations(self, count, distribution="uniform"):
1426        """Generate a simulated point pattern on the network.
1427
1428        Parameters
1429        ----------
1430        count : int
1431            The number of points to create.
1432        distribution : str
1433            A distribution of random points. Currently, the only
1434            supported distribution is uniform.
1435
1436        Returns
1437        -------
1438        random_pts : dict
1439            Keys are the edge tuple. Values are lists of new point coordinates.
1440
1441        See also
1442        --------
1443
1444        numpy.random.Generator.uniform
1445
1446        Examples
1447        --------
1448
1449        Instantiate a network.
1450
1451        >>> import spaghetti
1452        >>> from libpysal import examples
1453        >>> ntw = spaghetti.Network(examples.get_path("streets.shp"))
1454
1455        Snap observations to the network.
1456
1457        >>> ntw.snapobservations(
1458        ...     examples.get_path("crimes.shp"), "crimes", attribute=True
1459        ... )
1460
1461        Isolate the number of points in the dataset.
1462
1463        >>> npts = ntw.pointpatterns["crimes"].npoints
1464        >>> npts
1465        287
1466
1467        Simulate ``npts`` number of points along the network
1468        in a `uniform` distribution.
1469
1470        >>> sim = ntw.simulate_observations(npts)
1471        >>> isinstance(sim, spaghetti.network.SimulatedPointPattern)
1472        True
1473        >>> sim.npoints
1474        287
1475
1476        """
1477
1478        # instantiate an empty `SimulatedPointPattern()`
1479        simpts = SimulatedPointPattern()
1480
1481        # record throw-away arcs enumerator
1482        arcs_ = []
1483
1484        # create array and fill each entry as length of network arc
1485        lengths = numpy.zeros(len(self.arc_lengths))
1486        for i, key in enumerate(self.arc_lengths.keys()):
1487            arcs_.append(key)
1488            lengths[i] = self.arc_lengths[key]
1489
1490        # cumulative network length
1491        stops = numpy.cumsum(lengths)
1492        cumlen = stops[-1]
1493
1494        # create lengths with a uniform distribution
1495        if distribution.lower() == "uniform":
1496            nrandompts = numpy.random.uniform(0, cumlen, size=(count,))
1497        else:
1498            msg = "%s distribution not currently supported." % distribution
1499            raise RuntimeError(msg)
1500
1501        # iterate over random distances created above
1502        for i, r in enumerate(nrandompts):
1503
1504            # take the first element of the index array (arc ID) where the
1505            # random distance is greater than that of its value in `stops`
1506            idx = numpy.where(r < stops)[0][0]
1507
1508            # assign the simulated point to the arc
1509            assignment_arc = arcs_[idx]
1510
1511            # calculate and set the distance from the arc start
1512            distance_from_start = stops[idx] - r
1513
1514            # populate the coordinates dict
1515            x0, y0 = self._newpoint_coords(assignment_arc, distance_from_start)
1516
1517            # record the snapped coordinates and associated vertices
1518            simpts.snapped_coordinates[i] = (x0, y0)
1519            simpts.obs_to_vertex[assignment_arc[0]].append(i)
1520            simpts.obs_to_vertex[assignment_arc[1]].append(i)
1521
1522            # calculate and set the distance from the arc end
1523            distance_from_end = self.arc_lengths[arcs_[idx]] - distance_from_start
1524
1525            # populate the distances to vertices
1526            simpts.dist_to_vertex[i] = {
1527                assignment_arc[0]: distance_from_start,
1528                assignment_arc[1]: distance_from_end,
1529            }
1530
1531            # set snapped coordinates and point count attributes
1532            simpts.points = simpts.snapped_coordinates
1533            simpts.npoints = len(simpts.points)
1534
1535        return simpts
1536
1537    def enum_links_vertex(self, v0):
1538        """Returns the arcs (links) adjacent to vertices.
1539
1540        Parameters
1541        -----------
1542        v0 : int
1543            The vertex ID.
1544
1545        Returns
1546        -------
1547        links : list
1548            List of tuple arcs adjacent to the vertex.
1549
1550        Examples
1551        --------
1552
1553        Create an instance of a network.
1554
1555        >>> import spaghetti
1556        >>> from libpysal import examples
1557        >>> ntw = spaghetti.Network(examples.get_path("streets.shp"))
1558
1559        Enumerate the links/arcs that are adjacent to vertex ``24``.
1560
1561        >>> ntw.enum_links_vertex(24)
1562        [(24, 48), (24, 25), (24, 26)]
1563
1564        """
1565
1566        # instantiate links list
1567        links = []
1568
1569        neighbor_vertices = self.adjacencylist[v0]
1570
1571        # enumerate links associated with the current vertex
1572        for n in neighbor_vertices:
1573            links.append(tuple(sorted([n, v0])))
1574
1575        return links
1576
1577    def full_distance_matrix(self, n_processes, gen_tree=False):
1578        """All vertex-to-vertex distances on a network. This method
1579        is called from within ``allneighbordistances()``,
1580        ``nearestneighbordistances()``, and ``distancebandweights()``.
1581
1582        Parameters
1583        -----------
1584        n_processes : int
1585            Specify the number of cores to utilize. Default is 1 core.
1586            Use ``"all"`` to request all available cores.
1587            Specify the exact number of cores with an integer.
1588        gen_tree : bool
1589            Rebuild shortest path ``True``, or skip ``False``.
1590            Default is ``False``.
1591
1592        Notes
1593        -----
1594
1595        Based on :cite:`Dijkstra1959a` and :cite:`doi:10.1002/9781119967101.ch3`.
1596
1597        """
1598
1599        # create an empty matrix which will store shortest path distance
1600        nvtx = len(self.vertex_list)
1601        self.distance_matrix = numpy.empty((nvtx, nvtx))
1602
1603        # create `network_trees` attribute that stores
1604        # all network path trees (if desired)
1605        self.network_trees = {}
1606
1607        # single-core processing
1608        if n_processes == 1:
1609
1610            # iterate over each network vertex
1611            for vtx in self.vertex_list:
1612
1613                # calculate the shortest path and preceding
1614                # vertices for traversal route
1615                distance, pred = util.dijkstra(self, vtx)
1616                pred = numpy.array(pred)
1617
1618                # generate the shortest path tree
1619                if gen_tree:
1620                    tree = util.generatetree(pred)
1621                else:
1622                    tree = None
1623
1624                # populate distances and paths
1625                self.distance_matrix[vtx] = distance
1626                self.network_trees[vtx] = tree
1627
1628        # multiprocessing
1629        else:
1630
1631            # set up multiprocessing schema
1632            import multiprocessing as mp
1633            from itertools import repeat
1634
1635            if n_processes == "all":
1636                cores = mp.cpu_count()
1637            else:
1638                cores = n_processes
1639            p = mp.Pool(processes=cores)
1640
1641            # calculate the shortest path and preceding
1642            # vertices for traversal route by mapping each process
1643            distance_pred = p.map(util.dijkstra_mp, zip(repeat(self), self.vertex_list))
1644
1645            # set range of iterations
1646            iterations = range(len(distance_pred))
1647
1648            # fill shortest paths
1649            distance = [distance_pred[itr][0] for itr in iterations]
1650
1651            # fill preceding vertices
1652            pred = numpy.array([distance_pred[itr][1] for itr in iterations])
1653
1654            # iterate of network vertices and generate
1655            # the shortest path tree for each
1656            for vtx in self.vertex_list:
1657                if gen_tree:
1658                    tree = util.generatetree(pred[vtx])
1659                else:
1660                    tree = None
1661
1662                # populate distances and paths
1663                self.distance_matrix[vtx] = distance[vtx]
1664                self.network_trees[vtx] = tree
1665
1666    def allneighbordistances(
1667        self,
1668        sourcepattern,
1669        destpattern=None,
1670        fill_diagonal=None,
1671        n_processes=1,
1672        gen_tree=False,
1673        snap_dist=False,
1674    ):
1675        """Compute either all distances between :math:`i` and :math:`j` in a
1676        single point pattern or all distances between each :math:`i` from a
1677        source pattern and all :math:`j` from a destination pattern.
1678
1679        Parameters
1680        ----------
1681        sourcepattern : {str, spaghetti.PointPattern}
1682            The key of a point pattern snapped to the network or
1683            the full ``spaghetti.PointPattern`` object.
1684        destpattern : str
1685            (Optional) The key of a point pattern snapped to the network
1686            or the full ``spaghetti.PointPattern`` object.
1687        fill_diagonal : {float, int}
1688            (Optional) Fill the diagonal of the cost matrix. Default is
1689            ``None`` and will populate the diagonal with ``numpy.nan``.
1690            Do not declare a ``destpattern`` for a custom
1691            ``fill_diagonal``.
1692        n_processes : {int, str}
1693            Specify the number of cores to utilize. Default is 1 core.
1694            Use ``"all"`` to request all available cores.
1695            Specify the exact number of cores with an integer.
1696        gen_tree : bool
1697            Rebuild shortest path ``True``, or skip ``False``.
1698            Default is ``False``.
1699        snap_dist : bool
1700            Flag as ``True`` to include the distance from the original
1701            location to the snapped location along the network. Default
1702            is ``False``.
1703
1704        Returns
1705        -------
1706        nearest : numpy.ndarray
1707            An array of shape (n,m) storing distances between all
1708            source and destination points.
1709        tree_nearest : dict
1710            Nearest network node to point pattern vertex shortest
1711            path lookup. The values of the dictionary are a tuple
1712            of the nearest source vertex and the nearest destination
1713            vertex to query the lookup tree. If two observations are
1714            snapped to the same network arc a flag of -.1 is set for
1715            both the source and destination network vertex
1716            indicating the same arc is used while also raising an
1717            ``IndexError`` when rebuilding the path.
1718
1719        Examples
1720        --------
1721
1722        Create a network instance.
1723
1724        >>> import spaghetti
1725        >>> from libpysal import examples
1726        >>> import numpy
1727        >>> ntw = spaghetti.Network(examples.get_path("streets.shp"))
1728
1729        Snap observations to the network.
1730
1731        >>> ntw.snapobservations(
1732        ...     examples.get_path("crimes.shp"), "crimes", attribute=True
1733        ... )
1734
1735        Calculate all distances between observations in the ``crimes`` dataset.
1736
1737        >>> s2s_dist = ntw.allneighbordistances("crimes")
1738
1739        If calculating a ``type-a`` to ``type-a`` distance matrix
1740        the distance between an observation and itself is ``nan`` and
1741        the distance between one observation and another will be positive value.
1742
1743        >>> s2s_dist[0,0], s2s_dist[1,0]
1744        (nan, 3105.189475447081)
1745
1746        If calculating a ``type-a`` to ``type-b`` distance matrix
1747        the distance between all observations will likely be positive
1748        values, may be zero (or approximately zero), but will never be negative.
1749
1750        >>> ntw.snapobservations(
1751        ...     examples.get_path("schools.shp"), "schools", attribute=False
1752        ... )
1753        >>> s2d_dist = ntw.allneighbordistances("crimes", destpattern="schools")
1754        >>> numpy.round((s2d_dist[0,0], s2d_dist[1,0]), 5)
1755        array([4520.72354, 6340.42297])
1756
1757
1758        Shortest paths can also be reconstructed when desired by
1759        setting the ``gen_tree`` keyword argument to ``True``. Here
1760        it is shown that the shortest path between school ``6`` and
1761        school ``7`` flows along network arcs through network
1762        vertices ``173`` and ``64``. The ``ntw.network_trees`` attribute
1763        may then be queried for the network elements comprising that path.
1764
1765        >>> d2d_dist, tree = ntw.allneighbordistances("schools", gen_tree=True)
1766        >>> tree[(6, 7)]
1767        (173, 64)
1768
1769        """
1770
1771        # calculate the network vertex to vertex distance matrix
1772        # if it is not already an attribute
1773        if not hasattr(self, "distance_matrix"):
1774            self.full_distance_matrix(n_processes, gen_tree=gen_tree)
1775
1776        # set the source and destination observation point patterns
1777        if type(sourcepattern) is str:
1778            sourcepattern = self.pointpatterns[sourcepattern]
1779            if destpattern:
1780                destpattern = self.pointpatterns[destpattern]
1781
1782        # source pattern setup
1783        # set local copy of source pattern index
1784        src_indices = list(sourcepattern.points.keys())
1785        # set local copy of source distance to vertex lookup
1786        src_d2v = copy.deepcopy(sourcepattern.dist_to_vertex)
1787        # source point count
1788        nsource_pts = len(src_indices)
1789        # create source point to network vertex lookup
1790        src_vertices = {}
1791        for s in src_indices:
1792            v1, v2 = src_d2v[s].keys()
1793            src_vertices[s] = (v1, v2)
1794
1795        # destination pattern setup
1796        # if only a source pattern is specified, also set it as
1797        # the destination pattern
1798        symmetric = False
1799        if destpattern is None:
1800            symmetric = True
1801            destpattern = sourcepattern
1802        # set local copy of destination pattern index
1803        dest_indices = list(destpattern.points.keys())
1804        # set local copy of destination distance to vertex lookup
1805        dst_d2v = copy.deepcopy(destpattern.dist_to_vertex)
1806        # destination point count
1807        ndest_pts = len(dest_indices)
1808        # create `deepcopy` of destination points to
1809        # consider for searching
1810        dest_searchpts = copy.deepcopy(dest_indices)
1811        # create destination point to network vertex lookup
1812        dest_vertices = {}
1813        for s in dest_indices:
1814            v1, v2 = dst_d2v[s].keys()
1815            dest_vertices[s] = (v1, v2)
1816
1817        # add snapping distance to each pointpattern
1818        if snap_dist:
1819            # declare both point patterns and both
1820            # distance to vertex lookup in single lists
1821            patterns = [sourcepattern, destpattern]
1822            dist_copies = [src_d2v, dst_d2v]
1823            # iterate over each point pattern
1824            for elm, pp in enumerate(patterns):
1825                # extract associated vertex distances
1826                for pidx, dists_dict in dist_copies[elm].items():
1827                    # add snapped distance to each point
1828                    for vidx, vdist in dists_dict.items():
1829                        dists_dict[vidx] = vdist + pp.dist_snapped[pidx]
1830
1831        # output setup
1832        # create empty source x destination array
1833        # and fill with infinity values
1834        nearest = numpy.empty((nsource_pts, ndest_pts))
1835        nearest[:] = numpy.inf
1836        # create empty dictionary to store path trees
1837        tree_nearest = {}
1838
1839        # iterate over each point in sources
1840        for p1 in src_indices:
1841
1842            # get the source vertices and dist to source vertices
1843            source1, source2 = src_vertices[p1]
1844            set1 = set(src_vertices[p1])
1845
1846            # distance from source vertex1 to point and
1847            # distance from source vertex2 to point
1848            sdist1, sdist2 = src_d2v[p1].values()
1849
1850            if symmetric:
1851
1852                # only compute the upper triangle if symmetric
1853                dest_searchpts.remove(p1)
1854
1855            # iterate over each point remaining in destinations
1856            for p2 in dest_searchpts:
1857
1858                # get the destination vertices and
1859                # dist to destination vertices
1860                dest1, dest2 = dest_vertices[p2]
1861                set2 = set(dest_vertices[p2])
1862
1863                # when the observations are snapped to the same arc
1864                if set1 == set2:
1865
1866                    # calculate only the length between points along
1867                    # that arc
1868                    x1, y1 = sourcepattern.snapped_coordinates[p1]
1869                    x2, y2 = destpattern.snapped_coordinates[p2]
1870
1871                    computed_length = util.compute_length((x1, y1), (x2, y2))
1872                    nearest[p1, p2] = computed_length
1873
1874                    # set the nearest network vertices to a flag of -.1
1875                    # indicating the same arc is used while also raising
1876                    # and indexing error when rebuilding the path
1877                    tree_nearest[p1, p2] = SAME_SEGMENT
1878
1879                # otherwise lookup distance between the source and
1880                # destination vertex
1881                else:
1882
1883                    # distance from destination vertex1 to point and
1884                    # distance from destination vertex2 to point
1885                    ddist1, ddist2 = dst_d2v[p2].values()
1886
1887                    # set the four possible combinations of
1888                    # source to destination shortest path traversal
1889                    d11 = self.distance_matrix[source1][dest1]
1890                    d21 = self.distance_matrix[source2][dest1]
1891                    d12 = self.distance_matrix[source1][dest2]
1892                    d22 = self.distance_matrix[source2][dest2]
1893
1894                    # find the shortest distance from the path passing
1895                    # through each of the two origin vertices to the
1896                    # first destination vertex
1897                    sd_1 = d11 + sdist1
1898                    sd_21 = d21 + sdist2
1899                    sp_combo1 = source1, dest1
1900                    if sd_1 > sd_21:
1901                        sd_1 = sd_21
1902                        sp_combo1 = source2, dest1
1903
1904                    # now add the point to vertex1 distance on
1905                    # the destination arc
1906                    len_1 = sd_1 + ddist1
1907
1908                    # repeat the prior but now for the paths entering
1909                    # at the second vertex of the second arc
1910                    sd_2 = d12 + sdist1
1911                    sd_22 = d22 + sdist2
1912                    sp_combo2 = source1, dest2
1913                    if sd_2 > sd_22:
1914                        sd_2 = sd_22
1915                        sp_combo2 = source2, dest2
1916                    len_2 = sd_2 + ddist2
1917
1918                    # now find the shortest distance path between point
1919                    # 1 on arc 1 and point 2 on arc 2, and assign
1920                    sp_12 = len_1
1921                    s_vertex, d_vertex = sp_combo1
1922                    if len_1 > len_2:
1923                        sp_12 = len_2
1924                        s_vertex, d_vertex = sp_combo2
1925
1926                    # set distance and path tree
1927                    nearest[p1, p2] = sp_12
1928                    tree_nearest[p1, p2] = (s_vertex, d_vertex)
1929
1930                if symmetric:
1931
1932                    # mirror the upper and lower triangle
1933                    # when symmetric
1934                    nearest[p2, p1] = nearest[p1, p2]
1935
1936        # populate the main diagonal when symmetric
1937        if symmetric:
1938
1939            # fill the matrix diagonal with NaN values is no fill
1940            # value is specified
1941            if fill_diagonal is None:
1942                numpy.fill_diagonal(nearest, numpy.nan)
1943
1944            # otherwise fill with specified value
1945            else:
1946                numpy.fill_diagonal(nearest, fill_diagonal)
1947
1948        # if the nearest path tree is desired return it along
1949        # with the cost matrix
1950        if gen_tree:
1951            return nearest, tree_nearest
1952
1953        else:
1954            return nearest
1955
1956    def nearestneighbordistances(
1957        self,
1958        sourcepattern,
1959        destpattern=None,
1960        n_processes=1,
1961        gen_tree=False,
1962        all_dists=None,
1963        snap_dist=False,
1964        keep_zero_dist=True,
1965    ):
1966        """Compute the interpattern nearest neighbor distances or the
1967        intrapattern nearest neighbor distances between a source
1968        pattern and a destination pattern.
1969
1970        Parameters
1971        ----------
1972        sourcepattern : str
1973            The key of a point pattern snapped to the network.
1974        destpattern : str
1975            (Optional) The key of a point pattern snapped to the
1976            network.
1977        n_processes : {int, str}
1978            Specify the number of cores to utilize. Default is 1 core.
1979            Use ``"all"`` to request all available cores.
1980            Specify the exact number of cores with an integer.
1981        gen_tree : bool
1982            Rebuild shortest path ``True``, or skip ``False``.
1983            Default is ``False``.
1984        all_dists : numpy.ndarray
1985            An array of shape :math:`(n,n)` storing distances between all
1986            points.
1987        snap_dist : bool
1988            Flag as ``True`` to include the distance from the original
1989            location to the snapped location along the network. Default
1990            is ``False``.
1991        keep_zero_dist : bool
1992            Include zero values in minimum distance ``True`` or exclude
1993            ``False``. Default is ``True``. If the source pattern is the
1994            same as the destination pattern the diagonal is filled with
1995            ``numpy.nan``.
1996
1997        Returns
1998        -------
1999        nearest : dict
2000            Nearest neighbor distances keyed by the source point ID with
2001            the value as as tuple of lists containing
2002            nearest destination point ID(s) and distance.
2003
2004        Examples
2005        --------
2006
2007        Instantiate a network.
2008
2009        >>> import spaghetti
2010        >>> from libpysal import examples
2011        >>> ntw = spaghetti.Network(examples.get_path("streets.shp"))
2012
2013        Snap observations to the network.
2014
2015        >>> ntw.snapobservations(examples.get_path("crimes.shp"), "crimes")
2016
2017        Fetch nearest neighbor distances while (potentially)
2018        keeping neighbors that have been geocoded directly on top of
2019        each other. Here it is demonstrated that observation ``11``
2020        has two neighbors (``18`` and ``19``) at an exactly equal distance.
2021        However, observation ``18`` is shown to have only one neighbor
2022        (``18``) with no distance between them.
2023
2024        >>> nn = ntw.nearestneighbordistances("crimes", keep_zero_dist=True)
2025        >>> nn[11], nn[18]
2026        (([18, 19], 165.33982412719126), ([19], 0.0))
2027
2028        This may be remedied by setting the ``keep_zero_dist`` keyword
2029        argument to ``False``. With this parameter set, observation ``11``
2030        still has the same neighbor/distance values, but
2031        observation ``18`` now has a single nearest neighbor (``11``)
2032        with a non-zero, postive distance.
2033
2034        >>> nn = ntw.nearestneighbordistances("crimes", keep_zero_dist=False)
2035        >>> nn[11], nn[18]
2036        (([18, 19], 165.33982412719126), ([11], 165.33982412719126))
2037
2038        There are valid reasons for both retaining or masking zero distance
2039        neighbors. When conducting analysis, thought must be given as to
2040        which model more accurately represents the specific scenario.
2041
2042        """
2043
2044        # raise exception is the specified point pattern does not exist
2045        if sourcepattern not in self.pointpatterns.keys():
2046            err_msg = "Available point patterns are {}"
2047            raise KeyError(err_msg.format(self.pointpatterns.keys()))
2048
2049        # calculate the network vertex to vertex distance matrix
2050        # if it is not already an attribute
2051        if not hasattr(self, "distance_matrix"):
2052            self.full_distance_matrix(n_processes, gen_tree=gen_tree)
2053
2054        # determine if the source and destination patterns are equal
2055        symmetric = sourcepattern != destpattern
2056
2057        # (for source-to-source patterns) if zero-distance neighbors are
2058        # desired, keep the diagonal as NaN and take the minimum
2059        # distance neighbor(s), which may include zero distance
2060        # neighors.
2061        fill_diagonal = None
2062        if not keep_zero_dist and symmetric:
2063            # (for source-to-source patterns) if zero-distance neighbors
2064            # should be ignored, convert the diagonal to 0.0 and take
2065            # the minimum distance neighbor(s) that is/are not 0.0
2066            # distance.
2067            fill_diagonal = 0.0
2068
2069        # set the source and destination observation point patterns
2070        sourcepattern = self.pointpatterns[sourcepattern]
2071        if destpattern:
2072            destpattern = self.pointpatterns[destpattern]
2073
2074        # if the full source to destination is not calculated,
2075        # do that at this time
2076        if all_dists is None:
2077            all_dists = self.allneighbordistances(
2078                sourcepattern,
2079                destpattern=destpattern,
2080                fill_diagonal=fill_diagonal,
2081                n_processes=n_processes,
2082                gen_tree=gen_tree,
2083                snap_dist=snap_dist,
2084            )
2085
2086        # create empty nearest neighbors lookup
2087        nearest = {}
2088
2089        # iterate over each source point
2090        for source_index in sourcepattern.points.keys():
2091
2092            # this considers all zero-distance neighbors
2093            if keep_zero_dist and symmetric:
2094                val = numpy.nanmin(all_dists[source_index, :])
2095
2096            # this does not consider zero-distance neighbors
2097            else:
2098                val = numpy.min(
2099                    all_dists[source_index, :][
2100                        numpy.nonzero(all_dists[source_index, :])
2101                    ]
2102                )
2103
2104            # nearest destination (may be more than one if
2105            # observations are equal distances away)
2106            dest_idxs = numpy.where(all_dists[source_index, :] == val)[0].tolist()
2107
2108            # set nearest destination point(s) and distance
2109            nearest[source_index] = (dest_idxs, val)
2110
2111        return nearest
2112
2113    def shortest_paths(self, tree, pp_orig, pp_dest=None, n_processes=1):
2114        """Return the shortest paths between observation points as
2115        ``libpysal.cg.Chain`` objects.
2116
2117        Parameters
2118        ----------
2119        tree : dict
2120            See ``tree_nearest`` in
2121            ``spaghetti.Network.allneighbordistances()``.
2122        pp_orig : str
2123            Origin point pattern for shortest paths.
2124            See ``name`` in ``spaghetti.Network.snapobservations()``.
2125        pp_dest : str
2126            Destination point pattern for shortest paths.
2127            See ``name`` in ``spaghetti.Network.snapobservations()``.
2128            Defaults ``pp_orig`` if not declared.
2129        n_processes : int
2130            See ``n_processes`` in ``spaghetti.Network.full_distance_matrix()``.
2131
2132        Returns
2133        -------
2134        paths : list
2135            The shortest paths between observations as geometric objects.
2136            Each element of the list is a list where the first element
2137            is an origin-destination pair tuple and the second
2138            element is a ``libpysal.cg.Chain``.
2139
2140        Raises
2141        ------
2142        AttributeError
2143            This exception is raised when an attempt to extract shortest
2144            path geometries is being made that but the ``network_trees``
2145            attribute does not exist within the network object.
2146
2147        Examples
2148        --------
2149
2150        Instantiate a network.
2151
2152        >>> import spaghetti
2153        >>> from libpysal import examples
2154        >>> ntw = spaghetti.Network(examples.get_path("streets.shp"))
2155
2156        Snap observations to the network.
2157
2158        >>> ntw.snapobservations(examples.get_path("schools.shp"), "schools")
2159
2160        Create shortest path trees between observations.
2161
2162        >>> _, tree = ntw.allneighbordistances("schools", gen_tree=True)
2163
2164        Generate geometric objects from trees.
2165
2166        >>> paths = ntw.shortest_paths(tree, "schools")
2167
2168        Extract the first path, which is between observations
2169        ``0`` and ``1``.
2170
2171        >>> path = paths[0]
2172        >>> path[0]
2173        (0, 1)
2174
2175        The are ``n`` vertices in the path between observations
2176        ``0`` and ``1``.
2177
2178        >>> n = len(path[1].vertices)
2179        >>> n
2180        10
2181
2182        """
2183
2184        # build the network trees object if it is not already an attribute
2185        if not hasattr(self, "network_trees"):
2186            msg = "The 'network_trees' attribute has not been created. "
2187            msg += "Rerun 'spaghetti.Network.allneighbordistances()' "
2188            msg += "with the 'gen_tree' parameter set to 'True'."
2189            raise AttributeError(msg)
2190
2191        # isolate network attributes
2192        pp_orig = self.pointpatterns[pp_orig]
2193        if pp_dest:
2194            pp_dest = self.pointpatterns[pp_dest]
2195        else:
2196            pp_dest = pp_orig
2197        vtx_coords = self.vertex_coords
2198        net_trees = self.network_trees
2199
2200        # instantiate a list to store paths
2201        paths = []
2202
2203        # iterate over each path in the tree
2204        for idx, ((obs0, obs1), (v0, v1)) in enumerate(tree.items()):
2205
2206            # if the observations share the same segment
2207            # create a partial segment path
2208            if (v0, v1) == SAME_SEGMENT:
2209                # isolate the snapped coordinates and put in a list
2210                partial_segment_verts = [
2211                    cg.Point(pp_orig.snapped_coordinates[obs0]),
2212                    cg.Point(pp_dest.snapped_coordinates[obs1]),
2213                ]
2214                path = partial_segment_verts
2215
2216            else:
2217                # source and destination network vertices
2218                svtx, dvtx = tree[obs0, obs1]
2219
2220                # path passes through these nodes
2221                # (source and destination inclusive)
2222                thru_nodes = net_trees[svtx][dvtx][::-1] + [dvtx]
2223
2224                # full-length network segments along path
2225                full_segs_path = []
2226                iter_limit = len(thru_nodes) - 1
2227                for _idx, item in enumerate(islice(thru_nodes, iter_limit)):
2228                    full_segs_path.append((item, thru_nodes[_idx + 1]))
2229
2230                # create copy of arc paths dataframe
2231                full_segments = []
2232                for fsp in full_segs_path:
2233                    full_segments.append(util._chain_constr(vtx_coords, fsp))
2234
2235                # unpack the vertices containers
2236                segm_verts = [v for fs in full_segments for v in fs.vertices]
2237
2238                # remove duplicate vertices
2239                for idx, v in enumerate(segm_verts):
2240                    try:
2241                        if v == segm_verts[idx + 1]:
2242                            segm_verts.remove(v)
2243                    except IndexError as e:
2244                        if e.args[0] == "list index out of range":
2245                            continue
2246                        else:
2247                            raise
2248
2249                # partial-length network segments along path
2250                partial_segment_verts = [
2251                    cg.Point(pp_orig.snapped_coordinates[obs0]),
2252                    cg.Point(pp_dest.snapped_coordinates[obs1]),
2253                ]
2254
2255                # combine the full and partial segments into a single list
2256                first_vtx, last_vtx = partial_segment_verts
2257                path = [first_vtx] + segm_verts + [last_vtx]
2258
2259            # populate the ``paths`` dataframe
2260            paths.append([(obs0, obs1), util._chain_constr(None, path)])
2261
2262        return paths
2263
2264    def split_arcs(self, split_param, split_by="distance", w_components=True):
2265        """Split all network arcs at either a fixed distance or fixed count.
2266
2267        Parameters
2268        -----------
2269        split_param : {int, float}
2270            Either the number of desired resultant split arcs or
2271            the distance at which arcs are split.
2272        split_by : str
2273            Either ``'distance'`` or ``'count'``. Default is ``'distance'``.
2274        w_components : bool
2275            Set to ``False`` to not record connected components from a
2276            ``libpysal.weights.W`` object. Default is ``True``.
2277
2278        Returns
2279        -------
2280        split_network : spaghetti.Network
2281            A newly instantiated ``spaghetti.Network`` object.
2282
2283        Examples
2284        --------
2285
2286        Instantiate a network.
2287
2288        >>> import spaghetti
2289        >>> from libpysal import examples
2290        >>> ntw = spaghetti.Network(examples.get_path("streets.shp"))
2291
2292        Split the network into a segments of 200 distance units in length
2293        (`US feet in this case <https://github.com/pysal/libpysal/blob/master/libpysal/examples/geodanet/streets.prj>`_.).
2294        This will include "remainder" segments unless the network is
2295        comprised of arcs with lengths exactly divisible by ``distance``.
2296
2297        >>> n200 = ntw.split_arcs(200.0)
2298        >>> len(n200.arcs)
2299        688
2300
2301        The number of arcs within the new object can be accessed via the
2302        weights object, as well. These counts will be equal.
2303
2304        >>> len(n200.arcs) == n200.w_network.n
2305        True
2306
2307        Neighboring arcs can also be queried through the weight object.
2308
2309        >>> n200.w_network.neighbors[72,392]
2310        [(71, 72), (72, 252), (72, 391), (392, 393)]
2311
2312        Network arcs can also be split by a specified number of divisions with
2313        the ``split_by`` keyword set to ``'count'``, which is ``'distance'`` by
2314        default. For example, each arc can be split into 2 equal parts.
2315
2316        >>> n2 = ntw.split_arcs(2, split_by="count")
2317        >>> len(n2.arcs)
2318        606
2319
2320        """
2321
2322        # catch invalid split types
2323        split_by = split_by.lower()
2324        valid_split_types = ["distance", "count"]
2325        if split_by not in valid_split_types:
2326            msg = f"'{split_by}' is not a valid value for 'split_by'. "
2327            msg += f"Valid arguments include: {valid_split_types}."
2328            raise ValueError(msg)
2329
2330        # catch invalid count params
2331        if split_by == "count":
2332            if split_param <= 1:
2333                msg = "Splitting arcs by 1 or less is not possible. "
2334                msg += f"Currently 'split_param' is set to {split_param}."
2335                raise ValueError(msg)
2336            split_integer = int(split_param)
2337            if split_param != split_integer:
2338                msg = "Network arcs must split by an integer. "
2339                msg += f"Currently 'split_param' is set to {split_param}."
2340                raise TypeError(msg)
2341
2342        # convert coordinates for integers if possible
2343        # e.g., (1.0, 0.5) --> (1, 0.5)
2344        int_coord = lambda c: int(c) if (type(c) == float and c.is_integer()) else c
2345
2346        # create new shell network instance
2347        split_network = Network()
2348
2349        # duplicate input network attributes
2350        split_network.adjacencylist = copy.deepcopy(self.adjacencylist)
2351        split_network.arc_lengths = copy.deepcopy(self.arc_lengths)
2352        split_network.arcs = copy.deepcopy(self.arcs)
2353        split_network.vertex_coords = copy.deepcopy(self.vertex_coords)
2354        split_network.vertex_list = copy.deepcopy(self.vertex_list)
2355        split_network.vertices = copy.deepcopy(self.vertices)
2356        split_network.pointpatterns = copy.deepcopy(self.pointpatterns)
2357        split_network.in_data = self.in_data
2358
2359        # set vertex ID to start iterations
2360        current_vertex_id = max(self.vertices.values())
2361
2362        # instantiate sets for newly created network arcs and
2363        # input network arcs to remove
2364        new_arcs = set()
2365        remove_arcs = set()
2366
2367        # iterate over all network arcs
2368        for arc in split_network.arcs:
2369
2370            # fetch network arc length
2371            length = split_network.arc_lengths[arc]
2372
2373            # set initial segmentation interval
2374            if split_by == "distance":
2375                interval = split_param
2376            else:
2377                interval = length / float(split_param)
2378
2379            # initialize arc new arc length at zero
2380            totallength = 0
2381
2382            # initialize the current vertex and ending vertex
2383            currentstart, end_vertex = arc[0], arc[1]
2384
2385            # determine direction of arc vertices
2386            csx, csy = split_network.vertex_coords[currentstart]
2387            evx, evy = split_network.vertex_coords[end_vertex]
2388            if csy > evy and csx == evx:
2389                currentstart, end_vertex = end_vertex, currentstart
2390
2391            # if the arc will be split remove the current
2392            # arc from the adjacency list
2393            if interval < length:
2394
2395                # remove old arc adjacency information
2396                split_network.adjacencylist[currentstart].remove(end_vertex)
2397                split_network.adjacencylist[end_vertex].remove(currentstart)
2398
2399                # remove old arc length information
2400                split_network.arc_lengths.pop(arc, None)
2401
2402                # add old arc to set of arcs to remove
2403                remove_arcs.add(arc)
2404
2405            # if the arc will not be split, do nothing and continue
2406            else:
2407                continue
2408
2409            # traverse the length of the arc
2410            while totallength < length:
2411
2412                # once an arc can not be split further
2413                if totallength + interval >= length:
2414                    # record the ending vertex
2415                    currentstop = end_vertex
2416                    # set the length remainder
2417                    interval = length - totallength
2418                    # full old length reached
2419                    totallength = length
2420
2421                else:
2422                    # set the current vertex ID
2423                    current_vertex_id += 1
2424                    # set the current stopping ID
2425                    currentstop = current_vertex_id
2426                    # add the interval distance to the traversed length
2427                    totallength += interval
2428
2429                    # compute the new vertex coordinate
2430                    newx, newy = self._newpoint_coords(arc, totallength)
2431                    new_vertex = (int_coord(newx), int_coord(newy))
2432
2433                    # update the vertex and coordinate info if needed
2434                    if new_vertex not in split_network.vertices.keys():
2435                        split_network.vertices[new_vertex] = currentstop
2436                        split_network.vertex_coords[currentstop] = new_vertex
2437                        split_network.vertex_list.append(currentstop)
2438                    else:
2439                        # retrieve vertex ID if coordinate already exists
2440                        current_vertex_id -= 1
2441                        currentstop = split_network.vertices[new_vertex]
2442
2443                # update the new network adjacency list
2444                split_network.adjacencylist[currentstart].append(currentstop)
2445                split_network.adjacencylist[currentstop].append(currentstart)
2446
2447                # add the new arc to the arc dictionary
2448                # iterating over this so we need to add after iterating
2449                _new_arc = tuple(sorted([currentstart, currentstop]))
2450                new_arcs.add(_new_arc)
2451
2452                # set the length of the arc
2453                split_network.arc_lengths[_new_arc] = interval
2454
2455                # increment the starting vertex to the stopping vertex
2456                currentstart = currentstop
2457
2458        # add the newly created arcs to the network and remove the old arcs
2459        split_network.arcs = set(split_network.arcs)
2460        split_network.arcs.update(new_arcs)
2461        split_network.arcs.difference_update(remove_arcs)
2462        split_network.arcs = sorted(list(split_network.arcs))
2463
2464        # extract connected components
2465        if w_components:
2466
2467            # extract contiguity weights from libpysal
2468            split_network.w_network = split_network.contiguityweights(
2469                graph=False, from_split=True
2470            )
2471            # identify connected components from the `w_network`
2472            split_network.identify_components(split_network.w_network, graph=False)
2473
2474        # update the snapped point pattern
2475        for instance in split_network.pointpatterns.values():
2476            split_network._snap_to_link(instance)
2477
2478        return split_network
2479
2480    def GlobalAutoK(
2481        self,
2482        pointpattern,
2483        nsteps=10,
2484        permutations=99,
2485        threshold=0.5,
2486        distribution="uniform",
2487        upperbound=None,
2488    ):
2489        r"""Compute a global auto :math:`K`-function based on a network constrained
2490        cost matrix through `Monte Carlo simulation <https://en.wikipedia.org/wiki/Monte_Carlo_method>`_
2491        according to the formulation adapted from
2492        :cite:`doi:10.1002/9780470549094.ch5`. See the **Notes**
2493        section for further description.
2494
2495        Parameters
2496        ----------
2497        pointpattern : spaghetti.PointPattern
2498            A ``spaghetti`` point pattern object.
2499        nsteps : int
2500            The number of steps at which the count of the nearest
2501            neighbors is computed. Default is ``10``.
2502        permutations : int
2503            The number of permutations to perform. Default is ``99``.
2504        threshold : float
2505            The level at which significance is computed.
2506            (0.5 would be 97.5% and 2.5%). Default is ``0.5``.
2507        distribution : str
2508            The distribution from which random points are sampled.
2509            Currently, the only supported distribution is ``'uniform'``.
2510        upperbound : float
2511            The upper bound at which the :math:`K`-function is computed.
2512            Defaults to the maximum observed nearest neighbor distance.
2513
2514        Returns
2515        -------
2516        GlobalAutoK : spaghetti.analysis.GlobalAutoK
2517            The global auto :math:`K`-function class instance.
2518
2519        Notes
2520        -----
2521
2522        The :math:`K`-function can be formulated as:
2523
2524        .. math::
2525
2526           \displaystyle K(r)=\frac{\sum^n_{i=1} \#[\hat{A} \in D(a_i, r)]}{n\lambda},
2527
2528        where $n$ is the set cardinality of :math:`A`, :math:`\hat{A}` is the subset of
2529        observations in :math:`A` that are within :math:`D` units of distance from :math:`a_i`
2530        (each single observation in :math:`A`), and :math:`r` is the range of distance
2531        values over which the :math:`K`-function is calculated. The :math:`\lambda` term
2532        is the intensity of observations along the network, calculated as:
2533
2534        .. math::
2535
2536           \displaystyle \lambda = \frac{n}{\big|N_{arcs}\big|},
2537
2538        where :math:`\big|N_{arcs}\big|` is the summed length of network arcs.
2539        The global auto :math:`K`-function measures overall clustering in one set of
2540        observations by comparing all intra-set distances over a range of
2541        distance buffers :math:`D \in r`. The :math:`K`-function improves upon
2542        nearest-neighbor distance measures through the analysis of all neighbor
2543        distances. For an explanation on how to interpret the results of the
2544        :math:`K`-function see the `Network Spatial Dependence tutorial <https://pysal.org/spaghetti/notebooks/network-spatial-dependence.html>`_.
2545
2546        For original implementation see :cite:`Ripley1976`
2547        and :cite:`Ripley1977`.
2548        For further Network-`K` formulations see
2549        :cite:`doi:10.1111/j.1538-4632.2001.tb00448.x`,
2550        :cite:`doi:10.1002/9781119967101.ch6`, and
2551        :cite:`Baddeley2020`.
2552
2553        See also
2554        --------
2555
2556        pointpats.K
2557
2558        Examples
2559        --------
2560
2561        Create a network instance.
2562
2563        >>> import spaghetti
2564        >>> from libpysal import examples
2565        >>> ntw = spaghetti.Network(in_data=examples.get_path("streets.shp"))
2566
2567        Snap observation points onto the network.
2568
2569        >>> pt_str = "schools"
2570        >>> in_data = examples.get_path(pt_str+".shp")
2571        >>> ntw.snapobservations(in_data, pt_str, attribute=True)
2572        >>> schools = ntw.pointpatterns[pt_str]
2573
2574        Compute a :math:`K`-function from school observations
2575        with ``99`` ``permutations`` at ``10`` intervals.
2576
2577        >>> kres = ntw.GlobalAutoK(schools, permutations=99, nsteps=10)
2578        >>> kres.lowerenvelope.shape[0]
2579        10
2580
2581        """
2582
2583        # call analysis.GlobalAutoK
2584        return GlobalAutoK(
2585            self,
2586            pointpattern,
2587            nsteps=nsteps,
2588            permutations=permutations,
2589            threshold=threshold,
2590            distribution=distribution,
2591            upperbound=upperbound,
2592        )
2593
2594    def Moran(self, pp_name, permutations=999, graph=False):
2595        """Calculate a Moran's *I* statistic on a set of observations
2596        based on network arcs. The Moran’s *I* test statistic allows
2597        for the inference of how clustered (or dispersed) a dataset is
2598        while considering both attribute values and spatial relationships.
2599        A value of closer to +1 indicates absolute clustering while a
2600        value of closer to -1 indicates absolute dispersion. Complete
2601        spatial randomness takes the value of 0. See the
2602        `esda documentation <https://pysal.org/esda/generated/esda.Moran.html#esda.Moran>`_
2603        for in-depth descriptions and tutorials.
2604
2605        Parameters
2606        ----------
2607        pp_name : str
2608            The name of the point pattern in question.
2609        permutations : int
2610            The number of permutations to perform. Default is ``999``.
2611        graph : bool
2612            Perform the Moran calculation on the graph `W` object
2613            (``True``). Default is ``False``, which performs the
2614            Moran calculation on the network `W` object.
2615
2616        Returns
2617        -------
2618        moran : esda.Moran
2619            A Moran's *I* statistic object results.
2620        y : list
2621            The y-axis (counts).
2622
2623        Examples
2624        --------
2625
2626        Create a network instance.
2627
2628        >>> import spaghetti
2629        >>> from libpysal import examples
2630        >>> ntw = spaghetti.Network(in_data=examples.get_path("streets.shp"))
2631
2632        Snap observation points onto the network.
2633
2634        >>> crimes = "crimes"
2635        >>> in_data = examples.get_path(crimes+".shp")
2636        >>> ntw.snapobservations(in_data, crimes, attribute=True)
2637
2638        Compute a Moran's :math:`I` from crime observations.
2639
2640        >>> moran_res, _ = ntw.Moran(crimes)
2641        >>> round(moran_res.I, 6)
2642        0.005193
2643
2644        Notes
2645        -----
2646
2647        See :cite:`moran:_cliff81` and :cite:`esda:_2019` for more details.
2648
2649        """
2650
2651        # set proper weights attribute
2652        if graph:
2653            w = self.w_graph
2654        else:
2655            w = self.w_network
2656
2657        # Compute the counts
2658        pointpat = self.pointpatterns[pp_name]
2659        counts = self.count_per_link(pointpat.obs_to_arc, graph=graph)
2660
2661        # Build the y vector
2662        y = [counts[i] if i in counts else 0.0 for i in w.neighbors]
2663
2664        # Moran's I
2665        moran = esda.moran.Moran(y, w, permutations=permutations)
2666
2667        return moran, y
2668
2669    def savenetwork(self, filename):
2670        """Save a network to disk as a binary file.
2671
2672        Parameters
2673        ----------
2674        filename : str
2675            The filename where the network should be saved. This should
2676            be a full path or it will be saved in the current directory.
2677
2678        Examples
2679        --------
2680
2681        Create a network instance.
2682
2683        >>> import spaghetti
2684        >>> from libpysal import examples
2685        >>> ntw = spaghetti.Network(examples.get_path("streets.shp"))
2686
2687        Save out the network instance.
2688
2689        >>> ntw.savenetwork("mynetwork.pkl")
2690
2691        """
2692
2693        with open(filename, "wb") as networkout:
2694            pickle.dump(self, networkout, protocol=2)
2695
2696    @staticmethod
2697    def loadnetwork(filename):
2698        """Load a network from a binary file saved on disk.
2699
2700        Parameters
2701        ----------
2702        filename : str
2703            The filename where the network is saved.
2704
2705        Returns
2706        -------
2707        self : spaghetti.Network
2708            A pre-computed ``spaghetti`` network object.
2709
2710        """
2711
2712        with open(filename, "rb") as networkin:
2713            self = pickle.load(networkin)
2714
2715        return self
2716
2717
2718def extract_component(net, component_id, weightings=None):
2719    """Extract a single component from a network object.
2720
2721    Parameters
2722    ----------
2723    net : spaghetti.Network
2724        Full network object.
2725    component_id : int
2726        The ID of the desired network component.
2727    weightings : {dict, bool}
2728        See the ``weightings`` keyword argument in ``spaghetti.Network``.
2729
2730    Returns
2731    -------
2732    cnet : spaghetti.Network
2733        The pruned network containing the component specified in
2734        ``component_id``.
2735
2736    Notes
2737    -----
2738
2739    Point patterns are not reassigned when extracting a component. Therefore,
2740    component extraction should be performed prior to snapping any point
2741    sets onto the network. Also, if the ``spaghetti.Network`` object
2742    has ``distance_matrix`` or ``network_trees`` attributes, they are
2743    deleted and must be computed again on the single component.
2744
2745    Examples
2746    --------
2747
2748    Instantiate a network object.
2749
2750    >>> from libpysal import examples
2751    >>> import spaghetti
2752    >>> snow_net = examples.get_path("Soho_Network.shp")
2753    >>> ntw = spaghetti.Network(in_data=snow_net, extractgraph=False)
2754
2755    The network is not fully connected.
2756
2757    >>> ntw.network_fully_connected
2758    False
2759
2760    Examine the number of network components.
2761
2762    >>> ntw.network_n_components
2763    45
2764
2765    Extract the longest component.
2766
2767    >>> longest = spaghetti.extract_component(ntw, ntw.network_longest_component)
2768    >>> longest.network_n_components
2769    1
2770    >>> longest.network_component_lengths
2771    {0: 13508.169276875526}
2772
2773    """
2774
2775    def _reassign(attr, cid):
2776        """Helper for reassigning attributes."""
2777
2778        # set for each attribute(s)
2779        if attr == "_fully_connected":
2780            _val = [True for objt in obj_type]
2781            attr = [objt + attr for objt in obj_type]
2782        elif attr == "_n_components":
2783            _val = [1 for objt in obj_type]
2784            attr = [objt + attr for objt in obj_type]
2785        elif attr in ["_longest_component", "_largest_component"]:
2786            _val = [cid for objt in obj_type]
2787            attr = [objt + attr for objt in obj_type]
2788        elif attr == "vertex_list":
2789            # reassigns vertex list + network, graph component vertices
2790            supp = [objt + "_component_vertices" for objt in obj_type]
2791            _val = [getattr(cnet, supp[0])[cid]]
2792            _val += [{cid: getattr(cnet, s)[cid]} for s in supp]
2793            attr = [attr] + supp
2794        elif attr == "vertex_coords":
2795            # reassigns both vertex_coords and vertices
2796            supp = getattr(cnet, "vertex_list")
2797            _val = [{k: v for k, v in getattr(cnet, attr).items() if k in supp}]
2798            _val += [{v: k for k, v in _val[0].items()}]
2799            attr = [attr, "vertices"]
2800        elif attr == "_component_vertex_count":
2801            # reassigns both network and graph _component_vertex_count
2802            supp = len(getattr(cnet, "vertex_list"))
2803            _val = [{cid: supp} for objt in obj_type]
2804            attr = [objt + attr for objt in obj_type]
2805        elif attr == "adjacencylist":
2806            supp_adj = copy.deepcopy(list(getattr(cnet, attr).keys()))
2807            supp_vtx = getattr(cnet, "vertex_list")
2808            supp_rmv = [v for v in supp_adj if v not in supp_vtx]
2809            [getattr(cnet, attr).pop(s) for s in supp_rmv]
2810            return
2811        elif attr == "_component_is_ring":
2812            # reassigns both network and graph _component_is_ring
2813            supp = [getattr(cnet, objt + attr) for objt in obj_type]
2814            _val = [{cid: s[cid]} for s in supp]
2815            attr = [objt + attr for objt in obj_type]
2816        elif attr == "non_articulation_points":
2817            supp_vtx = getattr(cnet, "vertex_list")
2818            _val = [[s for s in getattr(cnet, attr) if s in supp_vtx]]
2819            attr = [attr]
2820        elif attr == "_component2":
2821            # reassigns both network and graph _component2 attributes
2822            supp = [_n + "_component2" + _a]
2823            if hasgraph:
2824                supp += [_g + "_component2" + _e]
2825            _val = [{cid: getattr(cnet, s)[cid]} for s in supp]
2826            attr = supp
2827        elif attr == "arcs":
2828            # reassigns both arcs and edges
2829            c2 = "_component2"
2830            supp = [_n + c2 + _a]
2831            if hasgraph:
2832                supp += [_g + c2 + _e]
2833            _val = [getattr(cnet, s)[cid] for s in supp]
2834            attr = [attr]
2835            if hasgraph:
2836                attr += ["edges"]
2837        elif attr == "_component_labels":
2838            # reassigns both network and graph _component_labels
2839            supp = [len(getattr(cnet, o + "s")) for o in obj]
2840            _val = [numpy.array([cid] * s) for s in supp]
2841            attr = [objt + attr for objt in obj_type]
2842        elif attr == "_component_lengths":
2843            # reassigns both network and graph _component_lengths
2844            supp = [objt + attr for objt in obj_type]
2845            _val = [{cid: getattr(cnet, s)[cid]} for s in supp]
2846            attr = supp
2847        elif attr == "_lengths":
2848            # reassigns both arc and edge _lengths
2849            supp_name = [o + attr for o in obj]
2850            supp_lens = [getattr(cnet, s) for s in supp_name]
2851            supp_link = [getattr(cnet, o + "s") for o in obj]
2852            supp_ll = list(zip(supp_lens, supp_link))
2853            _val = [{k: v for k, v in l1.items() if k in l2} for l1, l2 in supp_ll]
2854            attr = supp_name
2855
2856        # reassign attributes
2857        for a, av in zip(attr, _val):
2858            setattr(cnet, a, av)
2859
2860    # provide warning (for now) if the network contains a point pattern
2861    if getattr(net, "pointpatterns"):
2862        msg = "There is a least one point pattern associated with the network."
2863        msg += " Component extraction should be performed prior to snapping"
2864        msg += " point patterns to the network object; failing to do so may"
2865        msg += " lead to unexpected results."
2866        warnings.warn(msg)
2867    # provide warning (for now) if the network contains a point pattern
2868    dm, nt = "distance_matrix", "network_trees"
2869    if hasattr(net, dm) or hasattr(net, nt):
2870        msg = "Either one or both (%s, %s) attributes" % (dm, nt)
2871        msg += " are present and will be deleted. These must be"
2872        msg += " recalculated following component extraction."
2873        warnings.warn(msg)
2874        for attr in [dm, nt]:
2875            if hasattr(net, attr):
2876                _attr = getattr(net, attr)
2877                del _attr
2878
2879    # make initial copy of the network
2880    cnet = copy.deepcopy(net)
2881
2882    # set labels
2883    _n, _a, _g, _e = "network", "arc", "graph", "edge"
2884    obj_type = [_n]
2885    obj = [_a]
2886    hasgraph = False
2887    if hasattr(cnet, "w_graph"):
2888        obj_type += [_g]
2889        obj += [_e]
2890        hasgraph = True
2891
2892    # attributes to reassign
2893    update_attributes = [
2894        "_fully_connected",
2895        "_n_components",
2896        "_longest_component",
2897        "_largest_component",
2898        "vertex_list",
2899        "vertex_coords",
2900        "_component_vertex_count",
2901        "adjacencylist",
2902        "_component_is_ring",
2903        "_component2",
2904        "arcs",
2905        "_component_lengths",
2906        "_lengths",
2907        "_component_labels",
2908    ]
2909    if hasgraph:
2910        update_attributes.append("non_articulation_points")
2911
2912    # reassign attributes
2913    for attribute in update_attributes:
2914        _reassign(attribute, component_id)
2915
2916    # recreate spatial weights
2917    cnet.w_network = cnet.contiguityweights(graph=False, weightings=weightings)
2918    if hasgraph:
2919        cnet.w_graph = cnet.contiguityweights(graph=True, weightings=weightings)
2920
2921    return cnet
2922
2923
2924def spanning_tree(net, method="sort", maximum=False, silence_warnings=True):
2925    """Extract a minimum or maximum spanning tree from a network.
2926
2927    Parameters
2928    ----------
2929    net : spaghetti.Network
2930        Instance of a network object.
2931    method : str
2932        Method for determining spanning tree. Currently, the only
2933        supported method is 'sort', which sorts the network arcs
2934        by length prior to building intermediary networks and checking
2935        for cycles within the tree/subtrees. Future methods may
2936        include linear programming approachs, etc.
2937    maximum : bool
2938        When ``True`` a maximum spanning tree is created. When ``False``
2939        a minimum spanning tree is created. Default is ``False``.
2940    silence_warnings : bool
2941        Warn if there is more than one connected component. Default is
2942        ``False`` due to the nature of constructing a minimum
2943        spanning tree.
2944
2945    Returns
2946    -------
2947    net : spaghetti.Network
2948        Pruned instance of the network object.
2949
2950    Notes
2951    -----
2952
2953    For in-depth background and details see
2954    :cite:`GrahamHell_1985`,
2955    :cite:`AhujaRavindraK`, and
2956    :cite:`Okabe2012`.
2957
2958    See also
2959    --------
2960
2961    networkx.algorithms.tree.mst
2962    scipy.sparse.csgraph.minimum_spanning_tree
2963
2964    Examples
2965    --------
2966
2967    Create a network instance.
2968
2969    >>> from libpysal import cg
2970    >>> import spaghetti
2971    >>> p00 = cg.Point((0,0))
2972    >>> lines = [cg.Chain([p00, cg.Point((0,3)), cg.Point((4,0)), p00])]
2973    >>> ntw = spaghetti.Network(in_data=lines)
2974
2975    Extract the minimum spanning tree.
2976
2977    >>> minst_net = spaghetti.spanning_tree(ntw)
2978    >>> min_len = sum(minst_net.arc_lengths.values())
2979    >>> min_len
2980    7.0
2981
2982    Extract the maximum spanning tree.
2983
2984    >>> maxst_net = spaghetti.spanning_tree(ntw, maximum=True)
2985    >>> max_len = sum(maxst_net.arc_lengths.values())
2986    >>> max_len
2987    9.0
2988
2989    >>> max_len > min_len
2990    True
2991
2992    """
2993
2994    # (un)silence warning
2995    weights_kws = {"silence_warnings": silence_warnings}
2996    # do not extract graph object while testing for cycles
2997    net_kws = {"extractgraph": False, "weights_kws": weights_kws}
2998
2999    # if the network has no cycles, it is already a spanning tree
3000    if util.network_has_cycle(net.adjacencylist):
3001
3002        if method.lower() == "sort":
3003            spanning_tree = mst_weighted_sort(net, maximum, net_kws)
3004        else:
3005            msg = "'%s' not a valid method for minimum spanning tree creation"
3006            raise ValueError(msg % method)
3007
3008        # instantiate the spanning tree as a network object
3009        net = Network(in_data=spanning_tree, weights_kws=weights_kws)
3010
3011    return net
3012
3013
3014def mst_weighted_sort(net, maximum, net_kws):
3015    """Extract a minimum or maximum spanning tree from a network used
3016    the length-weighted sort method.
3017
3018    Parameters
3019    ----------
3020    net : spaghetti.Network
3021        See ``spanning_tree()``.
3022    maximum : bool
3023        See ``spanning_tree()``.
3024    net_kws : dict
3025        Keywords arguments for instaniating a ``spaghetti.Network``.
3026
3027    Returns
3028    -------
3029    spanning_tree : list
3030        All networks arcs that are members of the spanning tree.
3031
3032    Notes
3033    -----
3034
3035    This function is based on the method found in Chapter 3
3036    Section 4.3 of :cite:`Okabe2012`.
3037
3038    """
3039
3040    # network arcs dictionary sorted by arc length
3041    sort_kws = {"key": net.arc_lengths.get, "reverse": maximum}
3042    sorted_lengths = sorted(net.arc_lengths, **sort_kws)
3043
3044    # the spanning tree is initially empty
3045    spanning_tree = []
3046
3047    # iterate over each lengths of network arc
3048    while sorted_lengths:
3049        _arc = sorted_lengths.pop(0)
3050        # make a spatial representation of an arc
3051        chain_rep = util.chain_constr(net.vertex_coords, [_arc])
3052        # current set of network arcs as libpysal.cg.Chain
3053        _chains = spanning_tree + chain_rep
3054        # current network iteration
3055        _ntw = Network(in_data=_chains, **net_kws)
3056        # determine if the network contains a cycle
3057        if not util.network_has_cycle(_ntw.adjacencylist):
3058            # If no cycle is present, add the arc to the spanning tree
3059            spanning_tree.extend(chain_rep)
3060
3061    return spanning_tree
3062
3063
3064@requires("geopandas", "shapely")
3065def element_as_gdf(
3066    net,
3067    vertices=False,
3068    arcs=False,
3069    pp_name=None,
3070    snapped=False,
3071    routes=None,
3072    id_col="id",
3073    geom_col="geometry",
3074):
3075    """Return a ``geopandas.GeoDataFrame`` of network elements. This can be
3076    (a) the vertices of a network; (b) the arcs of a network; (c) both the
3077    vertices and arcs of the network; (d) the raw point pattern associated
3078    with the network; (e) the snapped point pattern of (d); or (f) the
3079    shortest path routes between point observations.
3080
3081    Parameters
3082    ----------
3083    net : spaghetti.Network
3084        A `spaghetti` network object.
3085    vertices : bool
3086        Extract the network vertices (``True``). Default is ``False``.
3087    arcs : bool
3088        Extract the network arcs (``True``). Default is ``False``.
3089    pp_name : str
3090        Name of the ``network.PointPattern`` to extract.
3091        Default is ``None``.
3092    snapped : bool
3093        If extracting a ``network.PointPattern``, set to ``True`` for
3094        snapped point locations along the network. Default is ``False``.
3095    routes : dict
3096        See ``paths`` from ``spaghetti.Network.shortest_paths``.
3097        Default is ``None``.
3098    id_col : str
3099        ``geopandas.GeoDataFrame`` column name for IDs. Default is ``"id"``.
3100        When extracting routes this creates an (origin, destination) tuple.
3101    geom_col : str
3102        ``geopandas.GeoDataFrame`` column name for geometry. Default is
3103        ``"geometry"``.
3104
3105    Raises
3106    ------
3107    KeyError
3108        In order to extract a ``network.PointPattern`` it must already
3109        be a part of the network object. This exception is raised
3110        when a ``network.PointPattern`` is being extracted that does
3111        not exist within the network object.
3112
3113    Returns
3114    -------
3115    points : geopandas.GeoDataFrame
3116        Network point elements (either vertices or ``network.PointPattern``
3117        points) as a ``geopandas.GeoDataFrame`` of ``shapely.geometry.Point``
3118        objects with an ``"id"`` column and ``"geometry""`` column.
3119        If the network object has a ``network_component_vertices`` attribute,
3120        then component labels are also added in a column.
3121    lines : geopandas.GeoDataFrame
3122        Network arc elements as a ``geopandas.GeoDataFrame`` of
3123        ``shapely.geometry.LineString`` objects with an ``"id"``
3124        column and ``"geometry"`` column. If the network object has
3125        a ``network_component_labels`` attribute, then component labels
3126        are also added in a column.
3127    paths : geopandas.GeoDataFrame
3128        Shortest path routes along network arc elements as a
3129        ``geopandas.GeoDataFrame`` of ``shapely.geometry.LineString``
3130        objects with an ``"id"`` (see ``spaghetti.Network.shortest_paths()``)
3131        column and ``"geometry"`` column.
3132
3133    Notes
3134    -----
3135
3136    When both network vertices and arcs are desired, the variable
3137    declaration must be in the order: <vertices>, <arcs>.
3138    This function requires ``geopandas``.
3139
3140    See also
3141    --------
3142
3143    geopandas.GeoDataFrame
3144
3145    Examples
3146    --------
3147
3148    Instantiate a network object.
3149
3150    >>> import spaghetti
3151    >>> from libpysal import examples
3152    >>> ntw = spaghetti.Network(examples.get_path("streets.shp"))
3153
3154    Extract the network elements (vertices and arcs) as
3155    ``geopandas.GeoDataFrame`` objects.
3156
3157    >>> vertices_df, arcs_df = spaghetti.element_as_gdf(
3158    ...     ntw, vertices=True, arcs=True
3159    ... )
3160
3161    Examine the first vertex. It is a member of the component labeled ``0``.
3162
3163    >>> vertices_df.loc[0]
3164    id                                            0
3165    geometry      POINT (728368.04762 877125.89535)
3166    comp_label                                    0
3167    Name: 0, dtype: object
3168
3169    Calculate the total length of the network.
3170
3171    >>> arcs_df.geometry.length.sum()
3172    104414.09200823458
3173
3174    """
3175
3176    # shortest path routes between observations
3177    if routes:
3178        paths = util._routes_as_gdf(routes, id_col, geom_col)
3179        return paths
3180
3181    # need vertices place holder to create network segment LineStrings
3182    # even if only network edges are desired.
3183    vertices_for_arcs = False
3184    if arcs and not vertices:
3185        vertices_for_arcs = True
3186
3187    # vertices/nodes/points
3188    if vertices or vertices_for_arcs or pp_name:
3189        points = util._points_as_gdf(
3190            net,
3191            vertices,
3192            vertices_for_arcs,
3193            pp_name,
3194            snapped,
3195            id_col=id_col,
3196            geom_col=geom_col,
3197        )
3198
3199        # return points geodataframe if arcs not specified or
3200        # if extracting `PointPattern` points
3201        if not arcs or pp_name:
3202            return points
3203
3204    # arcs
3205    arcs = util._arcs_as_gdf(net, points, id_col=id_col, geom_col=geom_col)
3206
3207    if vertices_for_arcs:
3208        return arcs
3209
3210    else:
3211        return points, arcs
3212
3213
3214def regular_lattice(bounds, nh, nv=None, exterior=False):
3215    """Generate a regular lattice of line segments
3216    (`libpysal.cg.Chain objects <https://pysal.org/libpysal/generated/libpysal.cg.Chain.html#libpysal.cg.Chain>`_).
3217
3218    Parameters
3219    ----------
3220    bounds : {tuple, list}
3221        Area bounds in the form - <minx,miny,maxx,maxy>.
3222    nh : int
3223        The number of internal horizontal lines of the lattice.
3224    nv : int
3225        The number of internal vertical lines of the lattice. Defaults to
3226        ``nh`` if left as None.
3227    exterior : bool
3228        Flag for including the outer bounding box segments. Default is False.
3229
3230    Returns
3231    -------
3232    lattice : list
3233        The ``libpysal.cg.Chain`` objects forming a regular lattice.
3234
3235    Notes
3236    -----
3237
3238    The ``nh`` and ``nv`` parameters do not include the external
3239    line segments. For example, setting ``nh=3, nv=2, exterior=True``
3240    will result in 5 horizontal line sets and 4 vertical line sets.
3241
3242    Examples
3243    --------
3244
3245    Create a 5x5 regular lattice with an exterior
3246
3247    >>> import spaghetti
3248    >>> lattice = spaghetti.regular_lattice((0,0,4,4), 3, exterior=True)
3249    >>> lattice[0].vertices
3250    [(0.0, 0.0), (1.0, 0.0)]
3251
3252    Create a 5x5 regular lattice without an exterior
3253
3254    >>> lattice = spaghetti.regular_lattice((0,0,5,5), 3, exterior=False)
3255    >>> lattice[-1].vertices
3256    [(3.75, 3.75), (3.75, 5.0)]
3257
3258    Create a 7x9 regular lattice with an exterior from the
3259    bounds of ``streets.shp``.
3260
3261    >>> path = libpysal.examples.get_path("streets.shp")
3262    >>> shp = libpysal.io.open(path)
3263    >>> lattice = spaghetti.regular_lattice(shp.bbox, 5, nv=7, exterior=True)
3264    >>> lattice[0].vertices
3265    [(723414.3683108028, 875929.0396895551), (724286.1381211297, 875929.0396895551)]
3266
3267    """
3268
3269    # check for bounds validity
3270    if len(bounds) != 4:
3271        bounds_len = len(bounds)
3272        msg = "The 'bounds' parameter is %s elements " % bounds_len
3273        msg += "but should be exactly 4 - <minx,miny,maxx,maxy>."
3274        raise RuntimeError(msg)
3275
3276    # check for bounds validity
3277    if not nv:
3278        nv = nh
3279    try:
3280        nh, nv = int(nh), int(nv)
3281    except TypeError:
3282        nlines_types = type(nh), type(nv)
3283        msg = "The 'nh' and 'nv' parameters (%s, %s) " % nlines_types
3284        msg += "could not be converted to integers."
3285        raise TypeError(msg)
3286
3287    # bounding box line lengths
3288    len_h, len_v = bounds[2] - bounds[0], bounds[3] - bounds[1]
3289
3290    # horizontal and vertical increments
3291    incr_h, incr_v = len_h / float(nh + 1), len_v / float(nv + 1)
3292
3293    # define the horizontal and vertical space
3294    space_h = [incr_h * slot for slot in range(nv + 2)]
3295    space_v = [incr_v * slot for slot in range(nh + 2)]
3296
3297    # create vertical and horizontal lines
3298    lines_h = util.build_chains(space_h, space_v, exterior, bounds)
3299    lines_v = util.build_chains(space_h, space_v, exterior, bounds, h=False)
3300
3301    # combine into one list
3302    lattice = lines_h + lines_v
3303
3304    return lattice
3305
3306
3307class PointPattern:
3308    """A stub point pattern class used to store a point pattern.
3309
3310    Note from the original author of ``pysal.network``:
3311    This class is monkey patched with network specific attributes when the
3312    points are snapped to a network. In the future this class may be
3313    replaced with a generic point pattern class.
3314
3315    Parameters
3316    ----------
3317    in_data : {str, list, tuple, libpysal.cg.Point, geopandas.GeoDataFrame}
3318        The input geographic data. Either (1) a path to a shapefile
3319        (str); (2) an iterable containing ``libpysal.cg.Point``
3320        objects; (3) a single ``libpysal.cg.Point``; or
3321        (4) a ``geopandas.GeoDataFrame``.
3322    idvariable : str
3323        Field in the shapefile to use as an ID variable.
3324    attribute :  bool
3325        A flag to indicate whether all attributes are tagged to this
3326        class (``True``) or excluded (``False``). Default is ``False``.
3327
3328    Attributes
3329    ----------
3330    points : dict
3331        Keys are the point IDs (int). Values are the :math:`(x,y)`
3332        coordinates (tuple).
3333    npoints : int
3334        The number of points.
3335    obs_to_arc : dict
3336        Keys are arc IDs (tuple). Values are snapped point information
3337        (``dict``).  Within the snapped point information (``dict``)
3338        keys are observation IDs (``int``), and values are snapped
3339        coordinates.
3340    obs_to_vertex : list
3341       List of incident network vertices to snapped observation points
3342       converted from a ``default_dict``. Originally in the form of
3343       paired left/right nearest network vertices {netvtx1: obs_id1,
3344       netvtx2: obs_id1, netvtx1: obs_id2... netvtx1: obs_idn}, then
3345       simplified to a list in the form
3346       [netvtx1, netvtx2, netvtx1, netvtx2, ...].
3347    dist_to_vertex : dict
3348        Keys are observations IDs (``int``). Values are distance lookup
3349        (``dict``). Within distance lookup (``dict``) keys are the two
3350        incident vertices of the arc and values are distance to each of
3351        those arcs.
3352    snapped_coordinates : dict
3353        Keys are the point IDs (int). Values are the snapped :math:`(x,y)`
3354        coordinates (tuple).
3355    snap_dist : bool
3356            Flag as ``True`` to include the distance from the original
3357            location to the snapped location along the network. Default
3358            is ``False``.
3359
3360    """
3361
3362    def __init__(self, in_data=None, idvariable=None, attribute=False):
3363
3364        # initialize points dictionary and counter
3365        self.points = {}
3366        self.npoints = 0
3367
3368        # determine input point data type
3369        in_dtype = str(type(in_data)).split("'")[1]
3370        # flag for points from a shapefile
3371        from_shp = False
3372        # flag for points as libpysal.cg.Point objects
3373        is_libpysal_points = False
3374        supported_iterables = ["list", "tuple"]
3375        # type error message
3376        msg = "'%s' not supported for point pattern instantiation."
3377
3378        # set appropriate geometries
3379        if in_dtype == "str":
3380            from_shp = True
3381        elif in_dtype in supported_iterables:
3382            dtype = str(type(in_data[0])).split("'")[1]
3383            if dtype == "libpysal.cg.shapes.Point":
3384                is_libpysal_points = True
3385            else:
3386                raise TypeError(msg % dtype)
3387        elif in_dtype == "libpysal.cg.shapes.Point":
3388            in_data = [in_data]
3389            is_libpysal_points = True
3390        elif in_dtype == "geopandas.geodataframe.GeoDataFrame":
3391            from_shp = False
3392        else:
3393            raise TypeError(msg % in_dtype)
3394
3395        # either set native point ID from dataset or create new IDs
3396        if idvariable and not is_libpysal_points:
3397            ids = weights.util.get_ids(in_data, idvariable)
3398        else:
3399            ids = None
3400
3401        # extract the point geometries
3402        if not is_libpysal_points:
3403            if from_shp:
3404                pts = open(in_data)
3405            else:
3406                pts_objs = list(in_data.geometry)
3407                pts = [cg.shapes.Point((p.x, p.y)) for p in pts_objs]
3408        else:
3409            pts = in_data
3410
3411        # fetch attributes if requested
3412        if attribute and not is_libpysal_points:
3413
3414            # open the database file if data is from shapefile
3415            if from_shp:
3416                dbname = os.path.splitext(in_data)[0] + ".dbf"
3417                db = open(dbname)
3418
3419            # if data is from a GeoDataFrame, drop the geometry column
3420            # and declare attribute values as a list of lists
3421            else:
3422                db = in_data.drop(in_data.geometry.name, axis=1).values.tolist()
3423                db = [[d] for d in db]
3424        else:
3425            db = None
3426
3427        # iterate over all points
3428        for i, pt in enumerate(pts):
3429
3430            # IDs, attributes
3431            if ids and db is not None:
3432                self.points[ids[i]] = {"coordinates": pt, "properties": db[i]}
3433
3434            # IDs, no attributes
3435            elif ids and db is None:
3436                self.points[ids[i]] = {"coordinates": pt, "properties": None}
3437
3438            # no IDs, attributes
3439            elif not ids and db is not None:
3440                self.points[i] = {"coordinates": pt, "properties": db[i]}
3441
3442            # no IDs, no attributes
3443            else:
3444                self.points[i] = {"coordinates": pt, "properties": None}
3445
3446        # close the shapefile and database file
3447        # if the input data is a .shp
3448        if from_shp:
3449            pts.close()
3450            if db:
3451                db.close()
3452
3453        # record number of points
3454        self.npoints = len(self.points.keys())
3455
3456
3457class SimulatedPointPattern:
3458    """Note from the original author of ``pysal.network``:
3459    Struct style class to mirror the ``PointPattern`` class.
3460    If the ``PointPattern`` class has methods, it might make
3461    sense to make this a child of that class. This class is not intended
3462    to be used by the external user.
3463
3464    Attributes
3465    ----------
3466    npoints : int
3467        The number of points.
3468    obs_to_arc : dict
3469        Keys are arc IDs (tuple). Values are snapped point information
3470        (dict).  Within the snapped point information (dict)
3471        keys are observation IDs (int), and values are snapped
3472        coordinates.
3473    obs_to_vertex : list
3474       List of incident network vertices to snapped observation points
3475       converted from a default_dict. Originally in the form of
3476       paired left/right nearest network vertices {netvtx1: obs_id1,
3477       netvtx2: obs_id1, netvtx1: obs_id2... netvtx1: obs_idn}, then
3478       simplified to a list in the form
3479       [netvtx1, netvtx2, netvtx1, netvtx2, ...].
3480    dist_to_vertex : dict
3481        Keys are observations IDs (int). Values are distance lookup
3482        (dict). Within distance lookup (dict) keys are the two
3483        incident vertices of the arc and values are distance to each of
3484        those arcs.
3485    snapped_coordinates : dict
3486        Keys are the point IDs (int). Values are the snapped :math:`(x,y)`
3487        coordinates (tuple).
3488    snap_dist : bool
3489            Flag as ``True`` to include the distance from the original
3490            location to the snapped location along the network. Default
3491            is ``False``.
3492
3493    """
3494
3495    def __init__(self):
3496
3497        # duplicate post-snapping PointPattern class structure
3498        self.npoints = 0
3499        self.obs_to_arc = {}
3500        self.obs_to_vertex = defaultdict(list)
3501        self.dist_to_vertex = {}
3502        self.snapped_coordinates = {}
3503