1import json
2import warnings
3
4import numpy as np
5import pandas as pd
6from pandas import DataFrame, Series
7from pandas.core.accessor import CachedAccessor
8
9from shapely.geometry import mapping, shape
10from shapely.geometry.base import BaseGeometry
11
12from pyproj import CRS
13
14from geopandas.array import GeometryArray, GeometryDtype, from_shapely, to_wkb, to_wkt
15from geopandas.base import GeoPandasBase, is_geometry_type
16from geopandas.geoseries import GeoSeries
17import geopandas.io
18from geopandas.explore import _explore
19from . import _compat as compat
20from ._decorator import doc
21
22
23DEFAULT_GEO_COLUMN_NAME = "geometry"
24
25
def _ensure_geometry(data, crs=None):
    """
    Ensure the data is of geometry dtype or converted to it.

    If input is a (Geo)Series, output is a GeoSeries, otherwise output
    is GeometryArray.

    If the input is a GeometryDtype with a set CRS, `crs` is ignored.

    Parameters
    ----------
    data : Series, GeometryArray or array-like of geometries
        Values to coerce.
    crs : value, optional
        CRS attached to the result when `data` does not carry one yet.

    Returns
    -------
    GeoSeries or GeometryArray
        When geometry-typed input without a CRS needs `crs` assigned, the
        assignment is done on a copy so the object passed by the caller is
        not modified.
    """
    if is_geometry_type(data):
        if isinstance(data, Series):
            data = GeoSeries(data)
        if data.crs is None and crs is not None:
            # Assigning the CRS in place would silently change the caller's
            # object (and anything sharing its underlying array), so work
            # on a copy instead.
            data = data.copy()
            data.crs = crs
        return data

    if isinstance(data, Series):
        # keep index and name of the original Series
        out = from_shapely(np.asarray(data), crs=crs)
        return GeoSeries(out, index=data.index, name=data.name)

    return from_shapely(data, crs=crs)
48
49
50def _crs_mismatch_warning():
51    # TODO: raise error in 0.9 or 0.10.
52    warnings.warn(
53        "CRS mismatch between CRS of the passed geometries "
54        "and 'crs'. Use 'GeoDataFrame.set_crs(crs, "
55        "allow_override=True)' to overwrite CRS or "
56        "'GeoDataFrame.to_crs(crs)' to reproject geometries. "
57        "CRS mismatch will raise an error in the future versions "
58        "of GeoPandas.",
59        FutureWarning,
60        stacklevel=3,
61    )
62
63
64class GeoDataFrame(GeoPandasBase, DataFrame):
65    """
66    A GeoDataFrame object is a pandas.DataFrame that has a column
67    with geometry. In addition to the standard DataFrame constructor arguments,
68    GeoDataFrame also accepts the following keyword arguments:
69
70    Parameters
71    ----------
72    crs : value (optional)
73        Coordinate Reference System of the geometry objects. Can be anything accepted by
74        :meth:`pyproj.CRS.from_user_input() <pyproj.crs.CRS.from_user_input>`,
75        such as an authority string (eg "EPSG:4326") or a WKT string.
76    geometry : str or array (optional)
77        If str, column to use as geometry. If array, will be set as 'geometry'
78        column on GeoDataFrame.
79
80    Examples
81    --------
82    Constructing GeoDataFrame from a dictionary.
83
84    >>> from shapely.geometry import Point
85    >>> d = {'col1': ['name1', 'name2'], 'geometry': [Point(1, 2), Point(2, 1)]}
86    >>> gdf = geopandas.GeoDataFrame(d, crs="EPSG:4326")
87    >>> gdf
88        col1                 geometry
89    0  name1  POINT (1.00000 2.00000)
90    1  name2  POINT (2.00000 1.00000)
91
92    Notice that the inferred dtype of 'geometry' columns is geometry.
93
94    >>> gdf.dtypes
95    col1          object
96    geometry    geometry
97    dtype: object
98
99    Constructing GeoDataFrame from a pandas DataFrame with a column of WKT geometries:
100
101    >>> import pandas as pd
102    >>> d = {'col1': ['name1', 'name2'], 'wkt': ['POINT (1 2)', 'POINT (2 1)']}
103    >>> df = pd.DataFrame(d)
104    >>> gs = geopandas.GeoSeries.from_wkt(df['wkt'])
105    >>> gdf = geopandas.GeoDataFrame(df, geometry=gs, crs="EPSG:4326")
106    >>> gdf
107        col1          wkt                 geometry
108    0  name1  POINT (1 2)  POINT (1.00000 2.00000)
109    1  name2  POINT (2 1)  POINT (2.00000 1.00000)
110
111    See also
112    --------
113    GeoSeries : Series object designed to store shapely geometry objects
114    """
115
    # Names of the extra attributes this class keeps next to the DataFrame
    # data (pandas ``_metadata`` hook -- presumably what lets them follow
    # the frame through copies/pickling; confirm against the pandas docs).
    _metadata = ["_crs", "_geometry_column_name"]

    # Class-level default for the active geometry column name; instances
    # overwrite it in ``__init__`` and ``set_geometry``.
    _geometry_column_name = DEFAULT_GEO_COLUMN_NAME
119
    def __init__(self, data=None, *args, geometry=None, crs=None, **kwargs):
        """
        Build the frame via the DataFrame constructor, then resolve which
        column (if any) is the active geometry and which CRS applies.

        ``data``, ``*args`` and ``**kwargs`` are forwarded to the parent
        constructor unchanged; ``geometry`` and ``crs`` are described in the
        class docstring.

        Raises
        ------
        ValueError
            If ``geometry`` is not given and more than one column is named
            'geometry'.
        """
        # NOTE(review): the context manager presumably silences shapely 2
        # related warnings raised while pandas builds the frame -- confirm
        # in ``_compat``.
        with compat.ignore_shapely2_warnings():
            super().__init__(data, *args, **kwargs)

        # need to set this before calling self['geometry'], because
        # getitem accesses crs
        # (a falsy ``crs`` is stored as None)
        self._crs = CRS.from_user_input(crs) if crs else None

        # if gdf passed in and geo_col is set, we use that for geometry
        if geometry is None and isinstance(data, GeoDataFrame):
            self._geometry_column_name = data._geometry_column_name
            if crs is not None and data.crs != crs:
                _crs_mismatch_warning()
                # TODO: raise error in 0.9 or 0.10.
            # nothing else to do: none of the geometry handling below runs
            return

        # set_geometry ensures the geometry data have the proper dtype,
        # but is not called if `geometry=None` ('geometry' column present
        # in the data), so therefore need to ensure it here manually
        # but within a try/except because currently non-geometries are
        # allowed in that case
        # TODO do we want to raise / return normal DataFrame in this case?
        if geometry is None and "geometry" in self.columns:
            # Check for multiple columns with name "geometry". If there are,
            # self["geometry"] is a gdf and constructor gets recursively recalled
            # by pandas internals trying to access this
            if (self.columns == "geometry").sum() > 1:
                raise ValueError(
                    "GeoDataFrame does not support multiple columns "
                    "using the geometry column name 'geometry'."
                )

            # only if we have actual geometry values -> call set_geometry
            # (keep a reference to the index so it can be restored below)
            index = self.index
            try:
                # warn when the column already carries a CRS that differs
                # from the explicitly requested one
                if (
                    hasattr(self["geometry"].values, "crs")
                    and self["geometry"].values.crs
                    and crs
                    and not self["geometry"].values.crs == crs
                ):
                    _crs_mismatch_warning()
                    # TODO: raise error in 0.9 or 0.10.
                self["geometry"] = _ensure_geometry(self["geometry"].values, crs)
            except TypeError:
                # the column does not hold geometries: leave it untouched
                # and keep ``geometry`` as None
                pass
            else:
                if self.index is not index:
                    # With pandas < 1.0 and an empty frame (no rows), the index
                    # gets reset to a default RangeIndex -> set back the original
                    # index if needed
                    self.index = index
                # conversion succeeded -> register the column below
                geometry = "geometry"

        if geometry is not None:
            # ``geometry`` is either a column label or array-like data; only
            # objects exposing a ``crs`` attribute can conflict with ``crs``
            if (
                hasattr(geometry, "crs")
                and geometry.crs
                and crs
                and not geometry.crs == crs
            ):
                _crs_mismatch_warning()
                # TODO: raise error in 0.9 or 0.10.
            self.set_geometry(geometry, inplace=True)

        # a CRS was requested but no geometry column could be established
        if geometry is None and crs:
            warnings.warn(
                "Assigning CRS to a GeoDataFrame without a geometry column is now "
                "deprecated and will not be supported in the future.",
                FutureWarning,
                stacklevel=2,
            )
193
194    def __setattr__(self, attr, val):
195        # have to special case geometry b/c pandas tries to use as column...
196        if attr == "geometry":
197            object.__setattr__(self, attr, val)
198        else:
199            super().__setattr__(attr, val)
200
201    def _get_geometry(self):
202        if self._geometry_column_name not in self:
203            raise AttributeError(
204                "No geometry data set yet (expected in"
205                " column '%s'.)" % self._geometry_column_name
206            )
207        return self[self._geometry_column_name]
208
209    def _set_geometry(self, col):
210        if not pd.api.types.is_list_like(col):
211            raise ValueError("Must use a list-like to set the geometry property")
212        self.set_geometry(col, inplace=True)
213
214    geometry = property(
215        fget=_get_geometry, fset=_set_geometry, doc="Geometry data for GeoDataFrame"
216    )
217
218    def set_geometry(self, col, drop=False, inplace=False, crs=None):
219        """
220        Set the GeoDataFrame geometry using either an existing column or
221        the specified input. By default yields a new object.
222
223        The original geometry column is replaced with the input.
224
225        Parameters
226        ----------
227        col : column label or array
228        drop : boolean, default False
229            Delete column to be used as the new geometry
230        inplace : boolean, default False
231            Modify the GeoDataFrame in place (do not create a new object)
232        crs : pyproj.CRS, optional
233            Coordinate system to use. The value can be anything accepted
234            by :meth:`pyproj.CRS.from_user_input() <pyproj.crs.CRS.from_user_input>`,
235            such as an authority string (eg "EPSG:4326") or a WKT string.
236            If passed, overrides both DataFrame and col's crs.
237            Otherwise, tries to get crs from passed col values or DataFrame.
238
239        Examples
240        --------
241        >>> from shapely.geometry import Point
242        >>> d = {'col1': ['name1', 'name2'], 'geometry': [Point(1, 2), Point(2, 1)]}
243        >>> gdf = geopandas.GeoDataFrame(d, crs="EPSG:4326")
244        >>> gdf
245            col1                 geometry
246        0  name1  POINT (1.00000 2.00000)
247        1  name2  POINT (2.00000 1.00000)
248
249        Passing an array:
250
251        >>> df1 = gdf.set_geometry([Point(0,0), Point(1,1)])
252        >>> df1
253            col1                 geometry
254        0  name1  POINT (0.00000 0.00000)
255        1  name2  POINT (1.00000 1.00000)
256
257        Using existing column:
258
259        >>> gdf["buffered"] = gdf.buffer(2)
260        >>> df2 = gdf.set_geometry("buffered")
261        >>> df2.geometry
262        0    POLYGON ((3.00000 2.00000, 2.99037 1.80397, 2....
263        1    POLYGON ((4.00000 1.00000, 3.99037 0.80397, 3....
264        Name: buffered, dtype: geometry
265
266        Returns
267        -------
268        GeoDataFrame
269
270        See also
271        --------
272        GeoDataFrame.rename_geometry : rename an active geometry column
273        """
274        # Most of the code here is taken from DataFrame.set_index()
275        if inplace:
276            frame = self
277        else:
278            frame = self.copy()
279
280        to_remove = None
281        geo_column_name = self._geometry_column_name
282        if isinstance(col, (Series, list, np.ndarray, GeometryArray)):
283            level = col
284        elif hasattr(col, "ndim") and col.ndim != 1:
285            raise ValueError("Must pass array with one dimension only.")
286        else:
287            try:
288                level = frame[col]
289            except KeyError:
290                raise ValueError("Unknown column %s" % col)
291            except Exception:
292                raise
293            if isinstance(level, DataFrame):
294                raise ValueError(
295                    "GeoDataFrame does not support setting the geometry column where "
296                    "the column name is shared by multiple columns."
297                )
298
299            if drop:
300                to_remove = col
301                geo_column_name = self._geometry_column_name
302            else:
303                geo_column_name = col
304
305        if to_remove:
306            del frame[to_remove]
307
308        if not crs:
309            level_crs = getattr(level, "crs", None)
310            crs = level_crs if level_crs is not None else self._crs
311
312        if isinstance(level, (GeoSeries, GeometryArray)) and level.crs != crs:
313            # Avoids caching issues/crs sharing issues
314            level = level.copy()
315            level.crs = crs
316
317        # Check that we are using a listlike of geometries
318        level = _ensure_geometry(level, crs=crs)
319        index = frame.index
320        frame[geo_column_name] = level
321        if frame.index is not index and len(frame.index) == len(index):
322            # With pandas < 1.0 and an empty frame (no rows), the index gets reset
323            # to a default RangeIndex -> set back the original index if needed
324            frame.index = index
325        frame._geometry_column_name = geo_column_name
326        frame.crs = crs
327        if not inplace:
328            return frame
329
330    def rename_geometry(self, col, inplace=False):
331        """
332        Renames the GeoDataFrame geometry column to
333        the specified name. By default yields a new object.
334
335        The original geometry column is replaced with the input.
336
337        Parameters
338        ----------
339        col : new geometry column label
340        inplace : boolean, default False
341            Modify the GeoDataFrame in place (do not create a new object)
342
343        Examples
344        --------
345        >>> from shapely.geometry import Point
346        >>> d = {'col1': ['name1', 'name2'], 'geometry': [Point(1, 2), Point(2, 1)]}
347        >>> df = geopandas.GeoDataFrame(d, crs="EPSG:4326")
348        >>> df1 = df.rename_geometry('geom1')
349        >>> df1.geometry.name
350        'geom1'
351        >>> df.rename_geometry('geom1', inplace=True)
352        >>> df.geometry.name
353        'geom1'
354
355        Returns
356        -------
357        geodataframe : GeoDataFrame
358
359        See also
360        --------
361        GeoDataFrame.set_geometry : set the active geometry
362        """
363        geometry_col = self.geometry.name
364        if col in self.columns:
365            raise ValueError(f"Column named {col} already exists")
366        else:
367            if not inplace:
368                return self.rename(columns={geometry_col: col}).set_geometry(
369                    col, inplace
370                )
371            self.rename(columns={geometry_col: col}, inplace=inplace)
372            self.set_geometry(col, inplace=inplace)
373
    @property
    def crs(self):
        """
        The Coordinate Reference System (CRS) represented as a ``pyproj.CRS``
        object.

        Returns None if the CRS is not set. When setting, the value
        can be anything accepted by
        :meth:`pyproj.CRS.from_user_input() <pyproj.crs.CRS.from_user_input>`,
        such as an authority string (eg "EPSG:4326") or a WKT string.

        Examples
        --------

        >>> gdf.crs  # doctest: +SKIP
        <Geographic 2D CRS: EPSG:4326>
        Name: WGS 84
        Axis Info [ellipsoidal]:
        - Lat[north]: Geodetic latitude (degree)
        - Lon[east]: Geodetic longitude (degree)
        Area of Use:
        - name: World
        - bounds: (-180.0, -90.0, 180.0, 90.0)
        Datum: World Geodetic System 1984
        - Ellipsoid: WGS 84
        - Prime Meridian: Greenwich

        See also
        --------
        GeoDataFrame.set_crs : assign CRS
        GeoDataFrame.to_crs : re-project to another CRS

        """
        # the value is cached on the frame itself
        return self._crs
409
410    @crs.setter
411    def crs(self, value):
412        """Sets the value of the crs"""
413        if self._geometry_column_name not in self:
414            warnings.warn(
415                "Assigning CRS to a GeoDataFrame without a geometry column is now "
416                "deprecated and will not be supported in the future.",
417                FutureWarning,
418                stacklevel=4,
419            )
420            self._crs = None if not value else CRS.from_user_input(value)
421        else:
422            if hasattr(self.geometry.values, "crs"):
423                self.geometry.values.crs = value
424                self._crs = self.geometry.values.crs
425            else:
426                # column called 'geometry' without geometry
427                self._crs = None if not value else CRS.from_user_input(value)
428
    def __setstate__(self, state):
        """
        Restore pickled state, upgrading the CRS layout used by older pickles
        before handing the state to the parent implementation.
        """
        # overriding DataFrame method for compat with older pickles (CRS handling)
        if isinstance(state, dict):
            # older layout: the metadata list names the attribute "crs";
            # rename that entry to "_crs" (the list is modified in place)
            if "_metadata" in state and "crs" in state["_metadata"]:
                metadata = state["_metadata"]
                metadata[metadata.index("crs")] = "_crs"
            # older layout: the value itself is stored under "crs"; move it
            # to "_crs", converting anything but None to a CRS object
            if "crs" in state and "_crs" not in state:
                crs = state.pop("crs")
                state["_crs"] = CRS.from_user_input(crs) if crs is not None else crs

        super().__setstate__(state)

        # for some versions that didn't yet have CRS at array level -> crs is set
        # at GeoDataFrame level with '_crs' (and not 'crs'), so without propagating
        # to the GeoSeries/GeometryArray
        try:
            if self.crs is not None:
                if self.geometry.values.crs is None:
                    # re-assigning goes through the crs setter, which writes
                    # the value onto the geometry array
                    self.crs = self.crs
        except Exception:
            # best effort only: a frame that cannot be upgraded (e.g. no
            # geometry column) is restored as-is
            pass
450
451    @classmethod
452    def from_dict(cls, data, geometry=None, crs=None, **kwargs):
453        """
454        Construct GeoDataFrame from dict of array-like or dicts by
455        overriding DataFrame.from_dict method with geometry and crs
456
457        Parameters
458        ----------
459        data : dict
460            Of the form {field : array-like} or {field : dict}.
461        geometry : str or array (optional)
462            If str, column to use as geometry. If array, will be set as 'geometry'
463            column on GeoDataFrame.
464        crs : str or dict (optional)
465            Coordinate reference system to set on the resulting frame.
466        kwargs : key-word arguments
467            These arguments are passed to DataFrame.from_dict
468
469        Returns
470        -------
471        GeoDataFrame
472
473        """
474        dataframe = super().from_dict(data, **kwargs)
475        return GeoDataFrame(dataframe, geometry=geometry, crs=crs)
476
    @classmethod
    def from_file(cls, filename, **kwargs):
        """Alternate constructor to create a ``GeoDataFrame`` from a file.

        It is recommended to use :func:`geopandas.read_file` instead.

        Can load a ``GeoDataFrame`` from a file in any format recognized by
        `fiona`. See http://fiona.readthedocs.io/en/latest/manual.html for details.

        Parameters
        ----------
        filename : str
            File path or file handle to read from. Depending on which kwargs
            are included, the content of filename may vary. See
            http://fiona.readthedocs.io/en/latest/README.html#usage for usage details.
        kwargs : key-word arguments
            These arguments are passed to fiona.open, and can be used to
            access multi-layer data, data stored within archives (zip files),
            etc.

        Examples
        --------

        >>> path = geopandas.datasets.get_path('nybb')
        >>> gdf = geopandas.GeoDataFrame.from_file(path)
        >>> gdf  # doctest: +SKIP
           BoroCode       BoroName     Shape_Leng    Shape_Area                 \
                          geometry
        0         5  Staten Island  330470.010332  1.623820e+09  MULTIPOLYGON ((\
(970217.022 145643.332, 970227....
        1         4         Queens  896344.047763  3.045213e+09  MULTIPOLYGON ((\
(1029606.077 156073.814, 102957...
        2         3       Brooklyn  741080.523166  1.937479e+09  MULTIPOLYGON ((\
(1021176.479 151374.797, 102100...
        3         1      Manhattan  359299.096471  6.364715e+08  MULTIPOLYGON ((\
(981219.056 188655.316, 980940....
        4         2          Bronx  464392.991824  1.186925e+09  MULTIPOLYGON ((\
(1012821.806 229228.265, 101278...

        The recommended method of reading files is :func:`geopandas.read_file`:

        >>> gdf = geopandas.read_file(path)

        See also
        --------
        read_file : read file to GeoDataFrame
        GeoDataFrame.to_file : write GeoDataFrame to file

        """
        # Pure delegation: ``cls`` is not used, the result is whatever
        # ``_read_file`` returns.
        return geopandas.io.file._read_file(filename, **kwargs)
527
528    @classmethod
529    def from_features(cls, features, crs=None, columns=None):
530        """
531        Alternate constructor to create GeoDataFrame from an iterable of
532        features or a feature collection.
533
534        Parameters
535        ----------
536        features
537            - Iterable of features, where each element must be a feature
538              dictionary or implement the __geo_interface__.
539            - Feature collection, where the 'features' key contains an
540              iterable of features.
541            - Object holding a feature collection that implements the
542              ``__geo_interface__``.
543        crs : str or dict (optional)
544            Coordinate reference system to set on the resulting frame.
545        columns : list of column names, optional
546            Optionally specify the column names to include in the output frame.
547            This does not overwrite the property names of the input, but can
548            ensure a consistent output format.
549
550        Returns
551        -------
552        GeoDataFrame
553
554        Notes
555        -----
556        For more information about the ``__geo_interface__``, see
557        https://gist.github.com/sgillies/2217756
558
559        Examples
560        --------
561        >>> feature_coll = {
562        ...     "type": "FeatureCollection",
563        ...     "features": [
564        ...         {
565        ...             "id": "0",
566        ...             "type": "Feature",
567        ...             "properties": {"col1": "name1"},
568        ...             "geometry": {"type": "Point", "coordinates": (1.0, 2.0)},
569        ...             "bbox": (1.0, 2.0, 1.0, 2.0),
570        ...         },
571        ...         {
572        ...             "id": "1",
573        ...             "type": "Feature",
574        ...             "properties": {"col1": "name2"},
575        ...             "geometry": {"type": "Point", "coordinates": (2.0, 1.0)},
576        ...             "bbox": (2.0, 1.0, 2.0, 1.0),
577        ...         },
578        ...     ],
579        ...     "bbox": (1.0, 1.0, 2.0, 2.0),
580        ... }
581        >>> df = geopandas.GeoDataFrame.from_features(feature_coll)
582        >>> df
583                          geometry   col1
584        0  POINT (1.00000 2.00000)  name1
585        1  POINT (2.00000 1.00000)  name2
586
587        """
588        # Handle feature collections
589        if hasattr(features, "__geo_interface__"):
590            fs = features.__geo_interface__
591        else:
592            fs = features
593
594        if isinstance(fs, dict) and fs.get("type") == "FeatureCollection":
595            features_lst = fs["features"]
596        else:
597            features_lst = features
598
599        rows = []
600        for feature in features_lst:
601            # load geometry
602            if hasattr(feature, "__geo_interface__"):
603                feature = feature.__geo_interface__
604            row = {
605                "geometry": shape(feature["geometry"]) if feature["geometry"] else None
606            }
607            # load properties
608            row.update(feature["properties"])
609            rows.append(row)
610        return GeoDataFrame(rows, columns=columns, crs=crs)
611
612    @classmethod
613    def from_postgis(
614        cls,
615        sql,
616        con,
617        geom_col="geom",
618        crs=None,
619        index_col=None,
620        coerce_float=True,
621        parse_dates=None,
622        params=None,
623        chunksize=None,
624    ):
625        """
626        Alternate constructor to create a ``GeoDataFrame`` from a sql query
627        containing a geometry column in WKB representation.
628
629        Parameters
630        ----------
631        sql : string
632        con : sqlalchemy.engine.Connection or sqlalchemy.engine.Engine
633        geom_col : string, default 'geom'
634            column name to convert to shapely geometries
635        crs : optional
636            Coordinate reference system to use for the returned GeoDataFrame
637        index_col : string or list of strings, optional, default: None
638            Column(s) to set as index(MultiIndex)
639        coerce_float : boolean, default True
640            Attempt to convert values of non-string, non-numeric objects (like
641            decimal.Decimal) to floating point, useful for SQL result sets
642        parse_dates : list or dict, default None
643            - List of column names to parse as dates.
644            - Dict of ``{column_name: format string}`` where format string is
645              strftime compatible in case of parsing string times, or is one of
646              (D, s, ns, ms, us) in case of parsing integer timestamps.
647            - Dict of ``{column_name: arg dict}``, where the arg dict
648              corresponds to the keyword arguments of
649              :func:`pandas.to_datetime`. Especially useful with databases
650              without native Datetime support, such as SQLite.
651        params : list, tuple or dict, optional, default None
652            List of parameters to pass to execute method.
653        chunksize : int, default None
654            If specified, return an iterator where chunksize is the number
655            of rows to include in each chunk.
656
657        Examples
658        --------
659        PostGIS
660
661        >>> from sqlalchemy import create_engine  # doctest: +SKIP
662        >>> db_connection_url = "postgresql://myusername:mypassword@myhost:5432/mydb"
663        >>> con = create_engine(db_connection_url)  # doctest: +SKIP
664        >>> sql = "SELECT geom, highway FROM roads"
665        >>> df = geopandas.GeoDataFrame.from_postgis(sql, con)  # doctest: +SKIP
666
667        SpatiaLite
668
669        >>> sql = "SELECT ST_Binary(geom) AS geom, highway FROM roads"
670        >>> df = geopandas.GeoDataFrame.from_postgis(sql, con)  # doctest: +SKIP
671
672        The recommended method of reading from PostGIS is
673        :func:`geopandas.read_postgis`:
674
675        >>> df = geopandas.read_postgis(sql, con)  # doctest: +SKIP
676
677        See also
678        --------
679        geopandas.read_postgis : read PostGIS database to GeoDataFrame
680        """
681
682        df = geopandas.io.sql._read_postgis(
683            sql,
684            con,
685            geom_col=geom_col,
686            crs=crs,
687            index_col=index_col,
688            coerce_float=coerce_float,
689            parse_dates=parse_dates,
690            params=params,
691            chunksize=chunksize,
692        )
693
694        return df
695
696    def to_json(self, na="null", show_bbox=False, drop_id=False, **kwargs):
697        """
698        Returns a GeoJSON representation of the ``GeoDataFrame`` as a string.
699
700        Parameters
701        ----------
702        na : {'null', 'drop', 'keep'}, default 'null'
703            Indicates how to output missing (NaN) values in the GeoDataFrame.
704            See below.
705        show_bbox : bool, optional, default: False
706            Include bbox (bounds) in the geojson
707        drop_id : bool, default: False
708            Whether to retain the index of the GeoDataFrame as the id property
709            in the generated GeoJSON. Default is False, but may want True
710            if the index is just arbitrary row numbers.
711
712        Notes
713        -----
714        The remaining *kwargs* are passed to json.dumps().
715
716        Missing (NaN) values in the GeoDataFrame can be represented as follows:
717
718        - ``null``: output the missing entries as JSON null.
719        - ``drop``: remove the property from the feature. This applies to each
720          feature individually so that features may have different properties.
721        - ``keep``: output the missing entries as NaN.
722
723        Examples
724        --------
725
726        >>> from shapely.geometry import Point
727        >>> d = {'col1': ['name1', 'name2'], 'geometry': [Point(1, 2), Point(2, 1)]}
728        >>> gdf = geopandas.GeoDataFrame(d, crs="EPSG:4326")
729        >>> gdf
730            col1                 geometry
731        0  name1  POINT (1.00000 2.00000)
732        1  name2  POINT (2.00000 1.00000)
733
734        >>> gdf.to_json()
735        '{"type": "FeatureCollection", "features": [{"id": "0", "type": "Feature", \
736"properties": {"col1": "name1"}, "geometry": {"type": "Point", "coordinates": [1.0,\
737 2.0]}}, {"id": "1", "type": "Feature", "properties": {"col1": "name2"}, "geometry"\
738: {"type": "Point", "coordinates": [2.0, 1.0]}}]}'
739
740        Alternatively, you can write GeoJSON to file:
741
742        >>> gdf.to_file(path, driver="GeoJSON")  # doctest: +SKIP
743
744        See also
745        --------
746        GeoDataFrame.to_file : write GeoDataFrame to file
747
748        """
749        return json.dumps(
750            self._to_geo(na=na, show_bbox=show_bbox, drop_id=drop_id), **kwargs
751        )
752
    @property
    def __geo_interface__(self):
        """Returns a ``GeoDataFrame`` as a python feature collection.

        Implements the `geo_interface`. The returned python data structure
        represents the ``GeoDataFrame`` as a GeoJSON-like
        ``FeatureCollection``.

        This differs from `_to_geo()` only in that it is a property with
        default args instead of a method. The fixed arguments are
        ``na="null"``, ``show_bbox=True`` and ``drop_id=False``.

        Examples
        --------

        >>> from shapely.geometry import Point
        >>> d = {'col1': ['name1', 'name2'], 'geometry': [Point(1, 2), Point(2, 1)]}
        >>> gdf = geopandas.GeoDataFrame(d, crs="EPSG:4326")
        >>> gdf
            col1                 geometry
        0  name1  POINT (1.00000 2.00000)
        1  name2  POINT (2.00000 1.00000)

        >>> gdf.__geo_interface__
        {'type': 'FeatureCollection', 'features': [{'id': '0', 'type': 'Feature', \
'properties': {'col1': 'name1'}, 'geometry': {'type': 'Point', 'coordinates': (1.0\
, 2.0)}, 'bbox': (1.0, 2.0, 1.0, 2.0)}, {'id': '1', 'type': 'Feature', 'properties\
': {'col1': 'name2'}, 'geometry': {'type': 'Point', 'coordinates': (2.0, 1.0)}, 'b\
box': (2.0, 1.0, 2.0, 1.0)}], 'bbox': (1.0, 1.0, 2.0, 2.0)}


        """
        # unlike the to_json() default, bounding boxes are included here
        return self._to_geo(na="null", show_bbox=True, drop_id=False)
785
786    def iterfeatures(self, na="null", show_bbox=False, drop_id=False):
787        """
788        Returns an iterator that yields feature dictionaries that comply with
789        __geo_interface__
790
791        Parameters
792        ----------
793        na : str, optional
794            Options are {'null', 'drop', 'keep'}, default 'null'.
795            Indicates how to output missing (NaN) values in the GeoDataFrame
796
797            - null: output the missing entries as JSON null
798            - drop: remove the property from the feature. This applies to each feature \
799individually so that features may have different properties
800            - keep: output the missing entries as NaN
801
802        show_bbox : bool, optional
803            Include bbox (bounds) in the geojson. Default False.
804        drop_id : bool, default: False
805            Whether to retain the index of the GeoDataFrame as the id property
806            in the generated GeoJSON. Default is False, but may want True
807            if the index is just arbitrary row numbers.
808
809        Examples
810        --------
811
812        >>> from shapely.geometry import Point
813        >>> d = {'col1': ['name1', 'name2'], 'geometry': [Point(1, 2), Point(2, 1)]}
814        >>> gdf = geopandas.GeoDataFrame(d, crs="EPSG:4326")
815        >>> gdf
816            col1                 geometry
817        0  name1  POINT (1.00000 2.00000)
818        1  name2  POINT (2.00000 1.00000)
819
820        >>> feature = next(gdf.iterfeatures())
821        >>> feature
822        {'id': '0', 'type': 'Feature', 'properties': {'col1': 'name1'}, 'geometry': {\
823'type': 'Point', 'coordinates': (1.0, 2.0)}}
824        """
825        if na not in ["null", "drop", "keep"]:
826            raise ValueError("Unknown na method {0}".format(na))
827
828        if self._geometry_column_name not in self:
829            raise AttributeError(
830                "No geometry data set (expected in"
831                " column '%s')." % self._geometry_column_name
832            )
833
834        ids = np.array(self.index, copy=False)
835        geometries = np.array(self[self._geometry_column_name], copy=False)
836
837        if not self.columns.is_unique:
838            raise ValueError("GeoDataFrame cannot contain duplicated column names.")
839
840        properties_cols = self.columns.difference([self._geometry_column_name])
841
842        if len(properties_cols) > 0:
843            # convert to object to get python scalars.
844            properties = self[properties_cols].astype(object).values
845            if na == "null":
846                properties[pd.isnull(self[properties_cols]).values] = None
847
848            for i, row in enumerate(properties):
849                geom = geometries[i]
850
851                if na == "drop":
852                    properties_items = {
853                        k: v for k, v in zip(properties_cols, row) if not pd.isnull(v)
854                    }
855                else:
856                    properties_items = {k: v for k, v in zip(properties_cols, row)}
857
858                if drop_id:
859                    feature = {}
860                else:
861                    feature = {"id": str(ids[i])}
862
863                feature["type"] = "Feature"
864                feature["properties"] = properties_items
865                feature["geometry"] = mapping(geom) if geom else None
866
867                if show_bbox:
868                    feature["bbox"] = geom.bounds if geom else None
869
870                yield feature
871
872        else:
873            for fid, geom in zip(ids, geometries):
874
875                if drop_id:
876                    feature = {}
877                else:
878                    feature = {"id": str(fid)}
879
880                feature["type"] = "Feature"
881                feature["properties"] = {}
882                feature["geometry"] = mapping(geom) if geom else None
883
884                if show_bbox:
885                    feature["bbox"] = geom.bounds if geom else None
886
887                yield feature
888
889    def _to_geo(self, **kwargs):
890        """
891        Returns a python feature collection (i.e. the geointerface)
892        representation of the GeoDataFrame.
893
894        """
895        geo = {
896            "type": "FeatureCollection",
897            "features": list(self.iterfeatures(**kwargs)),
898        }
899
900        if kwargs.get("show_bbox", False):
901            geo["bbox"] = tuple(self.total_bounds)
902
903        return geo
904
905    def to_wkb(self, hex=False, **kwargs):
906        """
907        Encode all geometry columns in the GeoDataFrame to WKB.
908
909        Parameters
910        ----------
911        hex : bool
912            If true, export the WKB as a hexadecimal string.
913            The default is to return a binary bytes object.
914        kwargs
915            Additional keyword args will be passed to
916            :func:`pygeos.to_wkb` if pygeos is installed.
917
918        Returns
919        -------
920        DataFrame
921            geometry columns are encoded to WKB
922        """
923
924        df = DataFrame(self.copy())
925
926        # Encode all geometry columns to WKB
927        for col in df.columns[df.dtypes == "geometry"]:
928            df[col] = to_wkb(df[col].values, hex=hex, **kwargs)
929
930        return df
931
932    def to_wkt(self, **kwargs):
933        """
934        Encode all geometry columns in the GeoDataFrame to WKT.
935
936        Parameters
937        ----------
938        kwargs
939            Keyword args will be passed to :func:`pygeos.to_wkt`
940            if pygeos is installed.
941
942        Returns
943        -------
944        DataFrame
945            geometry columns are encoded to WKT
946        """
947
948        df = DataFrame(self.copy())
949
950        # Encode all geometry columns to WKT
951        for col in df.columns[df.dtypes == "geometry"]:
952            df[col] = to_wkt(df[col].values, **kwargs)
953
954        return df
955
    def to_parquet(self, path, index=None, compression="snappy", **kwargs):
        """Write a GeoDataFrame to the Parquet format.

        Any geometry columns present are serialized to WKB format in the file.

        Requires 'pyarrow'.

        WARNING: this is an initial implementation of Parquet file support and
        associated metadata.  This is tracking version 0.1.0 of the metadata
        specification at:
        https://github.com/geopandas/geo-arrow-spec

        This metadata specification does not yet make stability promises.  As such,
        we do not yet recommend using this in a production setting unless you are
        able to rewrite your Parquet files.

        .. versionadded:: 0.8

        Parameters
        ----------
        path : str, path object
            Location the file is written to.
        index : bool, default None
            If ``True``, always include the dataframe's index(es) as columns
            in the file output.
            If ``False``, the index(es) will not be written to the file.
            If ``None``, the index(es) will be included as columns in the file
            output except `RangeIndex` which is stored as metadata only.
        compression : {'snappy', 'gzip', 'brotli', None}, default 'snappy'
            Name of the compression to use. Use ``None`` for no compression.
        kwargs
            Additional keyword arguments passed to :func:`pyarrow.parquet.write_table`.

        Returns
        -------
        None

        Examples
        --------

        >>> gdf.to_parquet('data.parquet')  # doctest: +SKIP

        See also
        --------
        GeoDataFrame.to_feather : write GeoDataFrame to feather
        GeoDataFrame.to_file : write GeoDataFrame to file
        """

        # Imported inside the method rather than at module import time
        from geopandas.io.arrow import _to_parquet

        _to_parquet(self, path, compression=compression, index=index, **kwargs)
1002
    def to_feather(self, path, index=None, compression=None, **kwargs):
        """Write a GeoDataFrame to the Feather format.

        Any geometry columns present are serialized to WKB format in the file.

        Requires 'pyarrow' >= 0.17.

        WARNING: this is an initial implementation of Feather file support and
        associated metadata.  This is tracking version 0.1.0 of the metadata
        specification at:
        https://github.com/geopandas/geo-arrow-spec

        This metadata specification does not yet make stability promises.  As such,
        we do not yet recommend using this in a production setting unless you are
        able to rewrite your Feather files.

        .. versionadded:: 0.8

        Parameters
        ----------
        path : str, path object
            Location the file is written to.
        index : bool, default None
            If ``True``, always include the dataframe's index(es) as columns
            in the file output.
            If ``False``, the index(es) will not be written to the file.
            If ``None``, the index(es) will be included as columns in the file
            output except `RangeIndex` which is stored as metadata only.
        compression : {'zstd', 'lz4', 'uncompressed'}, optional
            Name of the compression to use. Use ``"uncompressed"`` for no
            compression. By default uses LZ4 if available, otherwise uncompressed.
        kwargs
            Additional keyword arguments passed to
            :func:`pyarrow.feather.write_feather`.

        Returns
        -------
        None

        Examples
        --------

        >>> gdf.to_feather('data.feather')  # doctest: +SKIP

        See also
        --------
        GeoDataFrame.to_parquet : write GeoDataFrame to parquet
        GeoDataFrame.to_file : write GeoDataFrame to file
        """

        # Imported inside the method rather than at module import time
        from geopandas.io.arrow import _to_feather

        _to_feather(self, path, index=index, compression=compression, **kwargs)
1051
    def to_file(self, filename, driver=None, schema=None, index=None, **kwargs):
        """Write the ``GeoDataFrame`` to a file.

        By default, an ESRI shapefile is written, but any OGR data source
        supported by Fiona can be written. A dictionary of supported OGR
        providers is available via:

        >>> import fiona
        >>> fiona.supported_drivers  # doctest: +SKIP

        Parameters
        ----------
        filename : string
            File path or file handle to write to.
        driver : string, default None
            The OGR format driver used to write the vector file.
            If not specified, it attempts to infer it from the file extension.
            If no extension is specified, it saves ESRI Shapefile to a folder.
        schema : dict, default: None
            If specified, the schema dictionary is passed to Fiona to
            better control how the file is written.
        index : bool, default None
            If True, write index into one or more columns (for MultiIndex).
            Default None writes the index into one or more columns only if
            the index is named, is a MultiIndex, or has a non-integer data
            type. If False, no index is written.

            .. versionadded:: 0.7
                Previously the index was not written.
        **kwargs
            Extra keyword arguments, forwarded unchanged to the writer
            (see Notes).

        Returns
        -------
        None

        Notes
        -----
        The extra keyword arguments ``**kwargs`` are passed to fiona.open and
        can be used to write to multi-layer data, store data within archives
        (zip files), etc.

        The format drivers will attempt to detect the encoding of your data, but
        may fail. In this case, the proper encoding can be specified explicitly
        by using the encoding keyword parameter, e.g. ``encoding='utf-8'``.

        See Also
        --------
        GeoSeries.to_file
        GeoDataFrame.to_postgis : write GeoDataFrame to PostGIS database
        GeoDataFrame.to_parquet : write GeoDataFrame to parquet
        GeoDataFrame.to_feather : write GeoDataFrame to feather

        Examples
        --------

        >>> gdf.to_file('dataframe.shp')  # doctest: +SKIP

        >>> gdf.to_file('dataframe.gpkg', driver='GPKG', layer='name')  # doctest: +SKIP

        >>> gdf.to_file('dataframe.geojson', driver='GeoJSON')  # doctest: +SKIP

        With selected drivers you can also append to a file with `mode="a"`:

        >>> gdf.to_file('dataframe.shp', mode="a")  # doctest: +SKIP
        """
        # Imported inside the method rather than at module import time
        from geopandas.io.file import _to_file

        # driver, schema and index are handed over positionally, in this order
        _to_file(self, filename, driver, schema, index, **kwargs)
1115
1116    def set_crs(self, crs=None, epsg=None, inplace=False, allow_override=False):
1117        """
1118        Set the Coordinate Reference System (CRS) of the ``GeoDataFrame``.
1119
1120        If there are multiple geometry columns within the GeoDataFrame, only
1121        the CRS of the active geometry column is set.
1122
1123        NOTE: The underlying geometries are not transformed to this CRS. To
1124        transform the geometries to a new CRS, use the ``to_crs`` method.
1125
1126        Parameters
1127        ----------
1128        crs : pyproj.CRS, optional if `epsg` is specified
1129            The value can be anything accepted
1130            by :meth:`pyproj.CRS.from_user_input() <pyproj.crs.CRS.from_user_input>`,
1131            such as an authority string (eg "EPSG:4326") or a WKT string.
1132        epsg : int, optional if `crs` is specified
1133            EPSG code specifying the projection.
1134        inplace : bool, default False
1135            If True, the CRS of the GeoDataFrame will be changed in place
1136            (while still returning the result) instead of making a copy of
1137            the GeoDataFrame.
1138        allow_override : bool, default False
1139            If the the GeoDataFrame already has a CRS, allow to replace the
1140            existing CRS, even when both are not equal.
1141
1142        Examples
1143        --------
1144        >>> from shapely.geometry import Point
1145        >>> d = {'col1': ['name1', 'name2'], 'geometry': [Point(1, 2), Point(2, 1)]}
1146        >>> gdf = geopandas.GeoDataFrame(d)
1147        >>> gdf
1148            col1                 geometry
1149        0  name1  POINT (1.00000 2.00000)
1150        1  name2  POINT (2.00000 1.00000)
1151
1152        Setting CRS to a GeoDataFrame without one:
1153
1154        >>> gdf.crs is None
1155        True
1156
1157        >>> gdf = gdf.set_crs('epsg:3857')
1158        >>> gdf.crs  # doctest: +SKIP
1159        <Projected CRS: EPSG:3857>
1160        Name: WGS 84 / Pseudo-Mercator
1161        Axis Info [cartesian]:
1162        - X[east]: Easting (metre)
1163        - Y[north]: Northing (metre)
1164        Area of Use:
1165        - name: World - 85°S to 85°N
1166        - bounds: (-180.0, -85.06, 180.0, 85.06)
1167        Coordinate Operation:
1168        - name: Popular Visualisation Pseudo-Mercator
1169        - method: Popular Visualisation Pseudo Mercator
1170        Datum: World Geodetic System 1984
1171        - Ellipsoid: WGS 84
1172        - Prime Meridian: Greenwich
1173
1174        Overriding existing CRS:
1175
1176        >>> gdf = gdf.set_crs(4326, allow_override=True)
1177
1178        Without ``allow_override=True``, ``set_crs`` returns an error if you try to
1179        override CRS.
1180
1181        See also
1182        --------
1183        GeoDataFrame.to_crs : re-project to another CRS
1184
1185        """
1186        if not inplace:
1187            df = self.copy()
1188        else:
1189            df = self
1190        df.geometry = df.geometry.set_crs(
1191            crs=crs, epsg=epsg, allow_override=allow_override, inplace=True
1192        )
1193        return df
1194
1195    def to_crs(self, crs=None, epsg=None, inplace=False):
1196        """Transform geometries to a new coordinate reference system.
1197
1198        Transform all geometries in an active geometry column to a different coordinate
1199        reference system.  The ``crs`` attribute on the current GeoSeries must
1200        be set.  Either ``crs`` or ``epsg`` may be specified for output.
1201
1202        This method will transform all points in all objects. It has no notion
1203        or projecting entire geometries.  All segments joining points are
1204        assumed to be lines in the current projection, not geodesics. Objects
1205        crossing the dateline (or other projection boundary) will have
1206        undesirable behavior.
1207
1208        Parameters
1209        ----------
1210        crs : pyproj.CRS, optional if `epsg` is specified
1211            The value can be anything accepted by
1212            :meth:`pyproj.CRS.from_user_input() <pyproj.crs.CRS.from_user_input>`,
1213            such as an authority string (eg "EPSG:4326") or a WKT string.
1214        epsg : int, optional if `crs` is specified
1215            EPSG code specifying output projection.
1216        inplace : bool, optional, default: False
1217            Whether to return a new GeoDataFrame or do the transformation in
1218            place.
1219
1220        Returns
1221        -------
1222        GeoDataFrame
1223
1224        Examples
1225        --------
1226        >>> from shapely.geometry import Point
1227        >>> d = {'col1': ['name1', 'name2'], 'geometry': [Point(1, 2), Point(2, 1)]}
1228        >>> gdf = geopandas.GeoDataFrame(d, crs=4326)
1229        >>> gdf
1230            col1                 geometry
1231        0  name1  POINT (1.00000 2.00000)
1232        1  name2  POINT (2.00000 1.00000)
1233        >>> gdf.crs  # doctest: +SKIP
1234        <Geographic 2D CRS: EPSG:4326>
1235        Name: WGS 84
1236        Axis Info [ellipsoidal]:
1237        - Lat[north]: Geodetic latitude (degree)
1238        - Lon[east]: Geodetic longitude (degree)
1239        Area of Use:
1240        - name: World
1241        - bounds: (-180.0, -90.0, 180.0, 90.0)
1242        Datum: World Geodetic System 1984
1243        - Ellipsoid: WGS 84
1244        - Prime Meridian: Greenwich
1245
1246        >>> gdf = gdf.to_crs(3857)
1247        >>> gdf
1248            col1                       geometry
1249        0  name1  POINT (111319.491 222684.209)
1250        1  name2  POINT (222638.982 111325.143)
1251        >>> gdf.crs  # doctest: +SKIP
1252        <Projected CRS: EPSG:3857>
1253        Name: WGS 84 / Pseudo-Mercator
1254        Axis Info [cartesian]:
1255        - X[east]: Easting (metre)
1256        - Y[north]: Northing (metre)
1257        Area of Use:
1258        - name: World - 85°S to 85°N
1259        - bounds: (-180.0, -85.06, 180.0, 85.06)
1260        Coordinate Operation:
1261        - name: Popular Visualisation Pseudo-Mercator
1262        - method: Popular Visualisation Pseudo Mercator
1263        Datum: World Geodetic System 1984
1264        - Ellipsoid: WGS 84
1265        - Prime Meridian: Greenwich
1266
1267        See also
1268        --------
1269        GeoDataFrame.set_crs : assign CRS without re-projection
1270        """
1271        if inplace:
1272            df = self
1273        else:
1274            df = self.copy()
1275        geom = df.geometry.to_crs(crs=crs, epsg=epsg)
1276        df.geometry = geom
1277        df.crs = geom.crs
1278        if not inplace:
1279            return df
1280
    def estimate_utm_crs(self, datum_name="WGS 84"):
        """Returns the estimated UTM CRS based on the bounds of the dataset.

        The estimation is delegated to the active geometry column
        (``self.geometry``).

        .. versionadded:: 0.9

        .. note:: Requires pyproj 3+

        Parameters
        ----------
        datum_name : str, optional
            The name of the datum to use in the query. Default is WGS 84.

        Returns
        -------
        pyproj.CRS

        Examples
        --------
        >>> world = geopandas.read_file(
        ...     geopandas.datasets.get_path("naturalearth_lowres")
        ... )
        >>> germany = world.loc[world.name == "Germany"]
        >>> germany.estimate_utm_crs()  # doctest: +SKIP
        <Projected CRS: EPSG:32632>
        Name: WGS 84 / UTM zone 32N
        Axis Info [cartesian]:
        - E[east]: Easting (metre)
        - N[north]: Northing (metre)
        Area of Use:
        - name: World - N hemisphere - 6°E to 12°E - by country
        - bounds: (6.0, 0.0, 12.0, 84.0)
        Coordinate Operation:
        - name: UTM zone 32N
        - method: Transverse Mercator
        Datum: World Geodetic System 1984
        - Ellipsoid: WGS 84
        - Prime Meridian: Greenwich
        """
        return self.geometry.estimate_utm_crs(datum_name=datum_name)
1320
1321    def __getitem__(self, key):
1322        """
1323        If the result is a column containing only 'geometry', return a
1324        GeoSeries. If it's a DataFrame with a 'geometry' column, return a
1325        GeoDataFrame.
1326        """
1327        result = super().__getitem__(key)
1328        geo_col = self._geometry_column_name
1329        if isinstance(result, Series) and isinstance(result.dtype, GeometryDtype):
1330            result.__class__ = GeoSeries
1331        elif isinstance(result, DataFrame) and geo_col in result:
1332            result.__class__ = GeoDataFrame
1333            result._geometry_column_name = geo_col
1334        elif isinstance(result, DataFrame) and geo_col not in result:
1335            result.__class__ = DataFrame
1336        return result
1337
1338    def __setitem__(self, key, value):
1339        """
1340        Overwritten to preserve CRS of GeometryArray in cases like
1341        df['geometry'] = [geom... for geom in df.geometry]
1342        """
1343        if not pd.api.types.is_list_like(key) and key == self._geometry_column_name:
1344            if pd.api.types.is_scalar(value) or isinstance(value, BaseGeometry):
1345                value = [value] * self.shape[0]
1346            try:
1347                value = _ensure_geometry(value, crs=self.crs)
1348                self._crs = value.crs
1349            except TypeError:
1350                warnings.warn("Geometry column does not contain geometry.")
1351        super().__setitem__(key, value)
1352
1353    #
1354    # Implement pandas methods
1355    #
1356
1357    def merge(self, *args, **kwargs):
1358        r"""Merge two ``GeoDataFrame`` objects with a database-style join.
1359
1360        Returns a ``GeoDataFrame`` if a geometry column is present; otherwise,
1361        returns a pandas ``DataFrame``.
1362
1363        Returns
1364        -------
1365        GeoDataFrame or DataFrame
1366
1367        Notes
1368        -----
1369        The extra arguments ``*args`` and keyword arguments ``**kwargs`` are
1370        passed to DataFrame.merge.
1371
1372        Reference
1373        ---------
1374        https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas\
1375        .DataFrame.merge.html
1376
1377        """
1378        result = DataFrame.merge(self, *args, **kwargs)
1379        geo_col = self._geometry_column_name
1380        if isinstance(result, DataFrame) and geo_col in result:
1381            result.__class__ = GeoDataFrame
1382            result.crs = self.crs
1383            result._geometry_column_name = geo_col
1384        elif isinstance(result, DataFrame) and geo_col not in result:
1385            result.__class__ = DataFrame
1386        return result
1387
1388    @doc(pd.DataFrame)
1389    def apply(self, func, axis=0, raw=False, result_type=None, args=(), **kwargs):
1390        result = super().apply(
1391            func, axis=axis, raw=raw, result_type=result_type, args=args, **kwargs
1392        )
1393        if (
1394            isinstance(result, GeoDataFrame)
1395            and self._geometry_column_name in result.columns
1396            and isinstance(result[self._geometry_column_name].dtype, GeometryDtype)
1397        ):
1398            # apply calls _constructor which resets geom col name to geometry
1399            result._geometry_column_name = self._geometry_column_name
1400            if self.crs is not None and result.crs is None:
1401                result.set_crs(self.crs, inplace=True)
1402        return result
1403
    @property
    def _constructor(self):
        """Return the ``GeoDataFrame`` class.

        NOTE(review): presumably the pandas subclassing hook used to build
        the results of DataFrame operations -- confirm against the pandas
        "extending" documentation.
        """
        return GeoDataFrame
1407
1408    def __finalize__(self, other, method=None, **kwargs):
1409        """propagate metadata from other to self"""
1410        self = super().__finalize__(other, method=method, **kwargs)
1411
1412        # merge operation: using metadata of the left object
1413        if method == "merge":
1414            for name in self._metadata:
1415                object.__setattr__(self, name, getattr(other.left, name, None))
1416        # concat operation: using metadata of the first object
1417        elif method == "concat":
1418            for name in self._metadata:
1419                object.__setattr__(self, name, getattr(other.objs[0], name, None))
1420
1421            if (self.columns == self._geometry_column_name).sum() > 1:
1422                raise ValueError(
1423                    "Concat operation has resulted in multiple columns using "
1424                    f"the geometry column name '{self._geometry_column_name}'.\n"
1425                    f"Please ensure this column from the first DataFrame is not "
1426                    f"repeated."
1427                )
1428        return self
1429
1430    def dissolve(
1431        self,
1432        by=None,
1433        aggfunc="first",
1434        as_index=True,
1435        level=None,
1436        sort=True,
1437        observed=False,
1438        dropna=True,
1439    ):
1440        """
1441        Dissolve geometries within `groupby` into single observation.
1442        This is accomplished by applying the `unary_union` method
1443        to all geometries within a groupself.
1444
1445        Observations associated with each `groupby` group will be aggregated
1446        using the `aggfunc`.
1447
1448        Parameters
1449        ----------
1450        by : string, default None
1451            Column whose values define groups to be dissolved. If None,
1452            whole GeoDataFrame is considered a single group.
1453        aggfunc : function or string, default "first"
1454            Aggregation function for manipulation of data associated
1455            with each group. Passed to pandas `groupby.agg` method.
1456        as_index : boolean, default True
1457            If true, groupby columns become index of result.
1458        level : int or str or sequence of int or sequence of str, default None
1459            If the axis is a MultiIndex (hierarchical), group by a
1460            particular level or levels.
1461
1462            .. versionadded:: 0.9.0
1463        sort : bool, default True
1464            Sort group keys. Get better performance by turning this off.
1465            Note this does not influence the order of observations within
1466            each group. Groupby preserves the order of rows within each group.
1467
1468            .. versionadded:: 0.9.0
1469        observed : bool, default False
1470            This only applies if any of the groupers are Categoricals.
1471            If True: only show observed values for categorical groupers.
1472            If False: show all values for categorical groupers.
1473
1474            .. versionadded:: 0.9.0
1475        dropna : bool, default True
1476            If True, and if group keys contain NA values, NA values
1477            together with row/column will be dropped. If False, NA
1478            values will also be treated as the key in groups.
1479
1480            This parameter is not supported for pandas < 1.1.0.
1481            A warning will be emitted for earlier pandas versions
1482            if a non-default value is given for this parameter.
1483
1484            .. versionadded:: 0.9.0
1485
1486        Returns
1487        -------
1488        GeoDataFrame
1489
1490        Examples
1491        --------
1492        >>> from shapely.geometry import Point
1493        >>> d = {
1494        ...     "col1": ["name1", "name2", "name1"],
1495        ...     "geometry": [Point(1, 2), Point(2, 1), Point(0, 1)],
1496        ... }
1497        >>> gdf = geopandas.GeoDataFrame(d, crs=4326)
1498        >>> gdf
1499            col1                 geometry
1500        0  name1  POINT (1.00000 2.00000)
1501        1  name2  POINT (2.00000 1.00000)
1502        2  name1  POINT (0.00000 1.00000)
1503
1504        >>> dissolved = gdf.dissolve('col1')
1505        >>> dissolved  # doctest: +SKIP
1506                                                    geometry
1507        col1
1508        name1  MULTIPOINT (0.00000 1.00000, 1.00000 2.00000)
1509        name2                        POINT (2.00000 1.00000)
1510
1511        See also
1512        --------
1513        GeoDataFrame.explode : explode muti-part geometries into single geometries
1514
1515        """
1516
1517        if by is None and level is None:
1518            by = np.zeros(len(self), dtype="int64")
1519
1520        groupby_kwargs = dict(
1521            by=by, level=level, sort=sort, observed=observed, dropna=dropna
1522        )
1523        if not compat.PANDAS_GE_11:
1524            groupby_kwargs.pop("dropna")
1525
1526            if not dropna:  # If they passed a non-default dropna value
1527                warnings.warn("dropna kwarg is not supported for pandas < 1.1.0")
1528
1529        # Process non-spatial component
1530        data = self.drop(labels=self.geometry.name, axis=1)
1531        aggregated_data = data.groupby(**groupby_kwargs).agg(aggfunc)
1532
1533        # Process spatial component
1534        def merge_geometries(block):
1535            merged_geom = block.unary_union
1536            return merged_geom
1537
1538        g = self.groupby(group_keys=False, **groupby_kwargs)[self.geometry.name].agg(
1539            merge_geometries
1540        )
1541
1542        # Aggregate
1543        aggregated_geometry = GeoDataFrame(g, geometry=self.geometry.name, crs=self.crs)
1544        # Recombine
1545        aggregated = aggregated_geometry.join(aggregated_data)
1546
1547        # Reset if requested
1548        if not as_index:
1549            aggregated = aggregated.reset_index()
1550
1551        return aggregated
1552
1553    # overrides the pandas native explode method to break up features geometrically
1554    def explode(self, column=None, ignore_index=False, index_parts=None, **kwargs):
1555        """
1556        Explode muti-part geometries into multiple single geometries.
1557
1558        Each row containing a multi-part geometry will be split into
1559        multiple rows with single geometries, thereby increasing the vertical
1560        size of the GeoDataFrame.
1561
1562        .. note:: ignore_index requires pandas 1.1.0 or newer.
1563
1564        Parameters
1565        ----------
1566        column : string, default None
1567            Column to explode. In the case of a geometry column, multi-part
1568            geometries are converted to single-part.
1569            If None, the active geometry column is used.
1570        ignore_index : bool, default False
1571            If True, the resulting index will be labelled 0, 1, …, n - 1,
1572            ignoring `index_parts`.
1573        index_parts : boolean, default True
1574            If True, the resulting index will be a multi-index (original
1575            index with an additional level indicating the multiple
1576            geometries: a new zero-based index for each single part geometry
1577            per multi-part geometry).
1578
1579        Returns
1580        -------
1581        GeoDataFrame
1582            Exploded geodataframe with each single geometry
1583            as a separate entry in the geodataframe.
1584
1585        Examples
1586        --------
1587
1588        >>> from shapely.geometry import MultiPoint
1589        >>> d = {
1590        ...     "col1": ["name1", "name2"],
1591        ...     "geometry": [
1592        ...         MultiPoint([(1, 2), (3, 4)]),
1593        ...         MultiPoint([(2, 1), (0, 0)]),
1594        ...     ],
1595        ... }
1596        >>> gdf = geopandas.GeoDataFrame(d, crs=4326)
1597        >>> gdf
1598            col1                                       geometry
1599        0  name1  MULTIPOINT (1.00000 2.00000, 3.00000 4.00000)
1600        1  name2  MULTIPOINT (2.00000 1.00000, 0.00000 0.00000)
1601
1602        >>> exploded = gdf.explode(index_parts=True)
1603        >>> exploded
1604              col1                 geometry
1605        0 0  name1  POINT (1.00000 2.00000)
1606          1  name1  POINT (3.00000 4.00000)
1607        1 0  name2  POINT (2.00000 1.00000)
1608          1  name2  POINT (0.00000 0.00000)
1609
1610        >>> exploded = gdf.explode(index_parts=False)
1611        >>> exploded
1612            col1                 geometry
1613        0  name1  POINT (1.00000 2.00000)
1614        0  name1  POINT (3.00000 4.00000)
1615        1  name2  POINT (2.00000 1.00000)
1616        1  name2  POINT (0.00000 0.00000)
1617
1618        >>> exploded = gdf.explode(ignore_index=True)
1619        >>> exploded
1620            col1                 geometry
1621        0  name1  POINT (1.00000 2.00000)
1622        1  name1  POINT (3.00000 4.00000)
1623        2  name2  POINT (2.00000 1.00000)
1624        3  name2  POINT (0.00000 0.00000)
1625
1626        See also
1627        --------
1628        GeoDataFrame.dissolve : dissolve geometries into a single observation.
1629
1630        """
1631
1632        # If no column is specified then default to the active geometry column
1633        if column is None:
1634            column = self.geometry.name
1635        # If the specified column is not a geometry dtype use pandas explode
1636        if not isinstance(self[column].dtype, GeometryDtype):
1637            if compat.PANDAS_GE_11:
1638                return super().explode(column, ignore_index=ignore_index, **kwargs)
1639            else:
1640                return super().explode(column, **kwargs)
1641
1642        if index_parts is None:
1643            if not ignore_index:
1644                warnings.warn(
1645                    "Currently, index_parts defaults to True, but in the future, "
1646                    "it will default to False to be consistent with Pandas. "
1647                    "Use `index_parts=True` to keep the current behavior and "
1648                    "True/False to silence the warning.",
1649                    FutureWarning,
1650                    stacklevel=2,
1651                )
1652            index_parts = True
1653
1654        df_copy = self.copy()
1655
1656        level_str = f"level_{df_copy.index.nlevels}"
1657
1658        if level_str in df_copy.columns:  # GH1393
1659            df_copy = df_copy.rename(columns={level_str: f"__{level_str}"})
1660
1661        if index_parts:
1662            exploded_geom = df_copy.geometry.explode(index_parts=True)
1663            exploded_index = exploded_geom.index
1664            exploded_geom = exploded_geom.reset_index(level=-1, drop=True)
1665        else:
1666            exploded_geom = df_copy.geometry.explode(index_parts=True).reset_index(
1667                level=-1, drop=True
1668            )
1669            exploded_index = exploded_geom.index
1670
1671        df = (
1672            df_copy.drop(df_copy._geometry_column_name, axis=1)
1673            .join(exploded_geom)
1674            .__finalize__(self)
1675        )
1676
1677        if ignore_index:
1678            df.reset_index(inplace=True, drop=True)
1679        elif index_parts:
1680            # reset to MultiIndex, otherwise df index is only first level of
1681            # exploded GeoSeries index.
1682            df.set_index(exploded_index, inplace=True)
1683            df.index.names = list(self.index.names) + [None]
1684        else:
1685            df.set_index(exploded_index, inplace=True)
1686            df.index.names = self.index.names
1687
1688        if f"__{level_str}" in df.columns:
1689            df = df.rename(columns={f"__{level_str}": level_str})
1690
1691        geo_df = df.set_geometry(self._geometry_column_name)
1692        return geo_df
1693
1694    # overrides the pandas astype method to ensure the correct return type
1695    def astype(self, dtype, copy=True, errors="raise", **kwargs):
1696        """
1697        Cast a pandas object to a specified dtype ``dtype``.
1698
1699        Returns a GeoDataFrame when the geometry column is kept as geometries,
1700        otherwise returns a pandas DataFrame.
1701
1702        See the pandas.DataFrame.astype docstring for more details.
1703
1704        Returns
1705        -------
1706        GeoDataFrame or DataFrame
1707        """
1708        df = super().astype(dtype, copy=copy, errors=errors, **kwargs)
1709
1710        try:
1711            geoms = df[self._geometry_column_name]
1712            if is_geometry_type(geoms):
1713                return geopandas.GeoDataFrame(df, geometry=self._geometry_column_name)
1714        except KeyError:
1715            pass
1716        # if the geometry column is converted to non-geometries or did not exist
1717        # do not return a GeoDataFrame
1718        return pd.DataFrame(df)
1719
1720    def convert_dtypes(self, *args, **kwargs):
1721        """
1722        Convert columns to best possible dtypes using dtypes supporting ``pd.NA``.
1723
1724        Always returns a GeoDataFrame as no conversions are applied to the
1725        geometry column.
1726
1727        See the pandas.DataFrame.convert_dtypes docstring for more details.
1728
1729        Returns
1730        -------
1731        GeoDataFrame
1732
1733        """
1734        # Overridden to fix GH1870, that return type is not preserved always
1735        # (and where it was, geometry col was not)
1736
1737        if not compat.PANDAS_GE_10:
1738            raise NotImplementedError(
1739                "GeoDataFrame.convert_dtypes requires pandas >= 1.0"
1740            )
1741
1742        return GeoDataFrame(
1743            super().convert_dtypes(*args, **kwargs),
1744            geometry=self.geometry.name,
1745            crs=self.crs,
1746        )
1747
    def to_postgis(
        self,
        name,
        con,
        schema=None,
        if_exists="fail",
        index=False,
        index_label=None,
        chunksize=None,
        dtype=None,
    ):
        """
        Upload GeoDataFrame into PostGIS database.

        This method requires SQLAlchemy and GeoAlchemy2, and a PostgreSQL
        Python driver (e.g. psycopg2) to be installed.

        Parameters
        ----------
        name : str
            Name of the target table.
        con : sqlalchemy.engine.Connection or sqlalchemy.engine.Engine
            Active connection to the PostGIS database.
        schema : string, optional
            Specify the schema. If None, use default schema: 'public'.
        if_exists : {'fail', 'replace', 'append'}, default 'fail'
            How to behave if the table already exists:

            - fail: Raise a ValueError.
            - replace: Drop the table before inserting new values.
            - append: Insert new values to the existing table.
        index : bool, default False
            Write DataFrame index as a column.
            Uses *index_label* as the column name in the table.
        index_label : string or sequence, default None
            Column label for index column(s).
            If None is given (default) and index is True,
            then the index names are used.
        chunksize : int, optional
            Rows will be written in batches of this size at a time.
            By default, all rows will be written at once.
        dtype : dict of column name to SQL type, default None
            Specifying the datatype for columns.
            The keys should be the column names and the values
            should be the SQLAlchemy types.

        Examples
        --------

        >>> from sqlalchemy import create_engine
        >>> engine = create_engine("postgresql://myusername:mypassword@myhost:5432\
/mydatabase")  # doctest: +SKIP
        >>> gdf.to_postgis("my_table", engine)  # doctest: +SKIP

        See also
        --------
        GeoDataFrame.to_file : write GeoDataFrame to file
        read_postgis : read PostGIS database to GeoDataFrame

        """
        # All arguments are handed over unchanged, in signature order, to the
        # writer in geopandas.io.sql.
        geopandas.io.sql._write_postgis(
            self, name, con, schema, if_exists, index, index_label, chunksize, dtype
        )
1811
1812        #
1813        # Implement standard operators for GeoSeries
1814        #
1815
1816    def __xor__(self, other):
1817        """Implement ^ operator as for builtin set type"""
1818        warnings.warn(
1819            "'^' operator will be deprecated. Use the 'symmetric_difference' "
1820            "method instead.",
1821            DeprecationWarning,
1822            stacklevel=2,
1823        )
1824        return self.geometry.symmetric_difference(other)
1825
1826    def __or__(self, other):
1827        """Implement | operator as for builtin set type"""
1828        warnings.warn(
1829            "'|' operator will be deprecated. Use the 'union' method instead.",
1830            DeprecationWarning,
1831            stacklevel=2,
1832        )
1833        return self.geometry.union(other)
1834
1835    def __and__(self, other):
1836        """Implement & operator as for builtin set type"""
1837        warnings.warn(
1838            "'&' operator will be deprecated. Use the 'intersection' method instead.",
1839            DeprecationWarning,
1840            stacklevel=2,
1841        )
1842        return self.geometry.intersection(other)
1843
1844    def __sub__(self, other):
1845        """Implement - operator as for builtin set type"""
1846        warnings.warn(
1847            "'-' operator will be deprecated. Use the 'difference' method instead.",
1848            DeprecationWarning,
1849            stacklevel=2,
1850        )
1851        return self.geometry.difference(other)
1852
    # Plotting is exposed as the ``.plot`` accessor, implemented by
    # ``geopandas.plotting.GeoplotAccessor`` and registered through pandas'
    # ``CachedAccessor``.
    plot = CachedAccessor("plot", geopandas.plotting.GeoplotAccessor)
1854
    # Thin method wrapper around the module-level ``_explore`` helper.
    # NOTE(review): the one-line docstring below is handed to ``@doc`` together
    # with ``_explore``; presumably the decorator combines it with
    # ``_explore``'s own documentation -- confirm in ``._decorator.doc``
    # before rewording it.
    @doc(_explore)
    def explore(self, *args, **kwargs):
        """Interactive map based on folium/leaflet.js"""
        return _explore(self, *args, **kwargs)
1859
1860    def sjoin(self, df, *args, **kwargs):
1861        """Spatial join of two GeoDataFrames.
1862
1863        See the User Guide page :doc:`../../user_guide/mergingdata` for details.
1864
1865        Parameters
1866        ----------
1867        df : GeoDataFrame
1868        how : string, default 'inner'
1869            The type of join:
1870
1871            * 'left': use keys from left_df; retain only left_df geometry column
1872            * 'right': use keys from right_df; retain only right_df geometry column
1873            * 'inner': use intersection of keys from both dfs; retain only
1874              left_df geometry column
1875
1876        predicate : string, default 'intersects'
1877            Binary predicate. Valid values are determined by the spatial index used.
1878            You can check the valid values in left_df or right_df as
1879            ``left_df.sindex.valid_query_predicates`` or
1880            ``right_df.sindex.valid_query_predicates``
1881        lsuffix : string, default 'left'
1882            Suffix to apply to overlapping column names (left GeoDataFrame).
1883        rsuffix : string, default 'right'
1884            Suffix to apply to overlapping column names (right GeoDataFrame).
1885
1886        Examples
1887        --------
1888        >>> countries = geopandas.read_file( \
1889    geopandas.datasets.get_path("naturalearth_lowres"))
1890        >>> cities = geopandas.read_file( \
1891    geopandas.datasets.get_path("naturalearth_cities"))
1892        >>> countries.head()  # doctest: +SKIP
1893            pop_est      continent                      name \
1894    iso_a3  gdp_md_est                                           geometry
1895        0     920938        Oceania                      Fiji    FJI      8374.0 \
1896    MULTIPOLYGON (((180.00000 -16.06713, 180.00000...
1897        1   53950935         Africa                  Tanzania    TZA    150600.0 \
1898    POLYGON ((33.90371 -0.95000, 34.07262 -1.05982...
1899        2     603253         Africa                 W. Sahara    ESH       906.5 \
1900    POLYGON ((-8.66559 27.65643, -8.66512 27.58948...
1901        3   35623680  North America                    Canada    CAN   1674000.0 \
1902    MULTIPOLYGON (((-122.84000 49.00000, -122.9742...
1903        4  326625791  North America  United States of America    USA  18560000.0 \
1904    MULTIPOLYGON (((-122.84000 49.00000, -120.0000...
1905        >>> cities.head()
1906                name                   geometry
1907        0  Vatican City  POINT (12.45339 41.90328)
1908        1    San Marino  POINT (12.44177 43.93610)
1909        2         Vaduz   POINT (9.51667 47.13372)
1910        3    Luxembourg   POINT (6.13000 49.61166)
1911        4       Palikir  POINT (158.14997 6.91664)
1912
1913        >>> cities_w_country_data = cities.sjoin(countries)
1914        >>> cities_w_country_data.head()  # doctest: +SKIP
1915                name_left                   geometry  index_right   pop_est \
1916    continent name_right iso_a3  gdp_md_est
1917        0    Vatican City  POINT (12.45339 41.90328)          141  62137802 \
1918    Europe    Italy    ITA   2221000.0
1919        1    San Marino  POINT (12.44177 43.93610)          141  62137802 \
1920    Europe    Italy    ITA   2221000.0
1921        192          Rome  POINT (12.48131 41.89790)          141  62137802 \
1922    Europe    Italy    ITA   2221000.0
1923        2           Vaduz   POINT (9.51667 47.13372)          114   8754413 \
1924    Europe    Au    stria    AUT    416600.0
1925        184        Vienna  POINT (16.36469 48.20196)          114   8754413 \
1926    Europe    Austria    AUT    416600.0
1927
1928        Notes
1929        ------
1930        Every operation in GeoPandas is planar, i.e. the potential third
1931        dimension is not taken into account.
1932
1933        See also
1934        --------
1935        GeoDataFrame.sjoin_nearest : nearest neighbor join
1936        sjoin : equivalent top-level function
1937        """
1938        return geopandas.sjoin(left_df=self, right_df=df, *args, **kwargs)
1939
    def sjoin_nearest(
        self,
        right,
        how="inner",
        max_distance=None,
        lsuffix="left",
        rsuffix="right",
        distance_col=None,
    ):
        """
        Spatial join of two GeoDataFrames based on the distance between their
        geometries.

        Results will include multiple output records for a single input record
        where there are multiple equidistant nearest or intersected neighbors.

        See the User Guide page
        https://geopandas.readthedocs.io/en/latest/docs/user_guide/mergingdata.html
        for more details.

        Parameters
        ----------
        right : GeoDataFrame
        how : string, default 'inner'
            The type of join:

            * 'left': use keys from left_df; retain only left_df geometry column
            * 'right': use keys from right_df; retain only right_df geometry column
            * 'inner': use intersection of keys from both dfs; retain only
              left_df geometry column

        max_distance : float, default None
            Maximum distance within which to query for nearest geometry.
            Must be greater than 0.
            The max_distance used to search for nearest items in the tree may have a
            significant impact on performance by reducing the number of input
            geometries that are evaluated for nearest items in the tree.
        lsuffix : string, default 'left'
            Suffix to apply to overlapping column names (left GeoDataFrame).
        rsuffix : string, default 'right'
            Suffix to apply to overlapping column names (right GeoDataFrame).
        distance_col : string, default None
            If set, save the distances computed between matching geometries under a
            column of this name in the joined GeoDataFrame.

        Examples
        --------
        >>> countries = geopandas.read_file(geopandas.datasets.get_\
path("naturalearth_lowres"))
        >>> cities = geopandas.read_file(geopandas.datasets.get_path("naturalearth_citi\
es"))
        >>> countries.head(2).name  # doctest: +SKIP
            pop_est      continent                      name \
    iso_a3  gdp_md_est                                           geometry
        0     920938        Oceania                      Fiji    FJI      8374.0  MULTI\
    POLYGON (((180.00000 -16.06713, 180.00000...
        1   53950935         Africa                  Tanzania    TZA    150600.0  POLYG\
    ON ((33.90371 -0.95000, 34.07262 -1.05982...
        >>> cities.head(2).name  # doctest: +SKIP
                name                   geometry
        0  Vatican City  POINT (12.45339 41.90328)
        1    San Marino  POINT (12.44177 43.93610)

        >>> cities_w_country_data = cities.sjoin_nearest(countries)
        >>> cities_w_country_data[['name_left', 'name_right']].head(2)  # doctest: +SKIP
                name_left                   geometry  index_right   pop_est continent n\
    ame_right iso_a3  gdp_md_est
        0    Vatican City  POINT (12.45339 41.90328)          141  62137802    Europe  \
        Italy    ITA   2221000.0
        1      San Marino  POINT (12.44177 43.93610)          141  62137802    Europe  \
        Italy    ITA   2221000.0

        To include the distances:

        >>> cities_w_country_data = cities.sjoin_nearest(countries, \
distance_col="distances")
        >>> cities_w_country_data[["name_left", "name_right", \
"distances"]].head(2)  # doctest: +SKIP
                name_left name_right distances
        0    Vatican City      Italy       0.0
        1      San Marino      Italy       0.0

        In the following example, we get multiple cities for Italy because all results
        are equidistant (in this case zero because they intersect).
        In fact, we get 3 results in total:

        >>> countries_w_city_data = cities.sjoin_nearest(countries, \
distance_col="distances", how="right")
        >>> italy_results = \
countries_w_city_data[countries_w_city_data["name_right"] == "Italy"]
        >>> italy_results  # doctest: +SKIP
                name_left name_right
        141  Vatican City      Italy
        141    San Marino      Italy
        141          Rome      Italy

        See also
        --------
        GeoDataFrame.sjoin : binary predicate joins
        sjoin_nearest : equivalent top-level function

        Notes
        -----
        Since this join relies on distances, results will be inaccurate
        if your geometries are in a geographic CRS.

        Every operation in GeoPandas is planar, i.e. the potential third
        dimension is not taken into account.
        """
        # Delegate to the top-level function with this frame as the left one.
        return geopandas.sjoin_nearest(
            self,
            right,
            how=how,
            max_distance=max_distance,
            lsuffix=lsuffix,
            rsuffix=rsuffix,
            distance_col=distance_col,
        )
2059
    def clip(self, mask, keep_geom_type=False):
        """Clip points, lines, or polygon geometries to the mask extent.

        Both layers must be in the same Coordinate Reference System (CRS).
        The GeoDataFrame will be clipped to the full extent of the `mask` object.

        If there are multiple polygons in mask, data from the GeoDataFrame will be
        clipped to the total boundary of all polygons in mask.

        Parameters
        ----------
        mask : GeoDataFrame, GeoSeries, (Multi)Polygon
            Polygon vector layer used to clip the GeoDataFrame.
            The mask's geometry is dissolved into one geometric feature
            and intersected with the GeoDataFrame.
        keep_geom_type : boolean, default False
            If True, return only geometries of original type in case of intersection
            resulting in multiple geometry types or GeometryCollections.
            If False, return all resulting geometries (potentially mixed types).

        Returns
        -------
        GeoDataFrame
            Vector data (points, lines, polygons) from the GeoDataFrame clipped
            to polygon boundary from mask.

        See also
        --------
        clip : equivalent top-level function

        Examples
        --------
        Clip points (global cities) with a polygon (the South American continent):

        >>> world = geopandas.read_file(
        ...     geopandas.datasets.get_path('naturalearth_lowres'))
        >>> south_america = world[world['continent'] == "South America"]
        >>> capitals = geopandas.read_file(
        ...     geopandas.datasets.get_path('naturalearth_cities'))
        >>> capitals.shape
        (202, 2)

        >>> sa_capitals = capitals.clip(south_america)
        >>> sa_capitals.shape
        (12, 2)
        """
        # Delegate to the top-level function with this frame as the data to clip.
        return geopandas.clip(self, mask=mask, keep_geom_type=keep_geom_type)
2107
    def overlay(self, right, how="intersection", keep_geom_type=None, make_valid=True):
        """Perform spatial overlay between GeoDataFrames.

        Currently only supports GeoDataFrames with uniform geometry types,
        i.e. containing only (Multi)Polygons, or only (Multi)Points, or a
        combination of (Multi)LineString and LinearRing shapes.
        Implements several methods that are all effectively subsets of the union.

        See the User Guide page :doc:`../../user_guide/set_operations` for details.

        Parameters
        ----------
        right : GeoDataFrame
        how : string, default 'intersection'
            Method of spatial overlay: 'intersection', 'union',
            'identity', 'symmetric_difference' or 'difference'.
        keep_geom_type : bool, default None
            If True, return only geometries of the same geometry type the GeoDataFrame
            has, if False, return all resulting geometries. Default is None,
            which will set keep_geom_type to True but warn upon dropping
            geometries.
        make_valid : bool, default True
            If True, any invalid input geometries are corrected with a call to
            `buffer(0)`, if False, a `ValueError` is raised if any input geometries
            are invalid.

        Returns
        -------
        df : GeoDataFrame
            GeoDataFrame with new set of polygons and attributes
            resulting from the overlay

        Examples
        --------
        >>> from shapely.geometry import Polygon
        >>> polys1 = geopandas.GeoSeries([Polygon([(0,0), (2,0), (2,2), (0,2)]),
        ...                               Polygon([(2,2), (4,2), (4,4), (2,4)])])
        >>> polys2 = geopandas.GeoSeries([Polygon([(1,1), (3,1), (3,3), (1,3)]),
        ...                               Polygon([(3,3), (5,3), (5,5), (3,5)])])
        >>> df1 = geopandas.GeoDataFrame({'geometry': polys1, 'df1_data':[1,2]})
        >>> df2 = geopandas.GeoDataFrame({'geometry': polys2, 'df2_data':[1,2]})

        >>> df1.overlay(df2, how='union')
        df1_data  df2_data                                           geometry
        0       1.0       1.0  POLYGON ((2.00000 2.00000, 2.00000 1.00000, 1....
        1       2.0       1.0  POLYGON ((2.00000 2.00000, 2.00000 3.00000, 3....
        2       2.0       2.0  POLYGON ((4.00000 4.00000, 4.00000 3.00000, 3....
        3       1.0       NaN  POLYGON ((2.00000 0.00000, 0.00000 0.00000, 0....
        4       2.0       NaN  MULTIPOLYGON (((3.00000 3.00000, 4.00000 3.000...
        5       NaN       1.0  MULTIPOLYGON (((2.00000 2.00000, 3.00000 2.000...
        6       NaN       2.0  POLYGON ((3.00000 5.00000, 5.00000 5.00000, 5....

        >>> df1.overlay(df2, how='intersection')
        df1_data  df2_data                                           geometry
        0         1         1  POLYGON ((2.00000 2.00000, 2.00000 1.00000, 1....
        1         2         1  POLYGON ((2.00000 2.00000, 2.00000 3.00000, 3....
        2         2         2  POLYGON ((4.00000 4.00000, 4.00000 3.00000, 3....

        >>> df1.overlay(df2, how='symmetric_difference')
        df1_data  df2_data                                           geometry
        0       1.0       NaN  POLYGON ((2.00000 0.00000, 0.00000 0.00000, 0....
        1       2.0       NaN  MULTIPOLYGON (((3.00000 3.00000, 4.00000 3.000...
        2       NaN       1.0  MULTIPOLYGON (((2.00000 2.00000, 3.00000 2.000...
        3       NaN       2.0  POLYGON ((3.00000 5.00000, 5.00000 5.00000, 5....

        >>> df1.overlay(df2, how='difference')
                                                geometry  df1_data
        0  POLYGON ((2.00000 0.00000, 0.00000 0.00000, 0....         1
        1  MULTIPOLYGON (((3.00000 3.00000, 4.00000 3.000...         2

        >>> df1.overlay(df2, how='identity')
        df1_data  df2_data                                           geometry
        0       1.0       1.0  POLYGON ((2.00000 2.00000, 2.00000 1.00000, 1....
        1       2.0       1.0  POLYGON ((2.00000 2.00000, 2.00000 3.00000, 3....
        2       2.0       2.0  POLYGON ((4.00000 4.00000, 4.00000 3.00000, 3....
        3       1.0       NaN  POLYGON ((2.00000 0.00000, 0.00000 0.00000, 0....
        4       2.0       NaN  MULTIPOLYGON (((3.00000 3.00000, 4.00000 3.000...

        See also
        --------
        GeoDataFrame.sjoin : spatial join
        overlay : equivalent top-level function

        Notes
        -----
        Every operation in GeoPandas is planar, i.e. the potential third
        dimension is not taken into account.
        """
        # Delegate to the top-level function with this frame as the left one.
        return geopandas.overlay(
            self, right, how=how, keep_geom_type=keep_geom_type, make_valid=make_valid
        )
2199
2200
def _dataframe_set_geometry(self, col, drop=False, inplace=False, crs=None):
    """
    Turn a plain DataFrame into a GeoDataFrame with ``col`` as geometry.

    ``col``, ``drop`` and ``crs`` are forwarded to
    ``GeoDataFrame.set_geometry``. The conversion always produces a new
    object, so in-place operation is rejected.

    Raises
    ------
    ValueError
        If ``inplace`` is truthy.
    """
    if not inplace:
        # set_geometry(inplace=False) copies, so the BlockManager gets copied
        return GeoDataFrame(self).set_geometry(col, drop=drop, inplace=False, crs=crs)

    raise ValueError(
        "Can't do inplace setting when converting from DataFrame to GeoDataFrame"
    )
2209
2210
# Monkey-patch pandas: give plain DataFrames a ``set_geometry`` method that
# returns a GeoDataFrame (applied as a side effect of importing this module).
DataFrame.set_geometry = _dataframe_set_geometry
2212