import json
import warnings

import numpy as np
import pandas as pd
from pandas import DataFrame, Series
from pandas.core.accessor import CachedAccessor

from shapely.geometry import mapping, shape
from shapely.geometry.base import BaseGeometry

from pyproj import CRS

from geopandas.array import GeometryArray, GeometryDtype, from_shapely, to_wkb, to_wkt
from geopandas.base import GeoPandasBase, is_geometry_type
from geopandas.geoseries import GeoSeries
import geopandas.io
from geopandas.explore import _explore
from . import _compat as compat
from ._decorator import doc


DEFAULT_GEO_COLUMN_NAME = "geometry"


def _ensure_geometry(data, crs=None):
    """
    Ensure the data is of geometry dtype or converted to it.

    If input is a (Geo)Series, output is a GeoSeries, otherwise output
    is GeometryArray.

    If the input is a GeometryDtype with a set CRS, `crs` is ignored.
    """
    if is_geometry_type(data):
        if isinstance(data, Series):
            data = GeoSeries(data)
        if data.crs is None:
            data.crs = crs
        return data
    else:
        if isinstance(data, Series):
            out = from_shapely(np.asarray(data), crs=crs)
            return GeoSeries(out, index=data.index, name=data.name)
        else:
            out = from_shapely(data, crs=crs)
            return out


def _crs_mismatch_warning():
    # TODO: raise error in 0.9 or 0.10.
    warnings.warn(
        "CRS mismatch between CRS of the passed geometries "
        "and 'crs'. Use 'GeoDataFrame.set_crs(crs, "
        "allow_override=True)' to overwrite CRS or "
        "'GeoDataFrame.to_crs(crs)' to reproject geometries. "
        "CRS mismatch will raise an error in the future versions "
        "of GeoPandas.",
        FutureWarning,
        stacklevel=3,
    )


class GeoDataFrame(GeoPandasBase, DataFrame):
    """
    A GeoDataFrame object is a pandas.DataFrame that has a column
    with geometry. In addition to the standard DataFrame constructor arguments,
    GeoDataFrame also accepts the following keyword arguments:

    Parameters
    ----------
    crs : value (optional)
        Coordinate Reference System of the geometry objects. Can be anything accepted by
        :meth:`pyproj.CRS.from_user_input() <pyproj.crs.CRS.from_user_input>`,
        such as an authority string (eg "EPSG:4326") or a WKT string.
    geometry : str or array (optional)
        If str, column to use as geometry. If array, will be set as 'geometry'
        column on GeoDataFrame.

    Examples
    --------
    Constructing GeoDataFrame from a dictionary.

    >>> from shapely.geometry import Point
    >>> d = {'col1': ['name1', 'name2'], 'geometry': [Point(1, 2), Point(2, 1)]}
    >>> gdf = geopandas.GeoDataFrame(d, crs="EPSG:4326")
    >>> gdf
        col1                 geometry
    0  name1  POINT (1.00000 2.00000)
    1  name2  POINT (2.00000 1.00000)

    Notice that the inferred dtype of 'geometry' columns is geometry.
    >>> gdf.dtypes
    col1          object
    geometry    geometry
    dtype: object

    Constructing GeoDataFrame from a pandas DataFrame with a column of WKT geometries:

    >>> import pandas as pd
    >>> d = {'col1': ['name1', 'name2'], 'wkt': ['POINT (1 2)', 'POINT (2 1)']}
    >>> df = pd.DataFrame(d)
    >>> gs = geopandas.GeoSeries.from_wkt(df['wkt'])
    >>> gdf = geopandas.GeoDataFrame(df, geometry=gs, crs="EPSG:4326")
    >>> gdf
        col1          wkt                 geometry
    0  name1  POINT (1 2)  POINT (1.00000 2.00000)
    1  name2  POINT (2 1)  POINT (2.00000 1.00000)

    See also
    --------
    GeoSeries : Series object designed to store shapely geometry objects
    """

    _metadata = ["_crs", "_geometry_column_name"]

    _geometry_column_name = DEFAULT_GEO_COLUMN_NAME

    def __init__(self, data=None, *args, geometry=None, crs=None, **kwargs):
        with compat.ignore_shapely2_warnings():
            super().__init__(data, *args, **kwargs)

        # need to set this before calling self['geometry'], because
        # getitem accesses crs
        self._crs = CRS.from_user_input(crs) if crs else None

        # set_geometry ensures the geometry data have the proper dtype,
        # but is not called if `geometry=None` ('geometry' column present
        # in the data), so therefore need to ensure it here manually
        # but within a try/except because currently non-geometries are
        # allowed in that case
        # TODO do we want to raise / return normal DataFrame in this case?

        # if gdf passed in and geo_col is set, we use that for geometry
        if geometry is None and isinstance(data, GeoDataFrame):
            self._geometry_column_name = data._geometry_column_name
            if crs is not None and data.crs != crs:
                _crs_mismatch_warning()
                # TODO: raise error in 0.9 or 0.10.
            return

        if geometry is None and "geometry" in self.columns:
            # Check for multiple columns with name "geometry". If there are,
            # self["geometry"] is a gdf and constructor gets recursively recalled
            # by pandas internals trying to access this
            if (self.columns == "geometry").sum() > 1:
                raise ValueError(
                    "GeoDataFrame does not support multiple columns "
                    "using the geometry column name 'geometry'."
                )

            # only if we have actual geometry values -> call set_geometry
            index = self.index
            try:
                if (
                    hasattr(self["geometry"].values, "crs")
                    and self["geometry"].values.crs
                    and crs
                    and not self["geometry"].values.crs == crs
                ):
                    _crs_mismatch_warning()
                    # TODO: raise error in 0.9 or 0.10.
                self["geometry"] = _ensure_geometry(self["geometry"].values, crs)
            except TypeError:
                pass
            else:
                if self.index is not index:
                    # With pandas < 1.0 and an empty frame (no rows), the index
                    # gets reset to a default RangeIndex -> set back the original
                    # index if needed
                    self.index = index
                geometry = "geometry"

        if geometry is not None:
            if (
                hasattr(geometry, "crs")
                and geometry.crs
                and crs
                and not geometry.crs == crs
            ):
                _crs_mismatch_warning()
                # TODO: raise error in 0.9 or 0.10.
            self.set_geometry(geometry, inplace=True)

        if geometry is None and crs:
            warnings.warn(
                "Assigning CRS to a GeoDataFrame without a geometry column is now "
                "deprecated and will not be supported in the future.",
                FutureWarning,
                stacklevel=2,
            )
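
    # A minimal, hypothetical session illustrating the CRS-mismatch path handled
    # in the constructor above: when the supplied geometries already carry a CRS
    # and a different ``crs`` is passed, the constructor only warns instead of
    # reprojecting.
    #
    #   >>> from shapely.geometry import Point
    #   >>> gs = geopandas.GeoSeries([Point(1, 2)], crs="EPSG:4326")
    #   >>> geopandas.GeoDataFrame(geometry=gs, crs="EPSG:3857")  # emits FutureWarning
    #
    # To resolve the conflict explicitly, use ``set_crs(..., allow_override=True)``
    # or ``to_crs(...)`` as the warning message suggests.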

    def __setattr__(self, attr, val):
        # have to special case geometry b/c pandas tries to use as column...
        if attr == "geometry":
            object.__setattr__(self, attr, val)
        else:
            super().__setattr__(attr, val)

    def _get_geometry(self):
        if self._geometry_column_name not in self:
            raise AttributeError(
                "No geometry data set yet (expected in"
                " column '%s'.)" % self._geometry_column_name
            )
        return self[self._geometry_column_name]

    def _set_geometry(self, col):
        if not pd.api.types.is_list_like(col):
            raise ValueError("Must use a list-like to set the geometry property")
        self.set_geometry(col, inplace=True)

    geometry = property(
        fget=_get_geometry, fset=_set_geometry, doc="Geometry data for GeoDataFrame"
    )

    def set_geometry(self, col, drop=False, inplace=False, crs=None):
        """
        Set the GeoDataFrame geometry using either an existing column or
        the specified input. By default yields a new object.

        The original geometry column is replaced with the input.

        Parameters
        ----------
        col : column label or array
        drop : boolean, default False
            Delete column to be used as the new geometry
        inplace : boolean, default False
            Modify the GeoDataFrame in place (do not create a new object)
        crs : pyproj.CRS, optional
            Coordinate system to use. The value can be anything accepted
            by :meth:`pyproj.CRS.from_user_input() <pyproj.crs.CRS.from_user_input>`,
            such as an authority string (eg "EPSG:4326") or a WKT string.
            If passed, overrides both DataFrame and col's crs.
            Otherwise, tries to get crs from passed col values or DataFrame.

        Examples
        --------
        >>> from shapely.geometry import Point
        >>> d = {'col1': ['name1', 'name2'], 'geometry': [Point(1, 2), Point(2, 1)]}
        >>> gdf = geopandas.GeoDataFrame(d, crs="EPSG:4326")
        >>> gdf
            col1                 geometry
        0  name1  POINT (1.00000 2.00000)
        1  name2  POINT (2.00000 1.00000)

        Passing an array:

        >>> df1 = gdf.set_geometry([Point(0,0), Point(1,1)])
        >>> df1
            col1                 geometry
        0  name1  POINT (0.00000 0.00000)
        1  name2  POINT (1.00000 1.00000)

        Using existing column:

        >>> gdf["buffered"] = gdf.buffer(2)
        >>> df2 = gdf.set_geometry("buffered")
        >>> df2.geometry
        0    POLYGON ((3.00000 2.00000, 2.99037 1.80397, 2....
        1    POLYGON ((4.00000 1.00000, 3.99037 0.80397, 3....
        Name: buffered, dtype: geometry

        Returns
        -------
        GeoDataFrame

        See also
        --------
        GeoDataFrame.rename_geometry : rename an active geometry column
        """
        # Most of the code here is taken from DataFrame.set_index()
        if inplace:
            frame = self
        else:
            frame = self.copy()

        to_remove = None
        geo_column_name = self._geometry_column_name
        if isinstance(col, (Series, list, np.ndarray, GeometryArray)):
            level = col
        elif hasattr(col, "ndim") and col.ndim != 1:
            raise ValueError("Must pass array with one dimension only.")
        else:
            try:
                level = frame[col]
            except KeyError:
                raise ValueError("Unknown column %s" % col)
            except Exception:
                raise
            if isinstance(level, DataFrame):
                raise ValueError(
                    "GeoDataFrame does not support setting the geometry column where "
                    "the column name is shared by multiple columns."
                )

        if drop:
            to_remove = col
            geo_column_name = self._geometry_column_name
        else:
            geo_column_name = col

        if to_remove:
            del frame[to_remove]

        if not crs:
            level_crs = getattr(level, "crs", None)
            crs = level_crs if level_crs is not None else self._crs

        if isinstance(level, (GeoSeries, GeometryArray)) and level.crs != crs:
            # Avoids caching issues/crs sharing issues
            level = level.copy()
            level.crs = crs

        # Check that we are using a listlike of geometries
        level = _ensure_geometry(level, crs=crs)
        index = frame.index
        frame[geo_column_name] = level
        if frame.index is not index and len(frame.index) == len(index):
            # With pandas < 1.0 and an empty frame (no rows), the index gets reset
            # to a default RangeIndex -> set back the original index if needed
            frame.index = index
        frame._geometry_column_name = geo_column_name
        frame.crs = crs
        if not inplace:
            return frame

    def rename_geometry(self, col, inplace=False):
        """
        Renames the GeoDataFrame geometry column to
        the specified name. By default yields a new object.

        The original geometry column is replaced with the input.

        Parameters
        ----------
        col : new geometry column label
        inplace : boolean, default False
            Modify the GeoDataFrame in place (do not create a new object)

        Examples
        --------
        >>> from shapely.geometry import Point
        >>> d = {'col1': ['name1', 'name2'], 'geometry': [Point(1, 2), Point(2, 1)]}
        >>> df = geopandas.GeoDataFrame(d, crs="EPSG:4326")
        >>> df1 = df.rename_geometry('geom1')
        >>> df1.geometry.name
        'geom1'
        >>> df.rename_geometry('geom1', inplace=True)
        >>> df.geometry.name
        'geom1'

        Returns
        -------
        geodataframe : GeoDataFrame

        See also
        --------
        GeoDataFrame.set_geometry : set the active geometry
        """
        geometry_col = self.geometry.name
        if col in self.columns:
            raise ValueError(f"Column named {col} already exists")
        else:
            if not inplace:
                return self.rename(columns={geometry_col: col}).set_geometry(
                    col, inplace
                )
            self.rename(columns={geometry_col: col}, inplace=inplace)
            self.set_geometry(col, inplace=inplace)

    @property
    def crs(self):
        """
        The Coordinate Reference System (CRS) represented as a ``pyproj.CRS``
        object.

        Returns None if the CRS is not set. When setting, the value can be
        anything accepted by
        :meth:`pyproj.CRS.from_user_input() <pyproj.crs.CRS.from_user_input>`,
        such as an authority string (eg "EPSG:4326") or a WKT string.

        Examples
        --------

        >>> gdf.crs  # doctest: +SKIP
        <Geographic 2D CRS: EPSG:4326>
        Name: WGS 84
        Axis Info [ellipsoidal]:
        - Lat[north]: Geodetic latitude (degree)
        - Lon[east]: Geodetic longitude (degree)
        Area of Use:
        - name: World
        - bounds: (-180.0, -90.0, 180.0, 90.0)
        Datum: World Geodetic System 1984
        - Ellipsoid: WGS 84
        - Prime Meridian: Greenwich

        See also
        --------
        GeoDataFrame.set_crs : assign CRS
        GeoDataFrame.to_crs : re-project to another CRS

        """
        return self._crs

    @crs.setter
    def crs(self, value):
        """Sets the value of the crs"""
        if self._geometry_column_name not in self:
            warnings.warn(
                "Assigning CRS to a GeoDataFrame without a geometry column is now "
                "deprecated and will not be supported in the future.",
                FutureWarning,
                stacklevel=4,
            )
            self._crs = None if not value else CRS.from_user_input(value)
        else:
            if hasattr(self.geometry.values, "crs"):
                self.geometry.values.crs = value
                self._crs = self.geometry.values.crs
            else:
                # column called 'geometry' without geometry
                self._crs = None if not value else CRS.from_user_input(value)

    def __setstate__(self, state):
        # overriding DataFrame method for compat with older pickles (CRS handling)
        if isinstance(state, dict):
            if "_metadata" in state and "crs" in state["_metadata"]:
                metadata = state["_metadata"]
                metadata[metadata.index("crs")] = "_crs"
            if "crs" in state and "_crs" not in state:
                crs = state.pop("crs")
                state["_crs"] = CRS.from_user_input(crs) if crs is not None else crs

        super().__setstate__(state)

        # for some versions that didn't yet have CRS at array level -> crs is set
        # at GeoDataFrame level with '_crs' (and not 'crs'), so without propagating
        # to the GeoSeries/GeometryArray
        try:
            if self.crs is not None:
                if self.geometry.values.crs is None:
                    self.crs = self.crs
        except Exception:
            pass

    @classmethod
    def from_dict(cls, data, geometry=None, crs=None, **kwargs):
        """
        Construct GeoDataFrame from dict of array-like or dicts by
        overriding DataFrame.from_dict method with geometry and crs

        Parameters
        ----------
        data : dict
            Of the form {field : array-like} or {field : dict}.
        geometry : str or array (optional)
            If str, column to use as geometry. If array, will be set as 'geometry'
            column on GeoDataFrame.
        crs : str or dict (optional)
            Coordinate reference system to set on the resulting frame.
        kwargs : keyword arguments
            These arguments are passed to DataFrame.from_dict

        Returns
        -------
        GeoDataFrame

        """
        dataframe = super().from_dict(data, **kwargs)
        return GeoDataFrame(dataframe, geometry=geometry, crs=crs)
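
    # A minimal usage sketch for ``from_dict`` (hypothetical values, shown here
    # because the docstring above has no Examples section):
    #
    #   >>> from shapely.geometry import Point
    #   >>> gdf = geopandas.GeoDataFrame.from_dict(
    #   ...     {"col1": ["name1"], "geometry": [Point(1, 2)]}, crs="EPSG:4326"
    #   ... )
    #
    # Extra keyword arguments (e.g. ``orient="index"``) are forwarded to
    # ``DataFrame.from_dict`` before the geometry column and CRS are applied.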

    @classmethod
    def from_file(cls, filename, **kwargs):
        """Alternate constructor to create a ``GeoDataFrame`` from a file.

        It is recommended to use :func:`geopandas.read_file` instead.

        Can load a ``GeoDataFrame`` from a file in any format recognized by
        `fiona`. See http://fiona.readthedocs.io/en/latest/manual.html for details.

        Parameters
        ----------
        filename : str
            File path or file handle to read from. Depending on which kwargs
            are included, the content of filename may vary. See
            http://fiona.readthedocs.io/en/latest/README.html#usage for usage details.
        kwargs : keyword arguments
            These arguments are passed to fiona.open, and can be used to
            access multi-layer data, data stored within archives (zip files),
            etc.

        Examples
        --------

        >>> path = geopandas.datasets.get_path('nybb')
        >>> gdf = geopandas.GeoDataFrame.from_file(path)
        >>> gdf  # doctest: +SKIP
           BoroCode       BoroName     Shape_Leng    Shape_Area                 \
geometry
        0         5  Staten Island  330470.010332  1.623820e+09  MULTIPOLYGON ((\
(970217.022 145643.332, 970227....
        1         4         Queens  896344.047763  3.045213e+09  MULTIPOLYGON ((\
(1029606.077 156073.814, 102957...
        2         3       Brooklyn  741080.523166  1.937479e+09  MULTIPOLYGON ((\
(1021176.479 151374.797, 102100...
        3         1      Manhattan  359299.096471  6.364715e+08  MULTIPOLYGON ((\
(981219.056 188655.316, 980940....
        4         2          Bronx  464392.991824  1.186925e+09  MULTIPOLYGON ((\
(1012821.806 229228.265, 101278...

        The recommended method of reading files is :func:`geopandas.read_file`:

        >>> gdf = geopandas.read_file(path)

        See also
        --------
        read_file : read file to GeoDataFrame
        GeoDataFrame.to_file : write GeoDataFrame to file

        """
        return geopandas.io.file._read_file(filename, **kwargs)

    @classmethod
    def from_features(cls, features, crs=None, columns=None):
        """
        Alternate constructor to create GeoDataFrame from an iterable of
        features or a feature collection.

        Parameters
        ----------
        features
            - Iterable of features, where each element must be a feature
              dictionary or implement the __geo_interface__.
            - Feature collection, where the 'features' key contains an
              iterable of features.
            - Object holding a feature collection that implements the
              ``__geo_interface__``.
        crs : str or dict (optional)
            Coordinate reference system to set on the resulting frame.
        columns : list of column names, optional
            Optionally specify the column names to include in the output frame.
            This does not overwrite the property names of the input, but can
            ensure a consistent output format.

        Returns
        -------
        GeoDataFrame

        Notes
        -----
        For more information about the ``__geo_interface__``, see
        https://gist.github.com/sgillies/2217756

        Examples
        --------
        >>> feature_coll = {
        ...     "type": "FeatureCollection",
        ...     "features": [
        ...         {
        ...             "id": "0",
        ...             "type": "Feature",
        ...             "properties": {"col1": "name1"},
        ...             "geometry": {"type": "Point", "coordinates": (1.0, 2.0)},
        ...             "bbox": (1.0, 2.0, 1.0, 2.0),
        ...         },
        ...         {
        ...             "id": "1",
        ...             "type": "Feature",
        ...             "properties": {"col1": "name2"},
        ...             "geometry": {"type": "Point", "coordinates": (2.0, 1.0)},
        ...             "bbox": (2.0, 1.0, 2.0, 1.0),
        ...         },
        ...     ],
        ...     "bbox": (1.0, 1.0, 2.0, 2.0),
        ... }
        >>> df = geopandas.GeoDataFrame.from_features(feature_coll)
        >>> df
                          geometry   col1
        0  POINT (1.00000 2.00000)  name1
        1  POINT (2.00000 1.00000)  name2

        """
        # Handle feature collections
        if hasattr(features, "__geo_interface__"):
            fs = features.__geo_interface__
        else:
            fs = features

        if isinstance(fs, dict) and fs.get("type") == "FeatureCollection":
            features_lst = fs["features"]
        else:
            features_lst = features

        rows = []
        for feature in features_lst:
            # load geometry
            if hasattr(feature, "__geo_interface__"):
                feature = feature.__geo_interface__
            row = {
                "geometry": shape(feature["geometry"]) if feature["geometry"] else None
            }
            # load properties
            row.update(feature["properties"])
            rows.append(row)
        return GeoDataFrame(rows, columns=columns, crs=crs)

    @classmethod
    def from_postgis(
        cls,
        sql,
        con,
        geom_col="geom",
        crs=None,
        index_col=None,
        coerce_float=True,
        parse_dates=None,
        params=None,
        chunksize=None,
    ):
        """
        Alternate constructor to create a ``GeoDataFrame`` from a sql query
        containing a geometry column in WKB representation.

        Parameters
        ----------
        sql : string
        con : sqlalchemy.engine.Connection or sqlalchemy.engine.Engine
        geom_col : string, default 'geom'
            column name to convert to shapely geometries
        crs : optional
            Coordinate reference system to use for the returned GeoDataFrame
        index_col : string or list of strings, optional, default: None
            Column(s) to set as index(MultiIndex)
        coerce_float : boolean, default True
            Attempt to convert values of non-string, non-numeric objects (like
            decimal.Decimal) to floating point, useful for SQL result sets
        parse_dates : list or dict, default None
            - List of column names to parse as dates.
            - Dict of ``{column_name: format string}`` where format string is
              strftime compatible in case of parsing string times, or is one of
              (D, s, ns, ms, us) in case of parsing integer timestamps.
            - Dict of ``{column_name: arg dict}``, where the arg dict
              corresponds to the keyword arguments of
              :func:`pandas.to_datetime`. Especially useful with databases
              without native Datetime support, such as SQLite.
        params : list, tuple or dict, optional, default None
            List of parameters to pass to execute method.
        chunksize : int, default None
            If specified, return an iterator where chunksize is the number
            of rows to include in each chunk.

        Examples
        --------
        PostGIS

        >>> from sqlalchemy import create_engine  # doctest: +SKIP
        >>> db_connection_url = "postgresql://myusername:mypassword@myhost:5432/mydb"
        >>> con = create_engine(db_connection_url)  # doctest: +SKIP
        >>> sql = "SELECT geom, highway FROM roads"
        >>> df = geopandas.GeoDataFrame.from_postgis(sql, con)  # doctest: +SKIP

        SpatiaLite

        >>> sql = "SELECT ST_Binary(geom) AS geom, highway FROM roads"
        >>> df = geopandas.GeoDataFrame.from_postgis(sql, con)  # doctest: +SKIP

        The recommended method of reading from PostGIS is
        :func:`geopandas.read_postgis`:

        >>> df = geopandas.read_postgis(sql, con)  # doctest: +SKIP

        See also
        --------
        geopandas.read_postgis : read PostGIS database to GeoDataFrame
        """

        df = geopandas.io.sql._read_postgis(
            sql,
            con,
            geom_col=geom_col,
            crs=crs,
            index_col=index_col,
            coerce_float=coerce_float,
            parse_dates=parse_dates,
            params=params,
            chunksize=chunksize,
        )

        return df

    def to_json(self, na="null", show_bbox=False, drop_id=False, **kwargs):
        """
        Returns a GeoJSON representation of the ``GeoDataFrame`` as a string.

        Parameters
        ----------
        na : {'null', 'drop', 'keep'}, default 'null'
            Indicates how to output missing (NaN) values in the GeoDataFrame.
            See below.
        show_bbox : bool, optional, default: False
            Include bbox (bounds) in the geojson
        drop_id : bool, default: False
            Whether to drop the id (taken from the GeoDataFrame index) from the
            features in the generated GeoJSON. Default is False, but True may be
            preferred if the index is just arbitrary row numbers.

        Notes
        -----
        The remaining *kwargs* are passed to json.dumps().

        Missing (NaN) values in the GeoDataFrame can be represented as follows:

        - ``null``: output the missing entries as JSON null.
        - ``drop``: remove the property from the feature. This applies to each
          feature individually so that features may have different properties.
        - ``keep``: output the missing entries as NaN.

        Examples
        --------

        >>> from shapely.geometry import Point
        >>> d = {'col1': ['name1', 'name2'], 'geometry': [Point(1, 2), Point(2, 1)]}
        >>> gdf = geopandas.GeoDataFrame(d, crs="EPSG:4326")
        >>> gdf
            col1                 geometry
        0  name1  POINT (1.00000 2.00000)
        1  name2  POINT (2.00000 1.00000)

        >>> gdf.to_json()
        '{"type": "FeatureCollection", "features": [{"id": "0", "type": "Feature", \
"properties": {"col1": "name1"}, "geometry": {"type": "Point", "coordinates": [1.0,\
 2.0]}}, {"id": "1", "type": "Feature", "properties": {"col1": "name2"}, "geometry"\
: {"type": "Point", "coordinates": [2.0, 1.0]}}]}'

        Alternatively, you can write GeoJSON to file:

        >>> gdf.to_file(path, driver="GeoJSON")  # doctest: +SKIP

        See also
        --------
        GeoDataFrame.to_file : write GeoDataFrame to file

        """
        return json.dumps(
            self._to_geo(na=na, show_bbox=show_bbox, drop_id=drop_id), **kwargs
        )
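
    # The extra keyword arguments of ``to_json`` go straight to ``json.dumps``,
    # so standard JSON formatting options apply. A small, hypothetical sketch:
    #
    #   >>> gdf.to_json(drop_id=True)          # features without an "id" member
    #   >>> gdf.to_json(na="drop", indent=2)   # pretty-printed via json.dumps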

    @property
    def __geo_interface__(self):
        """Returns a ``GeoDataFrame`` as a python feature collection.

        Implements the `geo_interface`. The returned python data structure
        represents the ``GeoDataFrame`` as a GeoJSON-like
        ``FeatureCollection``.

        This differs from `_to_geo()` only in that it is a property with
        default args instead of a method

        Examples
        --------

        >>> from shapely.geometry import Point
        >>> d = {'col1': ['name1', 'name2'], 'geometry': [Point(1, 2), Point(2, 1)]}
        >>> gdf = geopandas.GeoDataFrame(d, crs="EPSG:4326")
        >>> gdf
            col1                 geometry
        0  name1  POINT (1.00000 2.00000)
        1  name2  POINT (2.00000 1.00000)

        >>> gdf.__geo_interface__
        {'type': 'FeatureCollection', 'features': [{'id': '0', 'type': 'Feature', \
'properties': {'col1': 'name1'}, 'geometry': {'type': 'Point', 'coordinates': (1.0\
, 2.0)}, 'bbox': (1.0, 2.0, 1.0, 2.0)}, {'id': '1', 'type': 'Feature', 'properties\
': {'col1': 'name2'}, 'geometry': {'type': 'Point', 'coordinates': (2.0, 1.0)}, 'b\
box': (2.0, 1.0, 2.0, 1.0)}], 'bbox': (1.0, 1.0, 2.0, 2.0)}


        """
        return self._to_geo(na="null", show_bbox=True, drop_id=False)

    def iterfeatures(self, na="null", show_bbox=False, drop_id=False):
        """
        Returns an iterator that yields feature dictionaries that comply with
        __geo_interface__

        Parameters
        ----------
        na : str, optional
            Options are {'null', 'drop', 'keep'}, default 'null'.
            Indicates how to output missing (NaN) values in the GeoDataFrame

            - null: output the missing entries as JSON null
            - drop: remove the property from the feature. This applies to each feature \
individually so that features may have different properties
            - keep: output the missing entries as NaN

        show_bbox : bool, optional
            Include bbox (bounds) in the geojson. Default False.
        drop_id : bool, default: False
            Whether to drop the id (taken from the GeoDataFrame index) from the
            features in the generated GeoJSON. Default is False, but True may be
            preferred if the index is just arbitrary row numbers.

        Examples
        --------

        >>> from shapely.geometry import Point
        >>> d = {'col1': ['name1', 'name2'], 'geometry': [Point(1, 2), Point(2, 1)]}
        >>> gdf = geopandas.GeoDataFrame(d, crs="EPSG:4326")
        >>> gdf
            col1                 geometry
        0  name1  POINT (1.00000 2.00000)
        1  name2  POINT (2.00000 1.00000)

        >>> feature = next(gdf.iterfeatures())
        >>> feature
        {'id': '0', 'type': 'Feature', 'properties': {'col1': 'name1'}, 'geometry': {\
'type': 'Point', 'coordinates': (1.0, 2.0)}}
        """
        if na not in ["null", "drop", "keep"]:
            raise ValueError("Unknown na method {0}".format(na))

        if self._geometry_column_name not in self:
            raise AttributeError(
                "No geometry data set (expected in"
                " column '%s')." % self._geometry_column_name
            )

        ids = np.array(self.index, copy=False)
        geometries = np.array(self[self._geometry_column_name], copy=False)

        if not self.columns.is_unique:
            raise ValueError("GeoDataFrame cannot contain duplicated column names.")

        properties_cols = self.columns.difference([self._geometry_column_name])

        if len(properties_cols) > 0:
            # convert to object to get python scalars.
            properties = self[properties_cols].astype(object).values
            if na == "null":
                properties[pd.isnull(self[properties_cols]).values] = None

            for i, row in enumerate(properties):
                geom = geometries[i]

                if na == "drop":
                    properties_items = {
                        k: v for k, v in zip(properties_cols, row) if not pd.isnull(v)
                    }
                else:
                    properties_items = {k: v for k, v in zip(properties_cols, row)}

                if drop_id:
                    feature = {}
                else:
                    feature = {"id": str(ids[i])}

                feature["type"] = "Feature"
                feature["properties"] = properties_items
                feature["geometry"] = mapping(geom) if geom else None

                if show_bbox:
                    feature["bbox"] = geom.bounds if geom else None

                yield feature

        else:
            for fid, geom in zip(ids, geometries):

                if drop_id:
                    feature = {}
                else:
                    feature = {"id": str(fid)}

                feature["type"] = "Feature"
                feature["properties"] = {}
                feature["geometry"] = mapping(geom) if geom else None

                if show_bbox:
                    feature["bbox"] = geom.bounds if geom else None

                yield feature

    def _to_geo(self, **kwargs):
        """
        Returns a python feature collection (i.e. the geointerface)
        representation of the GeoDataFrame.

        """
        geo = {
            "type": "FeatureCollection",
            "features": list(self.iterfeatures(**kwargs)),
        }

        if kwargs.get("show_bbox", False):
            geo["bbox"] = tuple(self.total_bounds)

        return geo

    def to_wkb(self, hex=False, **kwargs):
        """
        Encode all geometry columns in the GeoDataFrame to WKB.

        Parameters
        ----------
        hex : bool
            If true, export the WKB as a hexadecimal string.
            The default is to return a binary bytes object.
        kwargs
            Additional keyword args will be passed to
            :func:`pygeos.to_wkb` if pygeos is installed.

        Returns
        -------
        DataFrame
            geometry columns are encoded to WKB
        """

        df = DataFrame(self.copy())

        # Encode all geometry columns to WKB
        for col in df.columns[df.dtypes == "geometry"]:
            df[col] = to_wkb(df[col].values, hex=hex, **kwargs)

        return df

    def to_wkt(self, **kwargs):
        """
        Encode all geometry columns in the GeoDataFrame to WKT.

        Parameters
        ----------
        kwargs
            Keyword args will be passed to :func:`pygeos.to_wkt`
            if pygeos is installed.

        Returns
        -------
        DataFrame
            geometry columns are encoded to WKT
        """

        df = DataFrame(self.copy())

        # Encode all geometry columns to WKT
        for col in df.columns[df.dtypes == "geometry"]:
            df[col] = to_wkt(df[col].values, **kwargs)

        return df
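
    # Neither ``to_wkb`` nor ``to_wkt`` has an Examples section; a minimal,
    # hypothetical sketch of what they return (plain pandas DataFrames with the
    # geometry columns serialized):
    #
    #   >>> gdf.to_wkt().loc[0, "geometry"]
    #   'POINT (1 2)'
    #   >>> gdf.to_wkb(hex=True)  # hex-encoded WKB strings instead of bytes
    #
    # The exact WKT/WKB text depends on the geometry engine in use, so treat the
    # output above as illustrative only.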

    def to_parquet(self, path, index=None, compression="snappy", **kwargs):
        """Write a GeoDataFrame to the Parquet format.

        Any geometry columns present are serialized to WKB format in the file.

        Requires 'pyarrow'.

        WARNING: this is an initial implementation of Parquet file support and
        associated metadata. This is tracking version 0.1.0 of the metadata
        specification at:
        https://github.com/geopandas/geo-arrow-spec

        This metadata specification does not yet make stability promises. As such,
        we do not yet recommend using this in a production setting unless you are
        able to rewrite your Parquet files.

        .. versionadded:: 0.8

        Parameters
        ----------
        path : str, path object
        index : bool, default None
            If ``True``, always include the dataframe's index(es) as columns
            in the file output.
            If ``False``, the index(es) will not be written to the file.
            If ``None``, the index(es) will be included as columns in the file
            output except `RangeIndex` which is stored as metadata only.
        compression : {'snappy', 'gzip', 'brotli', None}, default 'snappy'
            Name of the compression to use. Use ``None`` for no compression.
        kwargs
            Additional keyword arguments passed to :func:`pyarrow.parquet.write_table`.

        Examples
        --------

        >>> gdf.to_parquet('data.parquet')  # doctest: +SKIP

        See also
        --------
        GeoDataFrame.to_feather : write GeoDataFrame to feather
        GeoDataFrame.to_file : write GeoDataFrame to file
        """

        from geopandas.io.arrow import _to_parquet

        _to_parquet(self, path, compression=compression, index=index, **kwargs)

    def to_feather(self, path, index=None, compression=None, **kwargs):
        """Write a GeoDataFrame to the Feather format.

        Any geometry columns present are serialized to WKB format in the file.

        Requires 'pyarrow' >= 0.17.

        WARNING: this is an initial implementation of Feather file support and
        associated metadata. This is tracking version 0.1.0 of the metadata
        specification at:
        https://github.com/geopandas/geo-arrow-spec

        This metadata specification does not yet make stability promises. As such,
        we do not yet recommend using this in a production setting unless you are
        able to rewrite your Feather files.

        .. versionadded:: 0.8

        Parameters
        ----------
        path : str, path object
        index : bool, default None
            If ``True``, always include the dataframe's index(es) as columns
            in the file output.
            If ``False``, the index(es) will not be written to the file.
            If ``None``, the index(es) will be included as columns in the file
            output except `RangeIndex` which is stored as metadata only.
        compression : {'zstd', 'lz4', 'uncompressed'}, optional
            Name of the compression to use. Use ``"uncompressed"`` for no
            compression. By default uses LZ4 if available, otherwise uncompressed.
        kwargs
            Additional keyword arguments passed to
            :func:`pyarrow.feather.write_feather`.

        Examples
        --------

        >>> gdf.to_feather('data.feather')  # doctest: +SKIP

        See also
        --------
        GeoDataFrame.to_parquet : write GeoDataFrame to parquet
        GeoDataFrame.to_file : write GeoDataFrame to file
        """

        from geopandas.io.arrow import _to_feather

        _to_feather(self, path, index=index, compression=compression, **kwargs)
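
    # Both writers above pair with top-level readers; a hypothetical roundtrip
    # (file names are placeholders):
    #
    #   >>> gdf.to_parquet("data.parquet")                 # doctest: +SKIP
    #   >>> gdf2 = geopandas.read_parquet("data.parquet")  # doctest: +SKIP
    #   >>> gdf.to_feather("data.feather")                 # doctest: +SKIP
    #   >>> gdf3 = geopandas.read_feather("data.feather")  # doctest: +SKIP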

    def to_file(self, filename, driver=None, schema=None, index=None, **kwargs):
        """Write the ``GeoDataFrame`` to a file.

        By default, an ESRI shapefile is written, but any OGR data source
        supported by Fiona can be written. A dictionary of supported OGR
        providers is available via:

        >>> import fiona
        >>> fiona.supported_drivers  # doctest: +SKIP

        Parameters
        ----------
        filename : string
            File path or file handle to write to.
        driver : string, default None
            The OGR format driver used to write the vector file.
            If not specified, it attempts to infer it from the file extension.
            If no extension is specified, it saves ESRI Shapefile to a folder.
        schema : dict, default: None
            If specified, the schema dictionary is passed to Fiona to
            better control how the file is written.
        index : bool, default None
            If True, write index into one or more columns (for MultiIndex).
            Default None writes the index into one or more columns only if
            the index is named, is a MultiIndex, or has a non-integer data
            type. If False, no index is written.

            .. versionadded:: 0.7
                Previously the index was not written.

        Notes
        -----
        The extra keyword arguments ``**kwargs`` are passed to fiona.open and
        can be used to write to multi-layer data, store data within archives
        (zip files), etc.

        The format drivers will attempt to detect the encoding of your data, but
        may fail. In this case, the proper encoding can be specified explicitly
        by using the encoding keyword parameter, e.g. ``encoding='utf-8'``.

        See Also
        --------
        GeoSeries.to_file
        GeoDataFrame.to_postgis : write GeoDataFrame to PostGIS database
        GeoDataFrame.to_parquet : write GeoDataFrame to parquet
        GeoDataFrame.to_feather : write GeoDataFrame to feather

        Examples
        --------

        >>> gdf.to_file('dataframe.shp')  # doctest: +SKIP

        >>> gdf.to_file('dataframe.gpkg', driver='GPKG', layer='name')  # doctest: +SKIP

        >>> gdf.to_file('dataframe.geojson', driver='GeoJSON')  # doctest: +SKIP

        With selected drivers you can also append to a file with `mode="a"`:

        >>> gdf.to_file('dataframe.shp', mode="a")  # doctest: +SKIP
        """
        from geopandas.io.file import _to_file

        _to_file(self, filename, driver, schema, index, **kwargs)

    def set_crs(self, crs=None, epsg=None, inplace=False, allow_override=False):
        """
        Set the Coordinate Reference System (CRS) of the ``GeoDataFrame``.

        If there are multiple geometry columns within the GeoDataFrame, only
        the CRS of the active geometry column is set.

        NOTE: The underlying geometries are not transformed to this CRS. To
        transform the geometries to a new CRS, use the ``to_crs`` method.

        Parameters
        ----------
        crs : pyproj.CRS, optional if `epsg` is specified
            The value can be anything accepted
            by :meth:`pyproj.CRS.from_user_input() <pyproj.crs.CRS.from_user_input>`,
            such as an authority string (eg "EPSG:4326") or a WKT string.
        epsg : int, optional if `crs` is specified
            EPSG code specifying the projection.
        inplace : bool, default False
            If True, the CRS of the GeoDataFrame will be changed in place
            (while still returning the result) instead of making a copy of
            the GeoDataFrame.
        allow_override : bool, default False
            If the GeoDataFrame already has a CRS, allow to replace the
            existing CRS, even when both are not equal.

        Examples
        --------
        >>> from shapely.geometry import Point
        >>> d = {'col1': ['name1', 'name2'], 'geometry': [Point(1, 2), Point(2, 1)]}
        >>> gdf = geopandas.GeoDataFrame(d)
        >>> gdf
            col1                 geometry
        0  name1  POINT (1.00000 2.00000)
        1  name2  POINT (2.00000 1.00000)

        Setting CRS to a GeoDataFrame without one:

        >>> gdf.crs is None
        True

        >>> gdf = gdf.set_crs('epsg:3857')
        >>> gdf.crs  # doctest: +SKIP
        <Projected CRS: EPSG:3857>
        Name: WGS 84 / Pseudo-Mercator
        Axis Info [cartesian]:
        - X[east]: Easting (metre)
        - Y[north]: Northing (metre)
        Area of Use:
        - name: World - 85°S to 85°N
        - bounds: (-180.0, -85.06, 180.0, 85.06)
        Coordinate Operation:
        - name: Popular Visualisation Pseudo-Mercator
        - method: Popular Visualisation Pseudo Mercator
        Datum: World Geodetic System 1984
        - Ellipsoid: WGS 84
        - Prime Meridian: Greenwich

        Overriding existing CRS:

        >>> gdf = gdf.set_crs(4326, allow_override=True)

        Without ``allow_override=True``, ``set_crs`` raises an error if you try to
        override CRS.

        See also
        --------
        GeoDataFrame.to_crs : re-project to another CRS

        """
        if not inplace:
            df = self.copy()
        else:
            df = self
        df.geometry = df.geometry.set_crs(
            crs=crs, epsg=epsg, allow_override=allow_override, inplace=True
        )
        return df

    def to_crs(self, crs=None, epsg=None, inplace=False):
        """Transform geometries to a new coordinate reference system.

        Transform all geometries in an active geometry column to a different coordinate
        reference system. The ``crs`` attribute on the current GeoSeries must
        be set. Either ``crs`` or ``epsg`` may be specified for output.

        This method will transform all points in all objects. It has no notion
        of projecting entire geometries. All segments joining points are
        assumed to be lines in the current projection, not geodesics. Objects
        crossing the dateline (or other projection boundary) will have
        undesirable behavior.

        Parameters
        ----------
        crs : pyproj.CRS, optional if `epsg` is specified
            The value can be anything accepted by
            :meth:`pyproj.CRS.from_user_input() <pyproj.crs.CRS.from_user_input>`,
            such as an authority string (eg "EPSG:4326") or a WKT string.
        epsg : int, optional if `crs` is specified
            EPSG code specifying output projection.
        inplace : bool, optional, default: False
            Whether to return a new GeoDataFrame or do the transformation in
            place.

        Returns
        -------
        GeoDataFrame

        Examples
        --------
        >>> from shapely.geometry import Point
        >>> d = {'col1': ['name1', 'name2'], 'geometry': [Point(1, 2), Point(2, 1)]}
        >>> gdf = geopandas.GeoDataFrame(d, crs=4326)
        >>> gdf
            col1                 geometry
        0  name1  POINT (1.00000 2.00000)
        1  name2  POINT (2.00000 1.00000)
        >>> gdf.crs  # doctest: +SKIP
        <Geographic 2D CRS: EPSG:4326>
        Name: WGS 84
        Axis Info [ellipsoidal]:
        - Lat[north]: Geodetic latitude (degree)
        - Lon[east]: Geodetic longitude (degree)
        Area of Use:
        - name: World
        - bounds: (-180.0, -90.0, 180.0, 90.0)
        Datum: World Geodetic System 1984
        - Ellipsoid: WGS 84
        - Prime Meridian: Greenwich

        >>> gdf = gdf.to_crs(3857)
        >>> gdf
            col1                       geometry
        0  name1  POINT (111319.491 222684.209)
        1  name2  POINT (222638.982 111325.143)
        >>> gdf.crs  # doctest: +SKIP
        <Projected CRS: EPSG:3857>
        Name: WGS 84 / Pseudo-Mercator
        Axis Info [cartesian]:
        - X[east]: Easting (metre)
        - Y[north]: Northing (metre)
        Area of Use:
        - name: World - 85°S to 85°N
        - bounds: (-180.0, -85.06, 180.0, 85.06)
        Coordinate Operation:
        - name: Popular Visualisation Pseudo-Mercator
        - method: Popular Visualisation Pseudo Mercator
        Datum: World Geodetic System 1984
        - Ellipsoid: WGS 84
        - Prime Meridian: Greenwich

        See also
        --------
        GeoDataFrame.set_crs : assign CRS without re-projection
        """
        if inplace:
            df = self
        else:
            df = self.copy()
        geom = df.geometry.to_crs(crs=crs, epsg=epsg)
        df.geometry = geom
        df.crs = geom.crs
        if not inplace:
            return df

    def estimate_utm_crs(self, datum_name="WGS 84"):
        """Returns the estimated UTM CRS based on the bounds of the dataset.

        .. versionadded:: 0.9

        .. note:: Requires pyproj 3+

        Parameters
        ----------
        datum_name : str, optional
            The name of the datum to use in the query. Default is WGS 84.

        Returns
        -------
        pyproj.CRS

        Examples
        --------
        >>> world = geopandas.read_file(
        ...     geopandas.datasets.get_path("naturalearth_lowres")
        ... )
        >>> germany = world.loc[world.name == "Germany"]
        >>> germany.estimate_utm_crs()  # doctest: +SKIP
        <Projected CRS: EPSG:32632>
        Name: WGS 84 / UTM zone 32N
        Axis Info [cartesian]:
        - E[east]: Easting (metre)
        - N[north]: Northing (metre)
        Area of Use:
        - name: World - N hemisphere - 6°E to 12°E - by country
        - bounds: (6.0, 0.0, 12.0, 84.0)
        Coordinate Operation:
        - name: UTM zone 32N
        - method: Transverse Mercator
        Datum: World Geodetic System 1984
        - Ellipsoid: WGS 84
        - Prime Meridian: Greenwich
        """
        return self.geometry.estimate_utm_crs(datum_name=datum_name)

    def __getitem__(self, key):
        """
        If the result is a column containing only 'geometry', return a
        GeoSeries. If it's a DataFrame with a 'geometry' column, return a
        GeoDataFrame.
        """
        result = super().__getitem__(key)
        geo_col = self._geometry_column_name
        if isinstance(result, Series) and isinstance(result.dtype, GeometryDtype):
            result.__class__ = GeoSeries
        elif isinstance(result, DataFrame) and geo_col in result:
            result.__class__ = GeoDataFrame
            result._geometry_column_name = geo_col
        elif isinstance(result, DataFrame) and geo_col not in result:
            result.__class__ = DataFrame
        return result
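
    # A small, hypothetical illustration of the dispatch above: selections that
    # keep the active geometry column come back as GeoSeries/GeoDataFrame, other
    # selections fall back to plain pandas objects.
    #
    #   >>> type(gdf["geometry"])            # GeoSeries
    #   >>> type(gdf[["col1", "geometry"]])  # GeoDataFrame
    #   >>> type(gdf[["col1"]])              # DataFrame (no geometry column selected)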
1326 """ 1327 result = super().__getitem__(key) 1328 geo_col = self._geometry_column_name 1329 if isinstance(result, Series) and isinstance(result.dtype, GeometryDtype): 1330 result.__class__ = GeoSeries 1331 elif isinstance(result, DataFrame) and geo_col in result: 1332 result.__class__ = GeoDataFrame 1333 result._geometry_column_name = geo_col 1334 elif isinstance(result, DataFrame) and geo_col not in result: 1335 result.__class__ = DataFrame 1336 return result 1337 1338 def __setitem__(self, key, value): 1339 """ 1340 Overwritten to preserve CRS of GeometryArray in cases like 1341 df['geometry'] = [geom... for geom in df.geometry] 1342 """ 1343 if not pd.api.types.is_list_like(key) and key == self._geometry_column_name: 1344 if pd.api.types.is_scalar(value) or isinstance(value, BaseGeometry): 1345 value = [value] * self.shape[0] 1346 try: 1347 value = _ensure_geometry(value, crs=self.crs) 1348 self._crs = value.crs 1349 except TypeError: 1350 warnings.warn("Geometry column does not contain geometry.") 1351 super().__setitem__(key, value) 1352 1353 # 1354 # Implement pandas methods 1355 # 1356 1357 def merge(self, *args, **kwargs): 1358 r"""Merge two ``GeoDataFrame`` objects with a database-style join. 1359 1360 Returns a ``GeoDataFrame`` if a geometry column is present; otherwise, 1361 returns a pandas ``DataFrame``. 1362 1363 Returns 1364 ------- 1365 GeoDataFrame or DataFrame 1366 1367 Notes 1368 ----- 1369 The extra arguments ``*args`` and keyword arguments ``**kwargs`` are 1370 passed to DataFrame.merge. 1371 1372 Reference 1373 --------- 1374 https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas\ 1375 .DataFrame.merge.html 1376 1377 """ 1378 result = DataFrame.merge(self, *args, **kwargs) 1379 geo_col = self._geometry_column_name 1380 if isinstance(result, DataFrame) and geo_col in result: 1381 result.__class__ = GeoDataFrame 1382 result.crs = self.crs 1383 result._geometry_column_name = geo_col 1384 elif isinstance(result, DataFrame) and geo_col not in result: 1385 result.__class__ = DataFrame 1386 return result 1387 1388 @doc(pd.DataFrame) 1389 def apply(self, func, axis=0, raw=False, result_type=None, args=(), **kwargs): 1390 result = super().apply( 1391 func, axis=axis, raw=raw, result_type=result_type, args=args, **kwargs 1392 ) 1393 if ( 1394 isinstance(result, GeoDataFrame) 1395 and self._geometry_column_name in result.columns 1396 and isinstance(result[self._geometry_column_name].dtype, GeometryDtype) 1397 ): 1398 # apply calls _constructor which resets geom col name to geometry 1399 result._geometry_column_name = self._geometry_column_name 1400 if self.crs is not None and result.crs is None: 1401 result.set_crs(self.crs, inplace=True) 1402 return result 1403 1404 @property 1405 def _constructor(self): 1406 return GeoDataFrame 1407 1408 def __finalize__(self, other, method=None, **kwargs): 1409 """propagate metadata from other to self""" 1410 self = super().__finalize__(other, method=method, **kwargs) 1411 1412 # merge operation: using metadata of the left object 1413 if method == "merge": 1414 for name in self._metadata: 1415 object.__setattr__(self, name, getattr(other.left, name, None)) 1416 # concat operation: using metadata of the first object 1417 elif method == "concat": 1418 for name in self._metadata: 1419 object.__setattr__(self, name, getattr(other.objs[0], name, None)) 1420 1421 if (self.columns == self._geometry_column_name).sum() > 1: 1422 raise ValueError( 1423 "Concat operation has resulted in multiple columns using " 1424 f"the geometry column 
                    f"the geometry column name '{self._geometry_column_name}'.\n"
                    f"Please ensure this column from the first DataFrame is not "
                    f"repeated."
                )
        return self

    def dissolve(
        self,
        by=None,
        aggfunc="first",
        as_index=True,
        level=None,
        sort=True,
        observed=False,
        dropna=True,
    ):
        """
        Dissolve geometries within `groupby` into single observation.
        This is accomplished by applying the `unary_union` method
        to all geometries within a group.

        Observations associated with each `groupby` group will be aggregated
        using the `aggfunc`.

        Parameters
        ----------
        by : string, default None
            Column whose values define groups to be dissolved. If None,
            whole GeoDataFrame is considered a single group.
        aggfunc : function or string, default "first"
            Aggregation function for manipulation of data associated
            with each group. Passed to pandas `groupby.agg` method.
        as_index : boolean, default True
            If true, groupby columns become index of result.
        level : int or str or sequence of int or sequence of str, default None
            If the axis is a MultiIndex (hierarchical), group by a
            particular level or levels.

            .. versionadded:: 0.9.0
        sort : bool, default True
            Sort group keys. Get better performance by turning this off.
            Note this does not influence the order of observations within
            each group. Groupby preserves the order of rows within each group.

            .. versionadded:: 0.9.0
        observed : bool, default False
            This only applies if any of the groupers are Categoricals.
            If True: only show observed values for categorical groupers.
            If False: show all values for categorical groupers.

            .. versionadded:: 0.9.0
        dropna : bool, default True
            If True, and if group keys contain NA values, NA values
            together with row/column will be dropped. If False, NA
            values will also be treated as the key in groups.

            This parameter is not supported for pandas < 1.1.0.
            A warning will be emitted for earlier pandas versions
            if a non-default value is given for this parameter.

            .. versionadded:: 0.9.0

        Returns
        -------
        GeoDataFrame

        Examples
        --------
        >>> from shapely.geometry import Point
        >>> d = {
        ...     "col1": ["name1", "name2", "name1"],
        ...     "geometry": [Point(1, 2), Point(2, 1), Point(0, 1)],
        ... }
        >>> gdf = geopandas.GeoDataFrame(d, crs=4326)
        >>> gdf
            col1                 geometry
        0  name1  POINT (1.00000 2.00000)
        1  name2  POINT (2.00000 1.00000)
        2  name1  POINT (0.00000 1.00000)

        >>> dissolved = gdf.dissolve('col1')
        >>> dissolved  # doctest: +SKIP
                                                     geometry
        col1
        name1  MULTIPOINT (0.00000 1.00000, 1.00000 2.00000)
        name2                        POINT (2.00000 1.00000)

        See also
        --------
        GeoDataFrame.explode : explode multi-part geometries into single geometries

        """

        if by is None and level is None:
            by = np.zeros(len(self), dtype="int64")

        groupby_kwargs = dict(
            by=by, level=level, sort=sort, observed=observed, dropna=dropna
        )
        if not compat.PANDAS_GE_11:
            groupby_kwargs.pop("dropna")

            if not dropna:  # If they passed a non-default dropna value
                warnings.warn("dropna kwarg is not supported for pandas < 1.1.0")

        # Process non-spatial component
        data = self.drop(labels=self.geometry.name, axis=1)
        aggregated_data = data.groupby(**groupby_kwargs).agg(aggfunc)

        # Process spatial component
        def merge_geometries(block):
            merged_geom = block.unary_union
            return merged_geom

        g = self.groupby(group_keys=False, **groupby_kwargs)[self.geometry.name].agg(
            merge_geometries
        )

        # Aggregate
        aggregated_geometry = GeoDataFrame(g, geometry=self.geometry.name, crs=self.crs)
        # Recombine
        aggregated = aggregated_geometry.join(aggregated_data)

        # Reset if requested
        if not as_index:
            aggregated = aggregated.reset_index()

        return aggregated

    # overrides the pandas native explode method to break up features geometrically
    def explode(self, column=None, ignore_index=False, index_parts=None, **kwargs):
        """
        Explode multi-part geometries into multiple single geometries.

        Each row containing a multi-part geometry will be split into
        multiple rows with single geometries, thereby increasing the vertical
        size of the GeoDataFrame.

        .. note:: ignore_index requires pandas 1.1.0 or newer.

        Parameters
        ----------
        column : string, default None
            Column to explode. In the case of a geometry column, multi-part
            geometries are converted to single-part.
            If None, the active geometry column is used.
        ignore_index : bool, default False
            If True, the resulting index will be labelled 0, 1, …, n - 1,
            ignoring `index_parts`.
        index_parts : boolean, default True
            If True, the resulting index will be a multi-index (original
            index with an additional level indicating the multiple
            geometries: a new zero-based index for each single part geometry
            per multi-part geometry).

        Returns
        -------
        GeoDataFrame
            Exploded geodataframe with each single geometry
            as a separate entry in the geodataframe.

        Examples
        --------

        >>> from shapely.geometry import MultiPoint
        >>> d = {
        ...     "col1": ["name1", "name2"],
        ...     "geometry": [
        ...         MultiPoint([(1, 2), (3, 4)]),
        ...         MultiPoint([(2, 1), (0, 0)]),
        ...     ],
        ... }
        >>> gdf = geopandas.GeoDataFrame(d, crs=4326)
        >>> gdf
            col1                                       geometry
        0  name1  MULTIPOINT (1.00000 2.00000, 3.00000 4.00000)
        1  name2  MULTIPOINT (2.00000 1.00000, 0.00000 0.00000)

        >>> exploded = gdf.explode(index_parts=True)
        >>> exploded
              col1                 geometry
        0 0  name1  POINT (1.00000 2.00000)
          1  name1  POINT (3.00000 4.00000)
        1 0  name2  POINT (2.00000 1.00000)
          1  name2  POINT (0.00000 0.00000)

        >>> exploded = gdf.explode(index_parts=False)
        >>> exploded
            col1                 geometry
        0  name1  POINT (1.00000 2.00000)
        0  name1  POINT (3.00000 4.00000)
        1  name2  POINT (2.00000 1.00000)
        1  name2  POINT (0.00000 0.00000)

        >>> exploded = gdf.explode(ignore_index=True)
        >>> exploded
            col1                 geometry
        0  name1  POINT (1.00000 2.00000)
        1  name1  POINT (3.00000 4.00000)
        2  name2  POINT (2.00000 1.00000)
        3  name2  POINT (0.00000 0.00000)

        See also
        --------
        GeoDataFrame.dissolve : dissolve geometries into a single observation.

        """

        # If no column is specified then default to the active geometry column
        if column is None:
            column = self.geometry.name
        # If the specified column is not a geometry dtype use pandas explode
        if not isinstance(self[column].dtype, GeometryDtype):
            if compat.PANDAS_GE_11:
                return super().explode(column, ignore_index=ignore_index, **kwargs)
            else:
                return super().explode(column, **kwargs)

        if index_parts is None:
            if not ignore_index:
                warnings.warn(
                    "Currently, index_parts defaults to True, but in the future, "
                    "it will default to False to be consistent with Pandas. "
                    "Use `index_parts=True` to keep the current behavior and "
                    "True/False to silence the warning.",
                    FutureWarning,
                    stacklevel=2,
                )
            index_parts = True

        df_copy = self.copy()

        level_str = f"level_{df_copy.index.nlevels}"

        if level_str in df_copy.columns:  # GH1393
            df_copy = df_copy.rename(columns={level_str: f"__{level_str}"})

        if index_parts:
            exploded_geom = df_copy.geometry.explode(index_parts=True)
            exploded_index = exploded_geom.index
            exploded_geom = exploded_geom.reset_index(level=-1, drop=True)
        else:
            exploded_geom = df_copy.geometry.explode(index_parts=True).reset_index(
                level=-1, drop=True
            )
            exploded_index = exploded_geom.index

        df = (
            df_copy.drop(df_copy._geometry_column_name, axis=1)
            .join(exploded_geom)
            .__finalize__(self)
        )

        if ignore_index:
            df.reset_index(inplace=True, drop=True)
        elif index_parts:
            # reset to MultiIndex, otherwise df index is only first level of
            # exploded GeoSeries index.
            df.set_index(exploded_index, inplace=True)
            df.index.names = list(self.index.names) + [None]
        else:
            df.set_index(exploded_index, inplace=True)
            df.index.names = self.index.names

        if f"__{level_str}" in df.columns:
            df = df.rename(columns={f"__{level_str}": level_str})

        geo_df = df.set_geometry(self._geometry_column_name)
        return geo_df

    # overrides the pandas astype method to ensure the correct return type
    def astype(self, dtype, copy=True, errors="raise", **kwargs):
        """
        Cast a pandas object to a specified dtype ``dtype``.

        Returns a GeoDataFrame when the geometry column is kept as geometries,
        otherwise returns a pandas DataFrame.

        See the pandas.DataFrame.astype docstring for more details.

        Returns
        -------
        GeoDataFrame or DataFrame
        """
        df = super().astype(dtype, copy=copy, errors=errors, **kwargs)

        try:
            geoms = df[self._geometry_column_name]
            if is_geometry_type(geoms):
                return geopandas.GeoDataFrame(df, geometry=self._geometry_column_name)
        except KeyError:
            pass
        # if the geometry column is converted to non-geometries or did not exist
        # do not return a GeoDataFrame
        return pd.DataFrame(df)

    def convert_dtypes(self, *args, **kwargs):
        """
        Convert columns to best possible dtypes using dtypes supporting ``pd.NA``.

        Always returns a GeoDataFrame as no conversions are applied to the
        geometry column.

        See the pandas.DataFrame.convert_dtypes docstring for more details.

        Returns
        -------
        GeoDataFrame

        """
        # Overridden to fix GH1870, that return type is not preserved always
        # (and where it was, geometry col was not)

        if not compat.PANDAS_GE_10:
            raise NotImplementedError(
                "GeoDataFrame.convert_dtypes requires pandas >= 1.0"
            )

        return GeoDataFrame(
            super().convert_dtypes(*args, **kwargs),
            geometry=self.geometry.name,
            crs=self.crs,
        )

    def to_postgis(
        self,
        name,
        con,
        schema=None,
        if_exists="fail",
        index=False,
        index_label=None,
        chunksize=None,
        dtype=None,
    ):
        """
        Upload GeoDataFrame into PostGIS database.

        This method requires SQLAlchemy and GeoAlchemy2, and a PostgreSQL
        Python driver (e.g. psycopg2) to be installed.

        Parameters
        ----------
        name : str
            Name of the target table.
        con : sqlalchemy.engine.Connection or sqlalchemy.engine.Engine
            Active connection to the PostGIS database.
        if_exists : {'fail', 'replace', 'append'}, default 'fail'
            How to behave if the table already exists:

            - fail: Raise a ValueError.
            - replace: Drop the table before inserting new values.
            - append: Insert new values to the existing table.
        schema : string, optional
            Specify the schema. If None, use default schema: 'public'.
        index : bool, default False
            Write DataFrame index as a column.
            Uses *index_label* as the column name in the table.
        index_label : string or sequence, default None
            Column label for index column(s).
            If None is given (default) and index is True,
            then the index names are used.
        chunksize : int, optional
            Rows will be written in batches of this size at a time.
            By default, all rows will be written at once.
        dtype : dict of column name to SQL type, default None
            Specifying the datatype for columns.
            The keys should be the column names and the values
            should be the SQLAlchemy types.

        Examples
        --------

        >>> from sqlalchemy import create_engine
        >>> engine = create_engine("postgresql://myusername:mypassword@myhost:5432\
/mydatabase") # doctest: +SKIP
        >>> gdf.to_postgis("my_table", engine) # doctest: +SKIP

        See also
        --------
        GeoDataFrame.to_file : write GeoDataFrame to file
        read_postgis : read PostGIS database to GeoDataFrame

        """
        geopandas.io.sql._write_postgis(
            self, name, con, schema, if_exists, index, index_label, chunksize, dtype
        )

    #
    # Implement standard operators for GeoSeries
    #

    def __xor__(self, other):
        """Implement ^ operator as for builtin set type"""
        warnings.warn(
            "'^' operator will be deprecated. Use the 'symmetric_difference' "
            "method instead.",
            DeprecationWarning,
            stacklevel=2,
        )
        return self.geometry.symmetric_difference(other)

    def __or__(self, other):
        """Implement | operator as for builtin set type"""
        warnings.warn(
            "'|' operator will be deprecated. Use the 'union' method instead.",
            DeprecationWarning,
            stacklevel=2,
        )
        return self.geometry.union(other)

    def __and__(self, other):
        """Implement & operator as for builtin set type"""
        warnings.warn(
            "'&' operator will be deprecated. Use the 'intersection' method instead.",
            DeprecationWarning,
            stacklevel=2,
        )
        return self.geometry.intersection(other)

    def __sub__(self, other):
        """Implement - operator as for builtin set type"""
        warnings.warn(
            "'-' operator will be deprecated. Use the 'difference' method instead.",
            DeprecationWarning,
            stacklevel=2,
        )
        return self.geometry.difference(other)

    plot = CachedAccessor("plot", geopandas.plotting.GeoplotAccessor)

    @doc(_explore)
    def explore(self, *args, **kwargs):
        """Interactive map based on folium/leaflet.js"""
        return _explore(self, *args, **kwargs)

    def sjoin(self, df, *args, **kwargs):
        """Spatial join of two GeoDataFrames.

        See the User Guide page :doc:`../../user_guide/mergingdata` for details.

        Parameters
        ----------
        df : GeoDataFrame
            The GeoDataFrame to join with.
        how : string, default 'inner'
            The type of join:

            * 'left': use keys from left_df; retain only left_df geometry column
            * 'right': use keys from right_df; retain only right_df geometry column
            * 'inner': use intersection of keys from both dfs; retain only
              left_df geometry column

        predicate : string, default 'intersects'
            Binary predicate. Valid values are determined by the spatial index used.
            You can check the valid values in left_df or right_df as
            ``left_df.sindex.valid_query_predicates`` or
            ``right_df.sindex.valid_query_predicates``.
        lsuffix : string, default 'left'
            Suffix to apply to overlapping column names (left GeoDataFrame).
        rsuffix : string, default 'right'
            Suffix to apply to overlapping column names (right GeoDataFrame).

        Examples
        --------
        >>> countries = geopandas.read_file( \
geopandas.datasets.get_path("naturalearth_lowres"))
        >>> cities = geopandas.read_file( \
geopandas.datasets.get_path("naturalearth_cities"))
        >>> countries.head() # doctest: +SKIP
            pop_est      continent                      name \
iso_a3  gdp_md_est                                           geometry
        0     920938        Oceania                      Fiji    FJI      8374.0 \
  MULTIPOLYGON (((180.00000 -16.06713, 180.00000...
        1   53950935         Africa                  Tanzania    TZA    150600.0 \
  POLYGON ((33.90371 -0.95000, 34.07262 -1.05982...
        2     603253         Africa                 W. Sahara    ESH       906.5 \
  POLYGON ((-8.66559 27.65643, -8.66512 27.58948...
        3   35623680  North America                    Canada    CAN   1674000.0 \
  MULTIPOLYGON (((-122.84000 49.00000, -122.9742...
        4  326625791  North America  United States of America    USA  18560000.0 \
  MULTIPOLYGON (((-122.84000 49.00000, -120.0000...
        >>> cities.head()
                   name                   geometry
        0  Vatican City  POINT (12.45339 41.90328)
        1    San Marino  POINT (12.44177 43.93610)
        2         Vaduz   POINT (9.51667 47.13372)
        3    Luxembourg   POINT (6.13000 49.61166)
        4       Palikir  POINT (158.14997 6.91664)

        >>> cities_w_country_data = cities.sjoin(countries)
        >>> cities_w_country_data.head() # doctest: +SKIP
                name_left                   geometry  index_right   pop_est \
continent name_right iso_a3  gdp_md_est
        0    Vatican City  POINT (12.45339 41.90328)          141  62137802 \
   Europe      Italy    ITA   2221000.0
        1      San Marino  POINT (12.44177 43.93610)          141  62137802 \
   Europe      Italy    ITA   2221000.0
        192          Rome  POINT (12.48131 41.89790)          141  62137802 \
   Europe      Italy    ITA   2221000.0
        2           Vaduz   POINT (9.51667 47.13372)          114   8754413 \
   Europe    Austria    AUT    416600.0
        184        Vienna  POINT (16.36469 48.20196)          114   8754413 \
   Europe    Austria    AUT    416600.0

        Notes
        -----
        Every operation in GeoPandas is planar, i.e. the potential third
        dimension is not taken into account.

        See also
        --------
        GeoDataFrame.sjoin_nearest : nearest neighbor join
        sjoin : equivalent top-level function
        """
        return geopandas.sjoin(left_df=self, right_df=df, *args, **kwargs)

    def sjoin_nearest(
        self,
        right,
        how="inner",
        max_distance=None,
        lsuffix="left",
        rsuffix="right",
        distance_col=None,
    ):
        """
        Spatial join of two GeoDataFrames based on the distance between their
        geometries.

        Results will include multiple output records for a single input record
        where there are multiple equidistant nearest or intersected neighbors.

        See the User Guide page
        https://geopandas.readthedocs.io/en/latest/docs/user_guide/mergingdata.html
        for more details.

        Parameters
        ----------
        right : GeoDataFrame
            The GeoDataFrame to join with.
        how : string, default 'inner'
            The type of join:

            * 'left': use keys from left_df; retain only left_df geometry column
            * 'right': use keys from right_df; retain only right_df geometry column
            * 'inner': use intersection of keys from both dfs; retain only
              left_df geometry column

        max_distance : float, default None
            Maximum distance within which to query for nearest geometry.
            Must be greater than 0.
            The max_distance used to search for nearest items in the tree may have a
            significant impact on performance by reducing the number of input
            geometries that are evaluated.
        lsuffix : string, default 'left'
            Suffix to apply to overlapping column names (left GeoDataFrame).
        rsuffix : string, default 'right'
            Suffix to apply to overlapping column names (right GeoDataFrame).
        distance_col : string, default None
            If set, save the distances computed between matching geometries under a
            column of this name in the joined GeoDataFrame.
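            These distances (and the ``max_distance`` cutoff) are expressed in
            the units of the GeoDataFrame's CRS; see the Notes section below
            regarding geographic CRS.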

        Examples
        --------
        >>> countries = geopandas.read_file(geopandas.datasets.get_\
path("naturalearth_lowres"))
        >>> cities = geopandas.read_file(geopandas.datasets.get_path("naturalearth_citi\
es"))
        >>> countries.head(2) # doctest: +SKIP
            pop_est      continent                      name \
iso_a3  gdp_md_est                                           geometry
        0     920938        Oceania                      Fiji    FJI      8374.0  MULTI\
POLYGON (((180.00000 -16.06713, 180.00000...
        1   53950935         Africa                  Tanzania    TZA    150600.0  POLYG\
ON ((33.90371 -0.95000, 34.07262 -1.05982...
        >>> cities.head(2) # doctest: +SKIP
                   name                   geometry
        0  Vatican City  POINT (12.45339 41.90328)
        1    San Marino  POINT (12.44177 43.93610)

        >>> cities_w_country_data = cities.sjoin_nearest(countries)
        >>> cities_w_country_data.head(2) # doctest: +SKIP
              name_left                   geometry  index_right   pop_est continent n\
ame_right iso_a3  gdp_md_est
        0  Vatican City  POINT (12.45339 41.90328)          141  62137802    Europe \
    Italy    ITA   2221000.0
        1    San Marino  POINT (12.44177 43.93610)          141  62137802    Europe \
    Italy    ITA   2221000.0

        To include the distances:

        >>> cities_w_country_data = cities.sjoin_nearest(countries, \
distance_col="distances")
        >>> cities_w_country_data[["name_left", "name_right", \
"distances"]].head(2) # doctest: +SKIP
              name_left name_right  distances
        0  Vatican City      Italy        0.0
        1    San Marino      Italy        0.0

        In the following example, we get multiple cities for Italy because all results
        are equidistant (in this case zero because they intersect).
        In fact, we get 3 results in total:

        >>> countries_w_city_data = cities.sjoin_nearest(countries, \
distance_col="distances", how="right")
        >>> italy_results = \
countries_w_city_data[countries_w_city_data["name_right"] == "Italy"]
        >>> italy_results[["name_left", "name_right"]] # doctest: +SKIP
                name_left name_right
        141  Vatican City      Italy
        141    San Marino      Italy
        141          Rome      Italy

        See also
        --------
        GeoDataFrame.sjoin : binary predicate joins
        sjoin_nearest : equivalent top-level function

        Notes
        -----
        Since this join relies on distances, results will be inaccurate
        if your geometries are in a geographic CRS.

        Every operation in GeoPandas is planar, i.e. the potential third
        dimension is not taken into account.
        """
        return geopandas.sjoin_nearest(
            self,
            right,
            how=how,
            max_distance=max_distance,
            lsuffix=lsuffix,
            rsuffix=rsuffix,
            distance_col=distance_col,
        )

    def clip(self, mask, keep_geom_type=False):
        """Clip points, lines, or polygon geometries to the mask extent.

        Both layers must be in the same Coordinate Reference System (CRS).
        The GeoDataFrame will be clipped to the full extent of the `mask` object.

        If there are multiple polygons in mask, data from the GeoDataFrame will be
        clipped to the total boundary of all polygons in mask.

        Parameters
        ----------
        mask : GeoDataFrame, GeoSeries, (Multi)Polygon
            Polygon vector layer used to clip `gdf`.
            The mask's geometry is dissolved into one geometric feature
            and intersected with `gdf`.
        keep_geom_type : boolean, default False
            If True, return only geometries of original type in case of intersection
            resulting in multiple geometry types or GeometryCollections.
            If False, return all resulting geometries (potentially mixed types).
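            For instance, clipping a line layer against a polygon mask can leave
            isolated points where a line merely touches the mask boundary;
            ``keep_geom_type=True`` would drop such points and keep only the
            line geometries.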

        Returns
        -------
        GeoDataFrame
            Vector data (points, lines, polygons) from `gdf` clipped to
            polygon boundary from mask.

        See also
        --------
        clip : equivalent top-level function

        Examples
        --------
        Clip points (global cities) with a polygon (the South American continent):

        >>> world = geopandas.read_file(
        ...     geopandas.datasets.get_path('naturalearth_lowres'))
        >>> south_america = world[world['continent'] == "South America"]
        >>> capitals = geopandas.read_file(
        ...     geopandas.datasets.get_path('naturalearth_cities'))
        >>> capitals.shape
        (202, 2)

        >>> sa_capitals = capitals.clip(south_america)
        >>> sa_capitals.shape
        (12, 2)
        """
        return geopandas.clip(self, mask=mask, keep_geom_type=keep_geom_type)

    def overlay(self, right, how="intersection", keep_geom_type=None, make_valid=True):
        """Perform spatial overlay between GeoDataFrames.

        Currently only supports GeoDataFrames with uniform geometry types,
        i.e. containing only (Multi)Polygons, or only (Multi)Points, or a
        combination of (Multi)LineString and LinearRing shapes.
        Implements several methods that are all effectively subsets of the union.

        See the User Guide page :doc:`../../user_guide/set_operations` for details.

        Parameters
        ----------
        right : GeoDataFrame
            The GeoDataFrame to overlay with.
        how : string, default 'intersection'
            Method of spatial overlay: 'intersection', 'union',
            'identity', 'symmetric_difference' or 'difference'.
        keep_geom_type : bool
            If True, return only geometries of the same geometry type as the
            GeoDataFrame; if False, return all resulting geometries. Default is
            None, which will set keep_geom_type to True but warn upon dropping
            geometries.
        make_valid : bool, default True
            If True, any invalid input geometries are corrected with a call to
            `buffer(0)`; if False, a `ValueError` is raised if any input geometries
            are invalid.

        Returns
        -------
        df : GeoDataFrame
            GeoDataFrame with the new set of polygons and attributes
            resulting from the overlay

        Examples
        --------
        >>> from shapely.geometry import Polygon
        >>> polys1 = geopandas.GeoSeries([Polygon([(0,0), (2,0), (2,2), (0,2)]),
        ...                               Polygon([(2,2), (4,2), (4,4), (2,4)])])
        >>> polys2 = geopandas.GeoSeries([Polygon([(1,1), (3,1), (3,3), (1,3)]),
        ...                               Polygon([(3,3), (5,3), (5,5), (3,5)])])
        >>> df1 = geopandas.GeoDataFrame({'geometry': polys1, 'df1_data':[1,2]})
        >>> df2 = geopandas.GeoDataFrame({'geometry': polys2, 'df2_data':[1,2]})

        >>> df1.overlay(df2, how='union')
           df1_data  df2_data                                           geometry
        0       1.0       1.0  POLYGON ((2.00000 2.00000, 2.00000 1.00000, 1....
        1       2.0       1.0  POLYGON ((2.00000 2.00000, 2.00000 3.00000, 3....
        2       2.0       2.0  POLYGON ((4.00000 4.00000, 4.00000 3.00000, 3....
        3       1.0       NaN  POLYGON ((2.00000 0.00000, 0.00000 0.00000, 0....
        4       2.0       NaN  MULTIPOLYGON (((3.00000 3.00000, 4.00000 3.000...
        5       NaN       1.0  MULTIPOLYGON (((2.00000 2.00000, 3.00000 2.000...
        6       NaN       2.0  POLYGON ((3.00000 5.00000, 5.00000 5.00000, 5....

        >>> df1.overlay(df2, how='intersection')
           df1_data  df2_data                                           geometry
        0         1         1  POLYGON ((2.00000 2.00000, 2.00000 1.00000, 1....
        1         2         1  POLYGON ((2.00000 2.00000, 2.00000 3.00000, 3....
        2         2         2  POLYGON ((4.00000 4.00000, 4.00000 3.00000, 3....

        >>> df1.overlay(df2, how='symmetric_difference')
           df1_data  df2_data                                           geometry
        0       1.0       NaN  POLYGON ((2.00000 0.00000, 0.00000 0.00000, 0....
        1       2.0       NaN  MULTIPOLYGON (((3.00000 3.00000, 4.00000 3.000...
        2       NaN       1.0  MULTIPOLYGON (((2.00000 2.00000, 3.00000 2.000...
        3       NaN       2.0  POLYGON ((3.00000 5.00000, 5.00000 5.00000, 5....

        >>> df1.overlay(df2, how='difference')
                                                    geometry  df1_data
        0  POLYGON ((2.00000 0.00000, 0.00000 0.00000, 0....         1
        1  MULTIPOLYGON (((3.00000 3.00000, 4.00000 3.000...         2

        >>> df1.overlay(df2, how='identity')
           df1_data  df2_data                                           geometry
        0       1.0       1.0  POLYGON ((2.00000 2.00000, 2.00000 1.00000, 1....
        1       2.0       1.0  POLYGON ((2.00000 2.00000, 2.00000 3.00000, 3....
        2       2.0       2.0  POLYGON ((4.00000 4.00000, 4.00000 3.00000, 3....
        3       1.0       NaN  POLYGON ((2.00000 0.00000, 0.00000 0.00000, 0....
        4       2.0       NaN  MULTIPOLYGON (((3.00000 3.00000, 4.00000 3.000...

        See also
        --------
        GeoDataFrame.sjoin : spatial join
        overlay : equivalent top-level function

        Notes
        -----
        Every operation in GeoPandas is planar, i.e. the potential third
        dimension is not taken into account.
        """
        return geopandas.overlay(
            self, right, how=how, keep_geom_type=keep_geom_type, make_valid=make_valid
        )


def _dataframe_set_geometry(self, col, drop=False, inplace=False, crs=None):
    if inplace:
        raise ValueError(
            "Can't do inplace setting when converting from DataFrame to GeoDataFrame"
        )
    gf = GeoDataFrame(self)
    # this will copy so that BlockManager gets copied
    return gf.set_geometry(col, drop=drop, inplace=False, crs=crs)


DataFrame.set_geometry = _dataframe_set_geometry
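
# Minimal usage sketch (illustrative comment, not executed): the assignment
# above monkey-patches ``set_geometry`` onto pandas.DataFrame, so a plain
# DataFrame holding shapely geometries can be promoted to a GeoDataFrame
# without calling the GeoDataFrame constructor directly:
#
#   >>> from shapely.geometry import Point
#   >>> df = pd.DataFrame({"geometry": [Point(0, 0), Point(1, 1)], "value": [1, 2]})
#   >>> gdf = df.set_geometry("geometry", crs="EPSG:4326")  # returns a GeoDataFrame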