1"""Functions for working with features in a raster dataset.""" 2 3 4import logging 5import math 6import os 7import warnings 8 9import numpy as np 10 11import rasterio 12from rasterio.dtypes import validate_dtype, can_cast_dtype, get_minimum_dtype, _getnpdtype 13from rasterio.enums import MergeAlg 14from rasterio.env import ensure_env 15from rasterio.errors import ShapeSkipWarning 16from rasterio._features import _shapes, _sieve, _rasterize, _bounds 17from rasterio import warp 18from rasterio.rio.helpers import coords 19from rasterio.transform import Affine 20from rasterio.transform import IDENTITY, guard_transform, rowcol 21from rasterio.windows import Window 22 23log = logging.getLogger(__name__) 24 25 26@ensure_env 27def geometry_mask( 28 geometries, 29 out_shape, 30 transform, 31 all_touched=False, 32 invert=False): 33 """Create a mask from shapes. 34 35 By default, mask is intended for use as a 36 numpy mask, where pixels that overlap shapes are False. 37 38 Parameters 39 ---------- 40 geometries : iterable over geometries (GeoJSON-like objects) 41 out_shape : tuple or list 42 Shape of output numpy ndarray. 43 transform : Affine transformation object 44 Transformation from pixel coordinates of `source` to the 45 coordinate system of the input `shapes`. See the `transform` 46 property of dataset objects. 47 all_touched : boolean, optional 48 If True, all pixels touched by geometries will be burned in. If 49 false, only pixels whose center is within the polygon or that 50 are selected by Bresenham's line algorithm will be burned in. 51 invert: boolean, optional 52 If True, mask will be True for pixels that overlap shapes. 53 False by default. 54 55 Returns 56 ------- 57 numpy ndarray of type 'bool' 58 Result 59 60 Notes 61 ----- 62 See rasterize() for performance notes. 63 64 """ 65 fill, mask_value = (0, 1) if invert else (1, 0) 66 67 return rasterize( 68 geometries, 69 out_shape=out_shape, 70 transform=transform, 71 all_touched=all_touched, 72 fill=fill, 73 default_value=mask_value).astype('bool') 74 75 76@ensure_env 77def shapes(source, mask=None, connectivity=4, transform=IDENTITY): 78 """Get shapes and values of connected regions in a dataset or array. 79 80 Parameters 81 ---------- 82 source : array, dataset object, Band, or tuple(dataset, bidx) 83 Data type must be one of rasterio.int16, rasterio.int32, 84 rasterio.uint8, rasterio.uint16, or rasterio.float32. 85 mask : numpy ndarray or rasterio Band object, optional 86 Must evaluate to bool (rasterio.bool_ or rasterio.uint8). Values 87 of False or 0 will be excluded from feature generation. Note 88 well that this is the inverse sense from Numpy's, where a mask 89 value of True indicates invalid data in an array. If `source` is 90 a Numpy masked array and `mask` is None, the source's mask will 91 be inverted and used in place of `mask`. 92 connectivity : int, optional 93 Use 4 or 8 pixel connectivity for grouping pixels into features 94 transform : Affine transformation, optional 95 If not provided, feature coordinates will be generated based on 96 pixel coordinates 97 98 Yields 99 ------- 100 polygon, value 101 A pair of (polygon, value) for each feature found in the image. 102 Polygons are GeoJSON-like dicts and the values are the 103 associated value from the image, in the data type of the image. 104 Note: due to floating point precision issues, values returned 105 from a floating point image may not exactly match the original 106 values. 107 108 Notes 109 ----- 110 The amount of memory used by this algorithm is proportional to the 111 number and complexity of polygons produced. This algorithm is most 112 appropriate for simple thematic data. Data with high pixel-to-pixel 113 variability, such as imagery, may produce one polygon per pixel and 114 consume large amounts of memory. 115 116 Because the low-level implementation uses either an int32 or float32 117 buffer, uint32 and float64 data cannot be operated on without 118 truncation issues. 119 120 """ 121 if hasattr(source, 'mask') and mask is None: 122 mask = ~source.mask 123 source = source.data 124 125 transform = guard_transform(transform) 126 for s, v in _shapes(source, mask, connectivity, transform): 127 yield s, v 128 129 130@ensure_env 131def sieve(source, size, out=None, mask=None, connectivity=4): 132 """Replace small polygons in `source` with value of their largest neighbor. 133 134 Polygons are found for each set of neighboring pixels of the same value. 135 136 Parameters 137 ---------- 138 source : array or dataset object opened in 'r' mode or Band or tuple(dataset, bidx) 139 Must be of type rasterio.int16, rasterio.int32, rasterio.uint8, 140 rasterio.uint16, or rasterio.float32 141 size : int 142 minimum polygon size (number of pixels) to retain. 143 out : numpy ndarray, optional 144 Array of same shape and data type as `source` in which to store results. 145 mask : numpy ndarray or rasterio Band object, optional 146 Values of False or 0 will be excluded from feature generation 147 Must evaluate to bool (rasterio.bool_ or rasterio.uint8) 148 connectivity : int, optional 149 Use 4 or 8 pixel connectivity for grouping pixels into features 150 151 Returns 152 ------- 153 out : numpy ndarray 154 Result 155 156 Notes 157 ----- 158 GDAL only supports values that can be cast to 32-bit integers for this 159 operation. 160 161 The amount of memory used by this algorithm is proportional to the number 162 and complexity of polygons found in the image. This algorithm is most 163 appropriate for simple thematic data. Data with high pixel-to-pixel 164 variability, such as imagery, may produce one polygon per pixel and consume 165 large amounts of memory. 166 167 """ 168 169 if out is None: 170 out = np.zeros(source.shape, source.dtype) 171 _sieve(source, size, out, mask, connectivity) 172 return out 173 174 175@ensure_env 176def rasterize( 177 shapes, 178 out_shape=None, 179 fill=0, 180 out=None, 181 transform=IDENTITY, 182 all_touched=False, 183 merge_alg=MergeAlg.replace, 184 default_value=1, 185 dtype=None): 186 """Return an image array with input geometries burned in. 187 188 Warnings will be raised for any invalid or empty geometries, and 189 an exception will be raised if there are no valid shapes 190 to rasterize. 191 192 Parameters 193 ---------- 194 shapes : iterable of (`geometry`, `value`) pairs or iterable over 195 geometries. The `geometry` can either be an object that 196 implements the geo interface or GeoJSON-like object. If no 197 `value` is provided the `default_value` will be used. If `value` 198 is `None` the `fill` value will be used. 199 out_shape : tuple or list with 2 integers 200 Shape of output numpy ndarray. 201 fill : int or float, optional 202 Used as fill value for all areas not covered by input 203 geometries. 204 out : numpy ndarray, optional 205 Array of same shape and data type as `source` in which to store 206 results. 207 transform : Affine transformation object, optional 208 Transformation from pixel coordinates of `source` to the 209 coordinate system of the input `shapes`. See the `transform` 210 property of dataset objects. 211 all_touched : boolean, optional 212 If True, all pixels touched by geometries will be burned in. If 213 false, only pixels whose center is within the polygon or that 214 are selected by Bresenham's line algorithm will be burned in. 215 merge_alg : MergeAlg, optional 216 Merge algorithm to use. One of: 217 MergeAlg.replace (default): 218 the new value will overwrite the existing value. 219 MergeAlg.add: 220 the new value will be added to the existing raster. 221 default_value : int or float, optional 222 Used as value for all geometries, if not provided in `shapes`. 223 dtype : rasterio or numpy data type, optional 224 Used as data type for results, if `out` is not provided. 225 226 Returns 227 ------- 228 numpy ndarray 229 If `out` was not None then `out` is returned, it will have been 230 modified in-place. If `out` was None, this will be a new array. 231 232 Notes 233 ----- 234 Valid data types for `fill`, `default_value`, `out`, `dtype` and 235 shape values are "int16", "int32", "uint8", "uint16", "uint32", 236 "float32", and "float64". 237 238 This function requires significant memory resources. The shapes 239 iterator will be materialized to a Python list and another C copy of 240 that list will be made. The `out` array will be copied and 241 additional temporary raster memory equal to 2x the smaller of `out` 242 data or GDAL's max cache size (controlled by GDAL_CACHEMAX, default 243 is 5% of the computer's physical memory) is required. 244 245 If GDAL max cache size is smaller than the output data, the array of 246 shapes will be iterated multiple times. Performance is thus a linear 247 function of buffer size. For maximum speed, ensure that 248 GDAL_CACHEMAX is larger than the size of `out` or `out_shape`. 249 250 """ 251 valid_dtypes = ( 252 'int16', 'int32', 'uint8', 'uint16', 'uint32', 'float32', 'float64' 253 ) 254 255 def format_invalid_dtype(param): 256 return '{0} dtype must be one of: {1}'.format( 257 param, ', '.join(valid_dtypes) 258 ) 259 260 def format_cast_error(param, dtype): 261 return '{0} cannot be cast to specified dtype: {1}'.format(param, dtype) 262 263 if fill != 0: 264 fill_array = np.array([fill]) 265 if not validate_dtype(fill_array, valid_dtypes): 266 raise ValueError(format_invalid_dtype('fill')) 267 268 if dtype is not None and not can_cast_dtype(fill_array, dtype): 269 raise ValueError(format_cast_error('fill', dtype)) 270 271 if default_value != 1: 272 default_value_array = np.array([default_value]) 273 if not validate_dtype(default_value_array, valid_dtypes): 274 raise ValueError(format_invalid_dtype('default_value')) 275 276 if dtype is not None and not can_cast_dtype(default_value_array, dtype): 277 raise ValueError(format_cast_error('default_vaue', dtype)) 278 279 if dtype is not None and _getnpdtype(dtype).name not in valid_dtypes: 280 raise ValueError(format_invalid_dtype('dtype')) 281 282 valid_shapes = [] 283 shape_values = [] 284 for index, item in enumerate(shapes): 285 if isinstance(item, (tuple, list)): 286 geom, value = item 287 if value is None: 288 value = fill 289 else: 290 geom = item 291 value = default_value 292 geom = getattr(geom, '__geo_interface__', None) or geom 293 294 if is_valid_geom(geom): 295 shape_values.append(value) 296 297 geom_type = geom['type'] 298 299 if geom_type == 'GeometryCollection': 300 # GeometryCollections need to be handled as individual parts to 301 # avoid holes in output: 302 # https://github.com/mapbox/rasterio/issues/1253. 303 # Only 1-level deep since GeoJSON spec discourages nested 304 # GeometryCollections 305 for part in geom['geometries']: 306 valid_shapes.append((part, value)) 307 308 elif geom_type == 'MultiPolygon': 309 # Same issue as above 310 for poly in geom['coordinates']: 311 valid_shapes.append(({'type': 'Polygon', 'coordinates': poly}, value)) 312 313 else: 314 valid_shapes.append((geom, value)) 315 316 else: 317 # invalid or empty geometries are skipped and raise a warning instead 318 warnings.warn('Invalid or empty shape {} at index {} will not be rasterized.'.format(geom, index), ShapeSkipWarning) 319 320 if not valid_shapes: 321 raise ValueError('No valid geometry objects found for rasterize') 322 323 shape_values = np.array(shape_values) 324 325 if not validate_dtype(shape_values, valid_dtypes): 326 raise ValueError(format_invalid_dtype('shape values')) 327 328 if dtype is None: 329 dtype = get_minimum_dtype(np.append(shape_values, fill)) 330 331 elif not can_cast_dtype(shape_values, dtype): 332 raise ValueError(format_cast_error('shape values', dtype)) 333 334 if out is not None: 335 if _getnpdtype(out.dtype).name not in valid_dtypes: 336 raise ValueError(format_invalid_dtype('out')) 337 338 if not can_cast_dtype(shape_values, out.dtype): 339 raise ValueError(format_cast_error('shape values', out.dtype.name)) 340 341 elif out_shape is not None: 342 343 if len(out_shape) != 2: 344 raise ValueError('Invalid out_shape, must be 2D') 345 346 out = np.empty(out_shape, dtype=dtype) 347 out.fill(fill) 348 349 else: 350 raise ValueError('Either an out_shape or image must be provided') 351 352 if min(out.shape) == 0: 353 raise ValueError("width and height must be > 0") 354 355 transform = guard_transform(transform) 356 _rasterize(valid_shapes, out, transform, all_touched, merge_alg) 357 return out 358 359 360def bounds(geometry, north_up=True, transform=None): 361 """Return a (left, bottom, right, top) bounding box. 362 363 From Fiona 1.4.8. Modified to return bbox from geometry if available. 364 365 Parameters 366 ---------- 367 geometry: GeoJSON-like feature (implements __geo_interface__), 368 feature collection, or geometry. 369 370 Returns 371 ------- 372 tuple 373 Bounding box: (left, bottom, right, top) 374 """ 375 376 geometry = getattr(geometry, '__geo_interface__', None) or geometry 377 378 if 'bbox' in geometry: 379 return tuple(geometry['bbox']) 380 381 geom = geometry.get('geometry') or geometry 382 383 # geometry must be a geometry, GeometryCollection, or FeatureCollection 384 if not ('coordinates' in geom or 'geometries' in geom or 'features' in geom): 385 raise ValueError( 386 "geometry must be a GeoJSON-like geometry, GeometryCollection, " 387 "or FeatureCollection" 388 ) 389 390 return _bounds(geom, north_up=north_up, transform=transform) 391 392 393def geometry_window( 394 dataset, 395 shapes, 396 pad_x=0, 397 pad_y=0, 398 north_up=None, 399 rotated=None, 400 pixel_precision=None, 401 boundless=False, 402): 403 """Calculate the window within the raster that fits the bounds of 404 the geometry plus optional padding. The window is the outermost 405 pixel indices that contain the geometry (floor of offsets, ceiling 406 of width and height). 407 408 If shapes do not overlap raster, a WindowError is raised. 409 410 Parameters 411 ---------- 412 dataset : dataset object opened in 'r' mode 413 Raster for which the mask will be created. 414 shapes : iterable over geometries. 415 A geometry is a GeoJSON-like object or implements the geo 416 interface. Must be in same coordinate system as dataset. 417 pad_x : float 418 Amount of padding (as fraction of raster's x pixel size) to add 419 to left and right side of bounds. 420 pad_y : float 421 Amount of padding (as fraction of raster's y pixel size) to add 422 to top and bottom of bounds. 423 north_up : optional 424 This parameter is ignored since version 1.2.1. A deprecation 425 warning will be emitted in 1.3.0. 426 rotated : optional 427 This parameter is ignored since version 1.2.1. A deprecation 428 warning will be emitted in 1.3.0. 429 pixel_precision : int or float, optional 430 Number of places of rounding precision or absolute precision for 431 evaluating bounds of shapes. 432 boundless : bool, optional 433 Whether to allow a boundless window or not. 434 435 Returns 436 ------- 437 rasterio.windows.Window 438 439 """ 440 441 all_bounds = [bounds(shape, transform=~dataset.transform) for shape in shapes] 442 443 cols = [ 444 x 445 for (left, bottom, right, top) in all_bounds 446 for x in (left - pad_x, right + pad_x, right + pad_x, left - pad_x) 447 ] 448 rows = [ 449 y 450 for (left, bottom, right, top) in all_bounds 451 for y in (top - pad_y, top - pad_y, bottom + pad_y, bottom + pad_y) 452 ] 453 454 row_start, row_stop = int(math.floor(min(rows))), int(math.ceil(max(rows))) 455 col_start, col_stop = int(math.floor(min(cols))), int(math.ceil(max(cols))) 456 457 window = Window( 458 col_off=col_start, 459 row_off=row_start, 460 width=max(col_stop - col_start, 0.0), 461 height=max(row_stop - row_start, 0.0), 462 ) 463 464 # Make sure that window overlaps raster 465 raster_window = Window(0, 0, dataset.width, dataset.height) 466 if not boundless: 467 window = window.intersection(raster_window) 468 469 return window 470 471 472def is_valid_geom(geom): 473 """ 474 Checks to see if geometry is a valid GeoJSON geometry type or 475 GeometryCollection. Geometry must be GeoJSON or implement the geo 476 interface. 477 478 Geometries must be non-empty, and have at least x, y coordinates. 479 480 Note: only the first coordinate is checked for validity. 481 482 Parameters 483 ---------- 484 geom: an object that implements the geo interface or GeoJSON-like object 485 486 Returns 487 ------- 488 bool: True if object is a valid GeoJSON geometry type 489 """ 490 491 geom_types = {'Point', 'MultiPoint', 'LineString', 'LinearRing', 492 'MultiLineString', 'Polygon', 'MultiPolygon'} 493 494 geom = getattr(geom, '__geo_interface__', None) or geom 495 496 try: 497 geom_type = geom["type"] 498 if geom_type not in geom_types.union({'GeometryCollection'}): 499 return False 500 501 except (KeyError, TypeError): 502 return False 503 504 if geom_type in geom_types: 505 if 'coordinates' not in geom: 506 return False 507 508 coords = geom['coordinates'] 509 510 if geom_type == 'Point': 511 # Points must have at least x, y 512 return len(coords) >= 2 513 514 if geom_type == 'MultiPoint': 515 # Multi points must have at least one point with at least x, y 516 return len(coords) > 0 and len(coords[0]) >= 2 517 518 if geom_type == 'LineString': 519 # Lines must have at least 2 coordinates and at least x, y for 520 # a coordinate 521 return len(coords) >= 2 and len(coords[0]) >= 2 522 523 if geom_type == 'LinearRing': 524 # Rings must have at least 4 coordinates and at least x, y for 525 # a coordinate 526 return len(coords) >= 4 and len(coords[0]) >= 2 527 528 if geom_type == 'MultiLineString': 529 # Multi lines must have at least one LineString 530 return (len(coords) > 0 and len(coords[0]) >= 2 and 531 len(coords[0][0]) >= 2) 532 533 if geom_type == 'Polygon': 534 # Polygons must have at least 1 ring, with at least 4 coordinates, 535 # with at least x, y for a coordinate 536 return (len(coords) > 0 and len(coords[0]) >= 4 and 537 len(coords[0][0]) >= 2) 538 539 if geom_type == 'MultiPolygon': 540 # Muti polygons must have at least one Polygon 541 return (len(coords) > 0 and len(coords[0]) > 0 and 542 len(coords[0][0]) >= 4 and len(coords[0][0][0]) >= 2) 543 544 if geom_type == 'GeometryCollection': 545 if 'geometries' not in geom: 546 return False 547 548 if not len(geom['geometries']) > 0: 549 # While technically valid according to GeoJSON spec, an empty 550 # GeometryCollection will cause issues if used in rasterio 551 return False 552 553 for g in geom['geometries']: 554 if not is_valid_geom(g): 555 return False # short-circuit and fail early 556 557 return True 558 559 560def dataset_features( 561 src, 562 bidx=None, 563 sampling=1, 564 band=True, 565 as_mask=False, 566 with_nodata=False, 567 geographic=True, 568 precision=-1): 569 """Yield GeoJSON features for the dataset 570 571 The geometries are polygons bounding contiguous regions of the same raster value. 572 573 Parameters 574 ---------- 575 src: Rasterio Dataset 576 577 bidx: int 578 band index 579 580 sampling: int (DEFAULT: 1) 581 Inverse of the sampling fraction; a value of 10 decimates 582 583 band: boolean (DEFAULT: True) 584 extract features from a band (True) or a mask (False) 585 586 as_mask: boolean (DEFAULT: False) 587 Interpret band as a mask and output only one class of valid data shapes? 588 589 with_nodata: boolean (DEFAULT: False) 590 Include nodata regions? 591 592 geographic: str (DEFAULT: True) 593 Output shapes in EPSG:4326? Otherwise use the native CRS. 594 595 precision: int (DEFAULT: -1) 596 Decimal precision of coordinates. -1 for full float precision output 597 598 Yields 599 ------ 600 GeoJSON-like Feature dictionaries for shapes found in the given band 601 """ 602 if bidx is not None and bidx > src.count: 603 raise ValueError('bidx is out of range for raster') 604 605 img = None 606 msk = None 607 608 # Adjust transforms. 609 transform = src.transform 610 if sampling > 1: 611 # Determine the target shape (to decimate) 612 shape = (int(math.ceil(src.height / sampling)), 613 int(math.ceil(src.width / sampling))) 614 615 # Calculate independent sampling factors 616 x_sampling = src.width / shape[1] 617 y_sampling = src.height / shape[0] 618 619 # Decimation of the raster produces a georeferencing 620 # shift that we correct with a translation. 621 transform *= Affine.translation( 622 src.width % x_sampling, src.height % y_sampling) 623 624 # And follow by scaling. 625 transform *= Affine.scale(x_sampling, y_sampling) 626 627 # Most of the time, we'll use the valid data mask. 628 # We skip reading it if we're extracting every possible 629 # feature (even invalid data features) from a band. 630 if not band or (band and not as_mask and not with_nodata): 631 if sampling == 1: 632 msk = src.read_masks(bidx) 633 else: 634 msk_shape = shape 635 if bidx is None: 636 msk = np.zeros( 637 (src.count,) + msk_shape, 'uint8') 638 else: 639 msk = np.zeros(msk_shape, 'uint8') 640 msk = src.read_masks(bidx, msk) 641 642 if bidx is None: 643 msk = np.logical_or.reduce(msk).astype('uint8') 644 645 # Possibly overridden below. 646 img = msk 647 648 # Read the band data unless the --mask option is given. 649 if band: 650 if sampling == 1: 651 img = src.read(bidx, masked=False) 652 else: 653 img = np.zeros( 654 shape, 655 dtype=src.dtypes[src.indexes.index(bidx)]) 656 img = src.read(bidx, img, masked=False) 657 658 # If as_mask option was given, convert the image 659 # to a binary image. This reduces the number of shape 660 # categories to 2 and likely reduces the number of 661 # shapes. 662 if as_mask: 663 tmp = np.ones_like(img, 'uint8') * 255 664 tmp[img == 0] = 0 665 img = tmp 666 if not with_nodata: 667 msk = tmp 668 669 # Prepare keyword arguments for shapes(). 670 kwargs = {'transform': transform} 671 if not with_nodata: 672 kwargs['mask'] = msk 673 674 src_basename = os.path.basename(src.name) 675 676 # Yield GeoJSON features. 677 for i, (g, val) in enumerate( 678 rasterio.features.shapes(img, **kwargs)): 679 if geographic: 680 g = warp.transform_geom( 681 src.crs, 'EPSG:4326', g, 682 antimeridian_cutting=True, precision=precision) 683 xs, ys = zip(*coords(g)) 684 yield { 685 'type': 'Feature', 686 'id': "{0}:{1}".format(src_basename, i), 687 'properties': { 688 'val': val, 689 'filename': src_basename 690 }, 691 'bbox': [min(xs), min(ys), max(xs), max(ys)], 692 'geometry': g 693 } 694