1"""Functions for working with features in a raster dataset."""
2
3
4import logging
5import math
6import os
7import warnings
8
9import numpy as np
10
11import rasterio
12from rasterio.dtypes import validate_dtype, can_cast_dtype, get_minimum_dtype, _getnpdtype
13from rasterio.enums import MergeAlg
14from rasterio.env import ensure_env
15from rasterio.errors import ShapeSkipWarning
16from rasterio._features import _shapes, _sieve, _rasterize, _bounds
17from rasterio import warp
18from rasterio.rio.helpers import coords
19from rasterio.transform import Affine
20from rasterio.transform import IDENTITY, guard_transform, rowcol
21from rasterio.windows import Window
22
23log = logging.getLogger(__name__)
24
25
26@ensure_env
27def geometry_mask(
28        geometries,
29        out_shape,
30        transform,
31        all_touched=False,
32        invert=False):
33    """Create a mask from shapes.
34
35    By default, mask is intended for use as a
36    numpy mask, where pixels that overlap shapes are False.
37
38    Parameters
39    ----------
40    geometries : iterable over geometries (GeoJSON-like objects)
41    out_shape : tuple or list
42        Shape of output numpy ndarray.
43    transform : Affine transformation object
44        Transformation from pixel coordinates of `source` to the
45        coordinate system of the input `shapes`. See the `transform`
46        property of dataset objects.
47    all_touched : boolean, optional
48        If True, all pixels touched by geometries will be burned in.  If
49        false, only pixels whose center is within the polygon or that
50        are selected by Bresenham's line algorithm will be burned in.
51    invert: boolean, optional
52        If True, mask will be True for pixels that overlap shapes.
53        False by default.
54
55    Returns
56    -------
57    numpy ndarray of type 'bool'
58        Result
59
60    Notes
61    -----
62    See rasterize() for performance notes.
63
64    """
65    fill, mask_value = (0, 1) if invert else (1, 0)
66
67    return rasterize(
68        geometries,
69        out_shape=out_shape,
70        transform=transform,
71        all_touched=all_touched,
72        fill=fill,
73        default_value=mask_value).astype('bool')
74
75
76@ensure_env
77def shapes(source, mask=None, connectivity=4, transform=IDENTITY):
78    """Get shapes and values of connected regions in a dataset or array.
79
80    Parameters
81    ----------
82    source : array, dataset object, Band, or tuple(dataset, bidx)
83        Data type must be one of rasterio.int16, rasterio.int32,
84        rasterio.uint8, rasterio.uint16, or rasterio.float32.
85    mask : numpy ndarray or rasterio Band object, optional
86        Must evaluate to bool (rasterio.bool_ or rasterio.uint8). Values
87        of False or 0 will be excluded from feature generation.  Note
88        well that this is the inverse sense from Numpy's, where a mask
89        value of True indicates invalid data in an array. If `source` is
90        a Numpy masked array and `mask` is None, the source's mask will
91        be inverted and used in place of `mask`.
92    connectivity : int, optional
93        Use 4 or 8 pixel connectivity for grouping pixels into features
94    transform : Affine transformation, optional
95        If not provided, feature coordinates will be generated based on
96        pixel coordinates
97
98    Yields
99    -------
100    polygon, value
101        A pair of (polygon, value) for each feature found in the image.
102        Polygons are GeoJSON-like dicts and the values are the
103        associated value from the image, in the data type of the image.
104        Note: due to floating point precision issues, values returned
105        from a floating point image may not exactly match the original
106        values.
107
108    Notes
109    -----
110    The amount of memory used by this algorithm is proportional to the
111    number and complexity of polygons produced.  This algorithm is most
112    appropriate for simple thematic data.  Data with high pixel-to-pixel
113    variability, such as imagery, may produce one polygon per pixel and
114    consume large amounts of memory.
115
116    Because the low-level implementation uses either an int32 or float32
117    buffer, uint32 and float64 data cannot be operated on without
118    truncation issues.
119
120    """
121    if hasattr(source, 'mask') and mask is None:
122        mask = ~source.mask
123        source = source.data
124
125    transform = guard_transform(transform)
126    for s, v in _shapes(source, mask, connectivity, transform):
127        yield s, v
128
129
130@ensure_env
131def sieve(source, size, out=None, mask=None, connectivity=4):
132    """Replace small polygons in `source` with value of their largest neighbor.
133
134    Polygons are found for each set of neighboring pixels of the same value.
135
136    Parameters
137    ----------
138    source : array or dataset object opened in 'r' mode or Band or tuple(dataset, bidx)
139        Must be of type rasterio.int16, rasterio.int32, rasterio.uint8,
140        rasterio.uint16, or rasterio.float32
141    size : int
142        minimum polygon size (number of pixels) to retain.
143    out : numpy ndarray, optional
144        Array of same shape and data type as `source` in which to store results.
145    mask : numpy ndarray or rasterio Band object, optional
146        Values of False or 0 will be excluded from feature generation
147        Must evaluate to bool (rasterio.bool_ or rasterio.uint8)
148    connectivity : int, optional
149        Use 4 or 8 pixel connectivity for grouping pixels into features
150
151    Returns
152    -------
153    out : numpy ndarray
154        Result
155
156    Notes
157    -----
158    GDAL only supports values that can be cast to 32-bit integers for this
159    operation.
160
161    The amount of memory used by this algorithm is proportional to the number
162    and complexity of polygons found in the image.  This algorithm is most
163    appropriate for simple thematic data.  Data with high pixel-to-pixel
164    variability, such as imagery, may produce one polygon per pixel and consume
165    large amounts of memory.
166
167    """
168
169    if out is None:
170        out = np.zeros(source.shape, source.dtype)
171    _sieve(source, size, out, mask, connectivity)
172    return out
173
174
175@ensure_env
176def rasterize(
177        shapes,
178        out_shape=None,
179        fill=0,
180        out=None,
181        transform=IDENTITY,
182        all_touched=False,
183        merge_alg=MergeAlg.replace,
184        default_value=1,
185        dtype=None):
186    """Return an image array with input geometries burned in.
187
188    Warnings will be raised for any invalid or empty geometries, and
189    an exception will be raised if there are no valid shapes
190    to rasterize.
191
192    Parameters
193    ----------
194    shapes : iterable of (`geometry`, `value`) pairs or iterable over
195        geometries. The `geometry` can either be an object that
196        implements the geo interface or GeoJSON-like object. If no
197        `value` is provided the `default_value` will be used. If `value`
198        is `None` the `fill` value will be used.
199    out_shape : tuple or list with 2 integers
200        Shape of output numpy ndarray.
201    fill : int or float, optional
202        Used as fill value for all areas not covered by input
203        geometries.
204    out : numpy ndarray, optional
205        Array of same shape and data type as `source` in which to store
206        results.
207    transform : Affine transformation object, optional
208        Transformation from pixel coordinates of `source` to the
209        coordinate system of the input `shapes`. See the `transform`
210        property of dataset objects.
211    all_touched : boolean, optional
212        If True, all pixels touched by geometries will be burned in.  If
213        false, only pixels whose center is within the polygon or that
214        are selected by Bresenham's line algorithm will be burned in.
215    merge_alg : MergeAlg, optional
216        Merge algorithm to use. One of:
217            MergeAlg.replace (default):
218                the new value will overwrite the existing value.
219            MergeAlg.add:
220                the new value will be added to the existing raster.
221    default_value : int or float, optional
222        Used as value for all geometries, if not provided in `shapes`.
223    dtype : rasterio or numpy data type, optional
224        Used as data type for results, if `out` is not provided.
225
226    Returns
227    -------
228    numpy ndarray
229        If `out` was not None then `out` is returned, it will have been
230        modified in-place. If `out` was None, this will be a new array.
231
232    Notes
233    -----
234    Valid data types for `fill`, `default_value`, `out`, `dtype` and
235    shape values are "int16", "int32", "uint8", "uint16", "uint32",
236    "float32", and "float64".
237
238    This function requires significant memory resources. The shapes
239    iterator will be materialized to a Python list and another C copy of
240    that list will be made. The `out` array will be copied and
241    additional temporary raster memory equal to 2x the smaller of `out`
242    data or GDAL's max cache size (controlled by GDAL_CACHEMAX, default
243    is 5% of the computer's physical memory) is required.
244
245    If GDAL max cache size is smaller than the output data, the array of
246    shapes will be iterated multiple times. Performance is thus a linear
247    function of buffer size. For maximum speed, ensure that
248    GDAL_CACHEMAX is larger than the size of `out` or `out_shape`.
249
250    """
251    valid_dtypes = (
252        'int16', 'int32', 'uint8', 'uint16', 'uint32', 'float32', 'float64'
253    )
254
255    def format_invalid_dtype(param):
256        return '{0} dtype must be one of: {1}'.format(
257            param, ', '.join(valid_dtypes)
258        )
259
260    def format_cast_error(param, dtype):
261        return '{0} cannot be cast to specified dtype: {1}'.format(param, dtype)
262
263    if fill != 0:
264        fill_array = np.array([fill])
265        if not validate_dtype(fill_array, valid_dtypes):
266            raise ValueError(format_invalid_dtype('fill'))
267
268        if dtype is not None and not can_cast_dtype(fill_array, dtype):
269            raise ValueError(format_cast_error('fill', dtype))
270
271    if default_value != 1:
272        default_value_array = np.array([default_value])
273        if not validate_dtype(default_value_array, valid_dtypes):
274            raise ValueError(format_invalid_dtype('default_value'))
275
276        if dtype is not None and not can_cast_dtype(default_value_array, dtype):
277            raise ValueError(format_cast_error('default_vaue', dtype))
278
279    if dtype is not None and _getnpdtype(dtype).name not in valid_dtypes:
280        raise ValueError(format_invalid_dtype('dtype'))
281
282    valid_shapes = []
283    shape_values = []
284    for index, item in enumerate(shapes):
285        if isinstance(item, (tuple, list)):
286            geom, value = item
287            if value is None:
288                value = fill
289        else:
290            geom = item
291            value = default_value
292        geom = getattr(geom, '__geo_interface__', None) or geom
293
294        if is_valid_geom(geom):
295            shape_values.append(value)
296
297            geom_type = geom['type']
298
299            if geom_type == 'GeometryCollection':
300                # GeometryCollections need to be handled as individual parts to
301                # avoid holes in output:
302                # https://github.com/mapbox/rasterio/issues/1253.
303                # Only 1-level deep since GeoJSON spec discourages nested
304                # GeometryCollections
305                for part in geom['geometries']:
306                    valid_shapes.append((part, value))
307
308            elif geom_type == 'MultiPolygon':
309                # Same issue as above
310                for poly in geom['coordinates']:
311                    valid_shapes.append(({'type': 'Polygon', 'coordinates': poly}, value))
312
313            else:
314                valid_shapes.append((geom, value))
315
316        else:
317            # invalid or empty geometries are skipped and raise a warning instead
318            warnings.warn('Invalid or empty shape {} at index {} will not be rasterized.'.format(geom, index), ShapeSkipWarning)
319
320    if not valid_shapes:
321        raise ValueError('No valid geometry objects found for rasterize')
322
323    shape_values = np.array(shape_values)
324
325    if not validate_dtype(shape_values, valid_dtypes):
326        raise ValueError(format_invalid_dtype('shape values'))
327
328    if dtype is None:
329        dtype = get_minimum_dtype(np.append(shape_values, fill))
330
331    elif not can_cast_dtype(shape_values, dtype):
332        raise ValueError(format_cast_error('shape values', dtype))
333
334    if out is not None:
335        if _getnpdtype(out.dtype).name not in valid_dtypes:
336            raise ValueError(format_invalid_dtype('out'))
337
338        if not can_cast_dtype(shape_values, out.dtype):
339            raise ValueError(format_cast_error('shape values', out.dtype.name))
340
341    elif out_shape is not None:
342
343        if len(out_shape) != 2:
344            raise ValueError('Invalid out_shape, must be 2D')
345
346        out = np.empty(out_shape, dtype=dtype)
347        out.fill(fill)
348
349    else:
350        raise ValueError('Either an out_shape or image must be provided')
351
352    if min(out.shape) == 0:
353        raise ValueError("width and height must be > 0")
354
355    transform = guard_transform(transform)
356    _rasterize(valid_shapes, out, transform, all_touched, merge_alg)
357    return out
358
359
360def bounds(geometry, north_up=True, transform=None):
361    """Return a (left, bottom, right, top) bounding box.
362
363    From Fiona 1.4.8. Modified to return bbox from geometry if available.
364
365    Parameters
366    ----------
367    geometry: GeoJSON-like feature (implements __geo_interface__),
368              feature collection, or geometry.
369
370    Returns
371    -------
372    tuple
373        Bounding box: (left, bottom, right, top)
374    """
375
376    geometry = getattr(geometry, '__geo_interface__', None) or geometry
377
378    if 'bbox' in geometry:
379        return tuple(geometry['bbox'])
380
381    geom = geometry.get('geometry') or geometry
382
383    # geometry must be a geometry, GeometryCollection, or FeatureCollection
384    if not ('coordinates' in geom or 'geometries' in geom or 'features' in geom):
385        raise ValueError(
386            "geometry must be a GeoJSON-like geometry, GeometryCollection, "
387            "or FeatureCollection"
388        )
389
390    return _bounds(geom, north_up=north_up, transform=transform)
391
392
393def geometry_window(
394    dataset,
395    shapes,
396    pad_x=0,
397    pad_y=0,
398    north_up=None,
399    rotated=None,
400    pixel_precision=None,
401    boundless=False,
402):
403    """Calculate the window within the raster that fits the bounds of
404    the geometry plus optional padding.  The window is the outermost
405    pixel indices that contain the geometry (floor of offsets, ceiling
406    of width and height).
407
408    If shapes do not overlap raster, a WindowError is raised.
409
410    Parameters
411    ----------
412    dataset : dataset object opened in 'r' mode
413        Raster for which the mask will be created.
414    shapes : iterable over geometries.
415        A geometry is a GeoJSON-like object or implements the geo
416        interface.  Must be in same coordinate system as dataset.
417    pad_x : float
418        Amount of padding (as fraction of raster's x pixel size) to add
419        to left and right side of bounds.
420    pad_y : float
421        Amount of padding (as fraction of raster's y pixel size) to add
422        to top and bottom of bounds.
423    north_up : optional
424        This parameter is ignored since version 1.2.1. A deprecation
425        warning will be emitted in 1.3.0.
426    rotated : optional
427        This parameter is ignored since version 1.2.1. A deprecation
428        warning will be emitted in 1.3.0.
429    pixel_precision : int or float, optional
430        Number of places of rounding precision or absolute precision for
431        evaluating bounds of shapes.
432    boundless : bool, optional
433        Whether to allow a boundless window or not.
434
435    Returns
436    -------
437    rasterio.windows.Window
438
439    """
440
441    all_bounds = [bounds(shape, transform=~dataset.transform) for shape in shapes]
442
443    cols = [
444        x
445        for (left, bottom, right, top) in all_bounds
446        for x in (left - pad_x, right + pad_x, right + pad_x, left - pad_x)
447    ]
448    rows = [
449        y
450        for (left, bottom, right, top) in all_bounds
451        for y in (top - pad_y, top - pad_y, bottom + pad_y, bottom + pad_y)
452    ]
453
454    row_start, row_stop = int(math.floor(min(rows))), int(math.ceil(max(rows)))
455    col_start, col_stop = int(math.floor(min(cols))), int(math.ceil(max(cols)))
456
457    window = Window(
458        col_off=col_start,
459        row_off=row_start,
460        width=max(col_stop - col_start, 0.0),
461        height=max(row_stop - row_start, 0.0),
462    )
463
464    # Make sure that window overlaps raster
465    raster_window = Window(0, 0, dataset.width, dataset.height)
466    if not boundless:
467        window = window.intersection(raster_window)
468
469    return window
470
471
472def is_valid_geom(geom):
473    """
474    Checks to see if geometry is a valid GeoJSON geometry type or
475    GeometryCollection.  Geometry must be GeoJSON or implement the geo
476    interface.
477
478    Geometries must be non-empty, and have at least x, y coordinates.
479
480    Note: only the first coordinate is checked for validity.
481
482    Parameters
483    ----------
484    geom: an object that implements the geo interface or GeoJSON-like object
485
486    Returns
487    -------
488    bool: True if object is a valid GeoJSON geometry type
489    """
490
491    geom_types = {'Point', 'MultiPoint', 'LineString', 'LinearRing',
492                  'MultiLineString', 'Polygon', 'MultiPolygon'}
493
494    geom = getattr(geom, '__geo_interface__', None) or geom
495
496    try:
497        geom_type = geom["type"]
498        if geom_type not in geom_types.union({'GeometryCollection'}):
499            return False
500
501    except (KeyError, TypeError):
502        return False
503
504    if geom_type in geom_types:
505        if 'coordinates' not in geom:
506            return False
507
508        coords = geom['coordinates']
509
510        if geom_type == 'Point':
511            # Points must have at least x, y
512            return len(coords) >= 2
513
514        if geom_type == 'MultiPoint':
515            # Multi points must have at least one point with at least x, y
516            return len(coords) > 0 and len(coords[0]) >= 2
517
518        if geom_type == 'LineString':
519            # Lines must have at least 2 coordinates and at least x, y for
520            # a coordinate
521            return len(coords) >= 2 and len(coords[0]) >= 2
522
523        if geom_type == 'LinearRing':
524            # Rings must have at least 4 coordinates and at least x, y for
525            # a coordinate
526            return len(coords) >= 4 and len(coords[0]) >= 2
527
528        if geom_type == 'MultiLineString':
529            # Multi lines must have at least one LineString
530            return (len(coords) > 0 and len(coords[0]) >= 2 and
531                    len(coords[0][0]) >= 2)
532
533        if geom_type == 'Polygon':
534            # Polygons must have at least 1 ring, with at least 4 coordinates,
535            # with at least x, y for a coordinate
536            return (len(coords) > 0 and len(coords[0]) >= 4 and
537                    len(coords[0][0]) >= 2)
538
539        if geom_type == 'MultiPolygon':
540            # Muti polygons must have at least one Polygon
541            return (len(coords) > 0 and len(coords[0]) > 0 and
542                    len(coords[0][0]) >= 4 and len(coords[0][0][0]) >= 2)
543
544    if geom_type == 'GeometryCollection':
545        if 'geometries' not in geom:
546            return False
547
548        if not len(geom['geometries']) > 0:
549            # While technically valid according to GeoJSON spec, an empty
550            # GeometryCollection will cause issues if used in rasterio
551            return False
552
553        for g in geom['geometries']:
554            if not is_valid_geom(g):
555                return False  # short-circuit and fail early
556
557    return True
558
559
560def dataset_features(
561        src,
562        bidx=None,
563        sampling=1,
564        band=True,
565        as_mask=False,
566        with_nodata=False,
567        geographic=True,
568        precision=-1):
569    """Yield GeoJSON features for the dataset
570
571    The geometries are polygons bounding contiguous regions of the same raster value.
572
573    Parameters
574    ----------
575    src: Rasterio Dataset
576
577    bidx: int
578        band index
579
580    sampling: int (DEFAULT: 1)
581        Inverse of the sampling fraction; a value of 10 decimates
582
583    band: boolean (DEFAULT: True)
584        extract features from a band (True) or a mask (False)
585
586    as_mask: boolean (DEFAULT: False)
587        Interpret band as a mask and output only one class of valid data shapes?
588
589    with_nodata: boolean (DEFAULT: False)
590        Include nodata regions?
591
592    geographic: str (DEFAULT: True)
593        Output shapes in EPSG:4326? Otherwise use the native CRS.
594
595    precision: int (DEFAULT: -1)
596        Decimal precision of coordinates. -1 for full float precision output
597
598    Yields
599    ------
600    GeoJSON-like Feature dictionaries for shapes found in the given band
601    """
602    if bidx is not None and bidx > src.count:
603        raise ValueError('bidx is out of range for raster')
604
605    img = None
606    msk = None
607
608    # Adjust transforms.
609    transform = src.transform
610    if sampling > 1:
611        # Determine the target shape (to decimate)
612        shape = (int(math.ceil(src.height / sampling)),
613                 int(math.ceil(src.width / sampling)))
614
615        # Calculate independent sampling factors
616        x_sampling = src.width / shape[1]
617        y_sampling = src.height / shape[0]
618
619        # Decimation of the raster produces a georeferencing
620        # shift that we correct with a translation.
621        transform *= Affine.translation(
622            src.width % x_sampling, src.height % y_sampling)
623
624        # And follow by scaling.
625        transform *= Affine.scale(x_sampling, y_sampling)
626
627    # Most of the time, we'll use the valid data mask.
628    # We skip reading it if we're extracting every possible
629    # feature (even invalid data features) from a band.
630    if not band or (band and not as_mask and not with_nodata):
631        if sampling == 1:
632            msk = src.read_masks(bidx)
633        else:
634            msk_shape = shape
635            if bidx is None:
636                msk = np.zeros(
637                    (src.count,) + msk_shape, 'uint8')
638            else:
639                msk = np.zeros(msk_shape, 'uint8')
640            msk = src.read_masks(bidx, msk)
641
642        if bidx is None:
643            msk = np.logical_or.reduce(msk).astype('uint8')
644
645        # Possibly overridden below.
646        img = msk
647
648    # Read the band data unless the --mask option is given.
649    if band:
650        if sampling == 1:
651            img = src.read(bidx, masked=False)
652        else:
653            img = np.zeros(
654                shape,
655                dtype=src.dtypes[src.indexes.index(bidx)])
656            img = src.read(bidx, img, masked=False)
657
658    # If as_mask option was given, convert the image
659    # to a binary image. This reduces the number of shape
660    # categories to 2 and likely reduces the number of
661    # shapes.
662    if as_mask:
663        tmp = np.ones_like(img, 'uint8') * 255
664        tmp[img == 0] = 0
665        img = tmp
666        if not with_nodata:
667            msk = tmp
668
669    # Prepare keyword arguments for shapes().
670    kwargs = {'transform': transform}
671    if not with_nodata:
672        kwargs['mask'] = msk
673
674    src_basename = os.path.basename(src.name)
675
676    # Yield GeoJSON features.
677    for i, (g, val) in enumerate(
678            rasterio.features.shapes(img, **kwargs)):
679        if geographic:
680            g = warp.transform_geom(
681                src.crs, 'EPSG:4326', g,
682                antimeridian_cutting=True, precision=precision)
683        xs, ys = zip(*coords(g))
684        yield {
685            'type': 'Feature',
686            'id': "{0}:{1}".format(src_basename, i),
687            'properties': {
688                'val': val,
689                'filename': src_basename
690            },
691            'bbox': [min(xs), min(ys), max(xs), max(ys)],
692            'geometry': g
693        }
694