perrygeo · sgoodm · Nov 21, 2016 · Nov 21, 2016 · Jan 11, 2017 · Mar 3, 2017
diff --git a/src/rasterstats/main.py b/src/rasterstats/main.py
@@ -10,7 +10,9 @@
 
 from .io import read_features, Raster
 from .utils import (rasterize_geom, get_percentile, check_stats,
-                    remap_categories, key_assoc_val, boxify_points)
+                    remap_categories, key_assoc_val, boxify_points,
+                    rasterize_pctcover_geom,
+                    rs_mean, rs_count, rs_sum)
 
 
 def raster_stats(*args, **kwargs):
@@ -39,6 +41,9 @@ def gen_zonal_stats(
         affine=None,
         stats=None,
         all_touched=False,
+        percent_cover_selection=None,
+        percent_cover_weighting=False,
+        percent_cover_scale=None,
         categorical=False,
         category_map=None,
         add_stats=None,
@@ -83,6 +88,29 @@ def gen_zonal_stats(
         those having a center point within the polygon.
         defaults to `False`
 
+    percent_cover_selection: float, optional
+        Include only raster cells that have at least the given percent
+        covered by the vector feature. Requires percent_cover_scale argument
+        be used to specify scale at which to generate percent coverage
+        estimates
+
+    percent_cover_weighting: bool, optional
+        whether or not to use percent coverage of cells during calculations
+        to adjust stats (only applies to mean, count and sum)
+
+    percent_cover_scale: int, optional
+        Scale used when generating percent coverage estimates of each
+        raster cell by vector feature. Percent coverage is generated by
+        rasterizing the feature at a finer resolution than the raster
+        (based on percent_cover_scale value) then using a summation to aggregate
+        to the raster resolution and dividing by the square of percent_cover_scale
+        to get percent coverage value for each cell. Increasing percent_cover_scale
+        will increase the accuracy of percent coverage values; three orders
+        magnitude finer resolution (percent_cover_scale=1000) is usually enough to
+        get coverage estimates with <1% error in individual edge cells coverage
+        estimates, though much smaller values (e.g., percent_cover_scale=10) are often
+        sufficient (<10% error) and require less memory.
+
     categorical: bool, optional
 
     category_map: dict
@@ -142,20 +170,74 @@ def gen_zonal_stats(
         warnings.warn("Use `band` to specify band number", DeprecationWarning)
         band = band_num
 
+    # check inputs related to percent coverage
+    percent_cover = False
+    if percent_cover_weighting or percent_cover_selection is not None:
+        percent_cover = True
+        if percent_cover_scale is None:
+            warnings.warn('No value for `percent_cover_scale` was given. '
+                          'Using default value of 10.')
+            percent_cover_scale = 10
+
+        try:
+            if percent_cover_scale != int(percent_cover_scale):
+                warnings.warn('Value for `percent_cover_scale` given ({0}) '
+                              'was converted to int ({1}) but does not '
+                              'match original value'.format(
+                                percent_cover_scale, int(percent_cover_scale)))
+
+            percent_cover_scale = int(percent_cover_scale)
+
+            if percent_cover_scale <= 1:
+                raise Exception('Value for `percent_cover_scale` must be '
+                                'greater than one ({0})'.format(
+                                    percent_cover_scale))
+
+        except:
+            raise Exception('Invalid value for `percent_cover_scale` '
+                            'provided ({0}). Must be type int.'.format(
+                                percent_cover_scale))
+
+        if percent_cover_selection is not None:
+            try:
+                percent_cover_selection = float(percent_cover_selection)
+            except:
+                raise Exception('Invalid value for `percent_cover_selection` '
+                                'provided ({0}). Must be able to be converted '
+                                'to a float.'.format(percent_cover_selection))
+
+        # if not all_touched:
+        #     warnings.warn('`all_touched` was not enabled but an option requiring '
+        #                   'percent_cover calculations was selected. Automatically '
+        #                   'enabling `all_touched`.')
+        # all_touched = True
+
+
     with Raster(raster, affine, nodata, band) as rast:
         features_iter = read_features(vectors, layer)
         for _, feat in enumerate(features_iter):
             geom = shape(feat['geometry'])
 
             if 'Point' in geom.type:
                 geom = boxify_points(geom, rast)
+                percent_cover = False
 
             geom_bounds = tuple(geom.bounds)
 
             fsrc = rast.read(bounds=geom_bounds)
 
             # rasterized geometry
-            rv_array = rasterize_geom(geom, like=fsrc, all_touched=all_touched)
+            if percent_cover:
+                cover_weights = rasterize_pctcover_geom(
+                    geom, like=fsrc,
+                    scale=percent_cover_scale,
+                    all_touched=all_touched)
+                rv_array = cover_weights > (percent_cover_selection or 0)
+            else:
+                cover_weights = None
+                rv_array = rasterize_geom(
+                    geom, like=fsrc,
+                    all_touched=all_touched)
 
             # nodata mask
             isnodata = (fsrc.array == fsrc.nodata)
@@ -169,10 +251,12 @@ def gen_zonal_stats(
 
             # Mask the source data array
             # mask everything that is not a valid value or not within our geom
+
             masked = np.ma.MaskedArray(
                 fsrc.array,
                 mask=(isnodata | ~rv_array))
 
+
             # If we're on 64 bit platform and the array is an integer type
             # make sure we cast to 64 bit to avoid overflow.
             # workaround for https://github.com/numpy/numpy/issues/8433
@@ -212,12 +296,12 @@ def gen_zonal_stats(
                 if 'max' in stats:
                     feature_stats['max'] = float(masked.max())
                 if 'mean' in stats:
-                    feature_stats['mean'] = float(masked.mean())
+                    feature_stats['mean'] = rs_mean(masked, cover_weights)
                 if 'count' in stats:
-                    feature_stats['count'] = int(masked.count())
+                    feature_stats['count'] = rs_count(masked, cover_weights)
                 # optional
                 if 'sum' in stats:
-                    feature_stats['sum'] = float(masked.sum())
+                    feature_stats['sum'] = rs_sum(masked, cover_weights)
                 if 'std' in stats:
                     feature_stats['std'] = float(masked.std())
                 if 'median' in stats:

diff --git a/src/rasterstats/utils.py b/src/rasterstats/utils.py
@@ -2,7 +2,10 @@
 from __future__ import absolute_import
 from __future__ import division
 import sys
+import numpy as np
 from rasterio import features
+from affine import Affine
+from numpy import min_scalar_type
 from shapely.geometry import box, MultiPolygon
 from .io import window_bounds
 
@@ -45,10 +48,54 @@ def rasterize_geom(geom, like, all_touched=False):
         fill=0,
         dtype='uint8',
         all_touched=all_touched)
-
     return rv_array.astype(bool)
 
 
+# https://stackoverflow.com/questions/8090229/
+#   resize-with-averaging-or-rebin-a-numpy-2d-array/8090605#8090605
+def rebin_sum(a, shape, dtype):
+    sh = shape[0],a.shape[0]//shape[0],shape[1],a.shape[1]//shape[1]
+    return a.reshape(sh).sum(-1, dtype=dtype).sum(1, dtype=dtype)
+
+
+class objectview(object):
+    def __init__(self, d):
+        self.__dict__ = d
+
+def rasterize_pctcover_geom(geom, like, scale=None, all_touched=False):
+    """
+    Parameters
+    ----------
+    geom: GeoJSON geometry
+    like: raster object with desired shape and transform
+    scale: scale at which to generate percent cover estimate
+
+    Returns
+    -------
+    ndarray: float32
+    """
+    scale = scale if scale is not None else 10
+    min_dtype = min_scalar_type(scale**2)
+
+    pixel_size_lon = like.affine[0]/scale
+    pixel_size_lat = like.affine[4]/scale
+
+    topleftlon = like.affine[2]
+    topleftlat = like.affine[5]
+
+    new_affine = Affine(pixel_size_lon, 0, topleftlon,
+                        0, pixel_size_lat, topleftlat)
+
+    new_shape = (like.shape[0]*scale, like.shape[1]*scale)
+
+    new_like = objectview({'shape': new_shape, 'affine': new_affine})
+
+    rv_array = rasterize_geom(geom, new_like, all_touched=all_touched)
+    rv_array = rebin_sum(rv_array, like.shape, min_dtype)
+
+    return rv_array.astype('float32') / (scale**2)
+
+
 def stats_to_csv(stats):
     if sys.version_info[0] >= 3:
         from io import StringIO as IO  # pragma: no cover
@@ -146,3 +193,30 @@ def boxify_points(geom, rast):
         geoms.append(box(*window_bounds(win, rast.affine)).buffer(buff))
 
     return MultiPolygon(geoms)
+
+
+
+def rs_mean(masked, cover_weights=None):
+    if cover_weights is not None:
+        val = float(
+            np.sum(masked * cover_weights) /
+            np.sum(~masked.mask * cover_weights))
+    else:
+        val = float(masked.mean())
+    return val
+
+
+def rs_count(masked, cover_weights=None):
+    if cover_weights is not None:
+        val = float(np.sum(~masked.mask * cover_weights))
+    else:
+        val = int(masked.count())
+    return val
+
+
+def rs_sum(masked, cover_weights=None):
+    if cover_weights is not None:
+        val = float(np.sum(masked * cover_weights))
+    else:
+        val = float(masked.sum())
+    return val
diff --git a/tests/test_cli.py b/tests/test_cli.py
@@ -26,6 +26,25 @@ def test_cli_feature():
     assert 'test_count' not in feature['properties']
 
 
+def test_cli_feature_info():
+    raster = os.path.join(os.path.dirname(__file__), 'data/slope.tif')
+    vector = os.path.join(os.path.dirname(__file__), 'data/feature.geojson')
+    runner = CliRunner()
+    warnings.simplefilter('ignore')
+    result = runner.invoke(zonalstats, [vector,
+                                        '--raster', raster,
+                                        '--stats', 'mean',
+                                        '--prefix', 'test_',
+                                        '--info'])
+    assert result.exit_code == 0
+    outdata = json.loads(result.output)
+    assert len(outdata['features']) == 1
+    feature = outdata['features'][0]
+    assert 'test_mean' in feature['properties']
+    assert round(feature['properties']['test_mean'], 2) == 14.66
+    assert 'test_count' not in feature['properties']
+
+
 def test_cli_feature_stdin():
     raster = os.path.join(os.path.dirname(__file__), 'data/slope.tif')
     vector = os.path.join(os.path.dirname(__file__), 'data/feature.geojson')

diff --git a/tests/test_point.py b/tests/test_point.py
@@ -1,4 +1,5 @@
 import os
+import pytest
 import rasterio
 from rasterstats.point import point_window_unitxy, bilinear, geom_xys
 from rasterstats import point_query
@@ -86,6 +87,12 @@ def test_point_query():
     assert round(val) == 74
 
 
+def test_point_query_invalid_interp():
+    point = "POINT(245309 1000064)"
+    with pytest.raises(ValueError):
+        point_query(point, raster, interpolate="invalid_type")
+
+
 def test_point_query_geojson():
     point = "POINT(245309 1000064)"
     features = point_query(point, raster, property_name="TEST", geojson_out=True)

diff --git a/tests/test_utils.py b/tests/test_utils.py
@@ -1,9 +1,12 @@
 import sys
 import os
 import pytest
-from shapely.geometry import LineString
+import numpy as np
+from affine import Affine
+from shapely.geometry import LineString, Polygon
 from rasterstats.utils import \
-    stats_to_csv, get_percentile, remap_categories, boxify_points
+    stats_to_csv, get_percentile, remap_categories, boxify_points, \
+    rebin_sum, rasterize_pctcover_geom
 from rasterstats import zonal_stats
 from rasterstats.utils import VALID_STATS
 
@@ -63,3 +66,53 @@ def test_boxify_non_point():
     line = LineString([(0, 0), (1, 1)])
     with pytest.raises(ValueError):
         boxify_points(line, None)
+
+
+def test_rebin_sum():
+    test_input = np.array(
+        [
+            [1, 1, 2, 2],
+            [1, 1, 2, 2],
+            [3, 3, 4, 4],
+            [3, 3, 4, 4]
+        ])
+    test_output = rebin_sum(test_input, (2,2), np.int32)
+    correct_output = np.array([[4, 8],[12, 16]])
+    assert np.array_equal(test_output, correct_output)
+
+
+def test_rasterize_pctcover_geom():
+    # https://goodcode.io/articles/python-dict-object/
+    class objectview(object):
+        def __init__(self, d):
+            self.__dict__ = d
+
+    polygon_a = Polygon([[0, 0], [2, 0], [2, 2], [0, 2]])
+    shape_a = (2, 2)
+    affine_a = Affine(1, 0, 0,
+                      0, -1, 2)
+    like_a = objectview({'shape': shape_a, 'affine': affine_a})
+
+    pct_cover_a = rasterize_pctcover_geom(polygon_a, like_a, scale=10, all_touched=False)
+    correct_output_a = np.array([[1, 1], [1, 1]])
+    assert np.array_equal(pct_cover_a, correct_output_a)
+
+    polygon_b = Polygon([[0.5, 0.5], [1.5, 0.5], [1.5, 1.5], [0.5, 1.5]])
+    shape_b = (2, 2)
+    affine_b = Affine(1, 0, 0,
+                      0, -1, 2)
+    like_b = objectview({'shape': shape_b, 'affine': affine_b})
+
+    pct_cover_b = rasterize_pctcover_geom(polygon_b, like_b, scale=10, all_touched=False)
+    correct_output_b = np.array([[0.25, 0.25], [0.25, 0.25]])
+    assert np.array_equal(pct_cover_b, correct_output_b)
+
+    polygon_c = Polygon([[0.5, 0.5], [1.5, 0.5], [1.5, 1.5], [0.5, 1.5]])
+    shape_c = (2, 2)
+    affine_c = Affine(1, 0, 0,
+                      0, -1, 2)
+    like_c = objectview({'shape': shape_c, 'affine': affine_c})
+
+    pct_cover_c = rasterize_pctcover_geom(polygon_c, like_c, scale=100, all_touched=False)
+    correct_output_c = np.array([[0.25, 0.25], [0.25, 0.25]])
+    assert np.array_equal(pct_cover_c, correct_output_c)