Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

New options: Percent coverage selection and weighting #136

Open
wants to merge 26 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
26 commits
Select commit Hold shift + click to select a range
34dff52
Add cell percent coverage selection method and stats weighting (with …
sgoodm Nov 21, 2016
ba627d7
Fix tests.
sgoodm Nov 21, 2016
6725da1
Modify all_touched for percent cover to rely on user input
sgoodm Jan 11, 2017
4ffe2ad
Clearly distinguish between raster mask and weight arrays
cmutel Mar 3, 2017
c0d9bc3
Pixels aren't always square
cmutel Mar 10, 2017
a03eb04
Rename rasterize array for pct cover case, fix masked array mask defi…
sgoodm Mar 21, 2017
795d86e
Add tests for percent cover functions
sgoodm Mar 21, 2017
a9a4a3c
Fix
sgoodm Mar 21, 2017
cfa198a
Fix mask conditionals for non-bool arrays
sgoodm Mar 21, 2017
7000632
Update old version of test case
sgoodm Mar 21, 2017
b52a368
Merge branch 'percent_cover' into rectangular-pixels
sgoodm Mar 24, 2017
85f62a1
Merge pull request #3 from cmutel/rectangular-pixels
sgoodm Mar 24, 2017
76c8667
Merge branch 'percent_cover' into percent_cover
sgoodm Mar 24, 2017
cb87d40
Merge pull request #2 from cmutel/percent_cover
sgoodm Mar 24, 2017
644ddc3
Update nodata stat for percent cover changes (may need review)
sgoodm Mar 24, 2017
143a7cf
Merge branch 'geo-master' into percent_cover
sgoodm Mar 30, 2017
7a59a52
Merge pull request #4 from sgoodm/percent_cover
sgoodm Mar 30, 2017
41222f9
Resolve upstream merge
sgoodm Aug 31, 2018
6955476
Use 'like' object for rasterize funcs to keep inline with original code
sgoodm Aug 31, 2018
1c39067
Add correct usage of in rasterize_pctcover_geom func
sgoodm Aug 31, 2018
2fe6d66
Reduce main function complexity by moving percent cover stat calc log…
sgoodm Aug 31, 2018
d986437
Fix indent
sgoodm Aug 31, 2018
46fcebc
Fix check for percent cover usage in stat functions
sgoodm Aug 31, 2018
fc558c2
Reduce lines to get test coverage
sgoodm Sep 4, 2018
c475b58
Add in some easy test coverage
sgoodm Sep 4, 2018
a52055f
Add missing import for tests
sgoodm Sep 4, 2018
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
94 changes: 89 additions & 5 deletions src/rasterstats/main.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,9 @@

from .io import read_features, Raster
from .utils import (rasterize_geom, get_percentile, check_stats,
remap_categories, key_assoc_val, boxify_points)
remap_categories, key_assoc_val, boxify_points,
rasterize_pctcover_geom,
rs_mean, rs_count, rs_sum)


def raster_stats(*args, **kwargs):
Expand Down Expand Up @@ -39,6 +41,9 @@ def gen_zonal_stats(
affine=None,
stats=None,
all_touched=False,
percent_cover_selection=None,
percent_cover_weighting=False,
percent_cover_scale=None,
categorical=False,
category_map=None,
add_stats=None,
Expand Down Expand Up @@ -83,6 +88,29 @@ def gen_zonal_stats(
those having a center point within the polygon.
defaults to `False`

percent_cover_selection: float, optional
Include only raster cells that have at least the given percent
covered by the vector feature. Requires percent_cover_scale argument
be used to specify scale at which to generate percent coverage
estimates

percent_cover_weighting: bool, optional
whether or not to use percent coverage of cells during calculations
to adjust stats (only applies to mean, count and sum)

percent_cover_scale: int, optional
Scale used when generating percent coverage estimates of each
raster cell by vector feature. Percent coverage is generated by
rasterizing the feature at a finer resolution than the raster
(based on percent_cover_scale value) then using a summation to aggregate
to the raster resolution and dividing by the square of percent_cover_scale
to get percent coverage value for each cell. Increasing percent_cover_scale
will increase the accuracy of percent coverage values; three orders
magnitude finer resolution (percent_cover_scale=1000) is usually enough to
get coverage estimates with <1% error in individual edge cells coverage
estimates, though much smaller values (e.g., percent_cover_scale=10) are often
sufficient (<10% error) and require less memory.

categorical: bool, optional

category_map: dict
Expand Down Expand Up @@ -142,20 +170,74 @@ def gen_zonal_stats(
warnings.warn("Use `band` to specify band number", DeprecationWarning)
band = band_num

# check inputs related to percent coverage
percent_cover = False
if percent_cover_weighting or percent_cover_selection is not None:
percent_cover = True
if percent_cover_scale is None:
warnings.warn('No value for `percent_cover_scale` was given. '
'Using default value of 10.')
percent_cover_scale = 10

try:
if percent_cover_scale != int(percent_cover_scale):
warnings.warn('Value for `percent_cover_scale` given ({0}) '
'was converted to int ({1}) but does not '
'match original value'.format(
percent_cover_scale, int(percent_cover_scale)))
Copy link
Owner

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Can you explain why we need to limit to integers?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

reshape performed in the rebin_sum function requires ints
https://docs.scipy.org/doc/numpy/reference/generated/numpy.reshape.html


percent_cover_scale = int(percent_cover_scale)

if percent_cover_scale <= 1:
raise Exception('Value for `percent_cover_scale` must be '
'greater than one ({0})'.format(
percent_cover_scale))

except:
raise Exception('Invalid value for `percent_cover_scale` '
'provided ({0}). Must be type int.'.format(
percent_cover_scale))

if percent_cover_selection is not None:
try:
percent_cover_selection = float(percent_cover_selection)
except:
raise Exception('Invalid value for `percent_cover_selection` '
'provided ({0}). Must be able to be converted '
'to a float.'.format(percent_cover_selection))

# if not all_touched:
# warnings.warn('`all_touched` was not enabled but an option requiring '
# 'percent_cover calculations was selected. Automatically '
# 'enabling `all_touched`.')
# all_touched = True


with Raster(raster, affine, nodata, band) as rast:
features_iter = read_features(vectors, layer)
for _, feat in enumerate(features_iter):
geom = shape(feat['geometry'])

if 'Point' in geom.type:
geom = boxify_points(geom, rast)
percent_cover = False

geom_bounds = tuple(geom.bounds)

fsrc = rast.read(bounds=geom_bounds)

# rasterized geometry
rv_array = rasterize_geom(geom, like=fsrc, all_touched=all_touched)
if percent_cover:
cover_weights = rasterize_pctcover_geom(
geom, like=fsrc,
scale=percent_cover_scale,
all_touched=all_touched)
rv_array = cover_weights > (percent_cover_selection or 0)
else:
cover_weights = None
rv_array = rasterize_geom(
geom, like=fsrc,
all_touched=all_touched)

# nodata mask
isnodata = (fsrc.array == fsrc.nodata)
Expand All @@ -169,10 +251,12 @@ def gen_zonal_stats(

# Mask the source data array
# mask everything that is not a valid value or not within our geom

masked = np.ma.MaskedArray(
fsrc.array,
mask=(isnodata | ~rv_array))


# If we're on 64 bit platform and the array is an integer type
# make sure we cast to 64 bit to avoid overflow.
# workaround for https://github.com/numpy/numpy/issues/8433
Expand Down Expand Up @@ -212,12 +296,12 @@ def gen_zonal_stats(
if 'max' in stats:
feature_stats['max'] = float(masked.max())
if 'mean' in stats:
feature_stats['mean'] = float(masked.mean())
feature_stats['mean'] = rs_mean(masked, cover_weights)
if 'count' in stats:
feature_stats['count'] = int(masked.count())
feature_stats['count'] = rs_count(masked, cover_weights)
# optional
if 'sum' in stats:
feature_stats['sum'] = float(masked.sum())
feature_stats['sum'] = rs_sum(masked, cover_weights)
if 'std' in stats:
feature_stats['std'] = float(masked.std())
if 'median' in stats:
Expand Down
76 changes: 75 additions & 1 deletion src/rasterstats/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,10 @@
from __future__ import absolute_import
from __future__ import division
import sys
import numpy as np
from rasterio import features
from affine import Affine
from numpy import min_scalar_type
from shapely.geometry import box, MultiPolygon
from .io import window_bounds

Expand Down Expand Up @@ -45,10 +48,54 @@ def rasterize_geom(geom, like, all_touched=False):
fill=0,
dtype='uint8',
all_touched=all_touched)

return rv_array.astype(bool)


# https://stackoverflow.com/questions/8090229/
# resize-with-averaging-or-rebin-a-numpy-2d-array/8090605#8090605
def rebin_sum(a, shape, dtype):
sh = shape[0],a.shape[0]//shape[0],shape[1],a.shape[1]//shape[1]
return a.reshape(sh).sum(-1, dtype=dtype).sum(1, dtype=dtype)
Copy link
Owner

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I haven't dug into this code but why choose this implementation over other methods of resampling? Specifically, using Rasterio's resampling techniques would give us more control over the resampling methods versus assuming sum: https://mapbox.github.io/rasterio/topics/resampling.html

"rebin" implies categorizing pixel values, I think "upsample" or similar would be a more accurate function name.

Copy link
Contributor Author

@sgoodm sgoodm Feb 5, 2018

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I don't think I looked into Rasterio's resampling methods, but I tested out a couple of different implementations (one was a proof of concept you put together for Rasterio, rasterio/rasterio#232, another was a more generalized aggregation scheme I pulled from another project of mine which had way too much overhead for what was needed here) and this method was a fair bit faster with less code. My main concern with any method here is going to be minimizing the additional time/memory required to run when using this feature.

Did you have a use case in mind that would require using a method other than sum?

I am on board with renaming to something more accurate, I had just kept it similar to the original function from SO I used.



class objectview(object):
def __init__(self, d):
self.__dict__ = d

def rasterize_pctcover_geom(geom, like, scale=None, all_touched=False):
"""
Parameters
----------
geom: GeoJSON geometry
like: raster object with desired shape and transform
scale: scale at which to generate percent cover estimate

Returns
-------
ndarray: float32
"""
scale = scale if scale is not None else 10
min_dtype = min_scalar_type(scale**2)

pixel_size_lon = like.affine[0]/scale
pixel_size_lat = like.affine[4]/scale

topleftlon = like.affine[2]
topleftlat = like.affine[5]

new_affine = Affine(pixel_size_lon, 0, topleftlon,
0, pixel_size_lat, topleftlat)

new_shape = (like.shape[0]*scale, like.shape[1]*scale)

new_like = objectview({'shape': new_shape, 'affine': new_affine})

rv_array = rasterize_geom(geom, new_like, all_touched=all_touched)
rv_array = rebin_sum(rv_array, like.shape, min_dtype)

return rv_array.astype('float32') / (scale**2)


def stats_to_csv(stats):
if sys.version_info[0] >= 3:
from io import StringIO as IO # pragma: no cover
Expand Down Expand Up @@ -146,3 +193,30 @@ def boxify_points(geom, rast):
geoms.append(box(*window_bounds(win, rast.affine)).buffer(buff))

return MultiPolygon(geoms)



def rs_mean(masked, cover_weights=None):
if cover_weights is not None:
val = float(
np.sum(masked * cover_weights) /
np.sum(~masked.mask * cover_weights))
else:
val = float(masked.mean())
return val


def rs_count(masked, cover_weights=None):
if cover_weights is not None:
val = float(np.sum(~masked.mask * cover_weights))
else:
val = int(masked.count())
return val


def rs_sum(masked, cover_weights=None):
if cover_weights is not None:
val = float(np.sum(masked * cover_weights))
else:
val = float(masked.sum())
return val
19 changes: 19 additions & 0 deletions tests/test_cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,25 @@ def test_cli_feature():
assert 'test_count' not in feature['properties']


def test_cli_feature_info():
raster = os.path.join(os.path.dirname(__file__), 'data/slope.tif')
vector = os.path.join(os.path.dirname(__file__), 'data/feature.geojson')
runner = CliRunner()
warnings.simplefilter('ignore')
result = runner.invoke(zonalstats, [vector,
'--raster', raster,
'--stats', 'mean',
'--prefix', 'test_',
'--info'])
assert result.exit_code == 0
outdata = json.loads(result.output)
assert len(outdata['features']) == 1
feature = outdata['features'][0]
assert 'test_mean' in feature['properties']
assert round(feature['properties']['test_mean'], 2) == 14.66
assert 'test_count' not in feature['properties']


def test_cli_feature_stdin():
raster = os.path.join(os.path.dirname(__file__), 'data/slope.tif')
vector = os.path.join(os.path.dirname(__file__), 'data/feature.geojson')
Expand Down
7 changes: 7 additions & 0 deletions tests/test_point.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
import os
import pytest
import rasterio
from rasterstats.point import point_window_unitxy, bilinear, geom_xys
from rasterstats import point_query
Expand Down Expand Up @@ -86,6 +87,12 @@ def test_point_query():
assert round(val) == 74


def test_point_query_invalid_interp():
point = "POINT(245309 1000064)"
with pytest.raises(ValueError):
point_query(point, raster, interpolate="invalid_type")


def test_point_query_geojson():
point = "POINT(245309 1000064)"
features = point_query(point, raster, property_name="TEST", geojson_out=True)
Expand Down
57 changes: 55 additions & 2 deletions tests/test_utils.py
Original file line number Diff line number Diff line change
@@ -1,9 +1,12 @@
import sys
import os
import pytest
from shapely.geometry import LineString
import numpy as np
from affine import Affine
from shapely.geometry import LineString, Polygon
from rasterstats.utils import \
stats_to_csv, get_percentile, remap_categories, boxify_points
stats_to_csv, get_percentile, remap_categories, boxify_points, \
rebin_sum, rasterize_pctcover_geom
from rasterstats import zonal_stats
from rasterstats.utils import VALID_STATS

Expand Down Expand Up @@ -63,3 +66,53 @@ def test_boxify_non_point():
line = LineString([(0, 0), (1, 1)])
with pytest.raises(ValueError):
boxify_points(line, None)


def test_rebin_sum():
test_input = np.array(
[
[1, 1, 2, 2],
[1, 1, 2, 2],
[3, 3, 4, 4],
[3, 3, 4, 4]
])
test_output = rebin_sum(test_input, (2,2), np.int32)
correct_output = np.array([[4, 8],[12, 16]])
assert np.array_equal(test_output, correct_output)


def test_rasterize_pctcover_geom():
# https://goodcode.io/articles/python-dict-object/
class objectview(object):
def __init__(self, d):
self.__dict__ = d

polygon_a = Polygon([[0, 0], [2, 0], [2, 2], [0, 2]])
shape_a = (2, 2)
affine_a = Affine(1, 0, 0,
0, -1, 2)
like_a = objectview({'shape': shape_a, 'affine': affine_a})

pct_cover_a = rasterize_pctcover_geom(polygon_a, like_a, scale=10, all_touched=False)
correct_output_a = np.array([[1, 1], [1, 1]])
assert np.array_equal(pct_cover_a, correct_output_a)

polygon_b = Polygon([[0.5, 0.5], [1.5, 0.5], [1.5, 1.5], [0.5, 1.5]])
shape_b = (2, 2)
affine_b = Affine(1, 0, 0,
0, -1, 2)
like_b = objectview({'shape': shape_b, 'affine': affine_b})

pct_cover_b = rasterize_pctcover_geom(polygon_b, like_b, scale=10, all_touched=False)
correct_output_b = np.array([[0.25, 0.25], [0.25, 0.25]])
assert np.array_equal(pct_cover_b, correct_output_b)

polygon_c = Polygon([[0.5, 0.5], [1.5, 0.5], [1.5, 1.5], [0.5, 1.5]])
shape_c = (2, 2)
affine_c = Affine(1, 0, 0,
0, -1, 2)
like_c = objectview({'shape': shape_c, 'affine': affine_c})

pct_cover_c = rasterize_pctcover_geom(polygon_c, like_c, scale=100, all_touched=False)
correct_output_c = np.array([[0.25, 0.25], [0.25, 0.25]])
assert np.array_equal(pct_cover_c, correct_output_c)
Loading