From e2ecd33baf21e973d70a0373e2ca1bdba17af86e Mon Sep 17 00:00:00 2001 From: Joachim Ungar Date: Sun, 25 Feb 2024 20:42:17 +0100 Subject: [PATCH 01/28] add examples for typed processes --- mapchete/__init__.py | 10 ++ mapchete/config/process_func.py | 4 +- mapchete/processing/mp.py | 13 ++- mapchete/processing/tasks.py | 20 ++-- mapchete/protocols.py | 106 +++++++++++++++++++++- mapchete/types.py | 2 + test/conftest.py | 9 ++ test/test_config.py | 5 + test/testdata/typed_raster_input.mapchete | 13 +++ test/testdata/typed_raster_input.py | 19 ++++ test/testdata/typed_raster_input_group.py | 18 ++++ test/testdata/typed_vector_input.py | 19 ++++ test/testdata/typed_vector_input_group.py | 18 ++++ 13 files changed, 242 insertions(+), 14 deletions(-) create mode 100644 test/testdata/typed_raster_input.mapchete create mode 100644 test/testdata/typed_raster_input.py create mode 100644 test/testdata/typed_raster_input_group.py create mode 100644 test/testdata/typed_vector_input.py create mode 100644 test/testdata/typed_vector_input_group.py diff --git a/mapchete/__init__.py b/mapchete/__init__.py index 1a608b64..1344a9e9 100644 --- a/mapchete/__init__.py +++ b/mapchete/__init__.py @@ -9,6 +9,12 @@ from mapchete.formats import read_output_metadata from mapchete.path import MPath, fs_from_path from mapchete.processing import Mapchete, MapcheteProcess +from mapchete.protocols import ( + RasterInput, + RasterInputGroup, + VectorInput, + VectorInputGroup, +) from mapchete.tile import count_tiles from mapchete.timer import Timer from mapchete.types import MPathLike @@ -20,6 +26,10 @@ "Timer", "Executor", "MFuture", + "RasterInput", + "RasterInputGroup", + "VectorInput", + "VectorInputGroup", ] __version__ = "2024.2.1" diff --git a/mapchete/config/process_func.py b/mapchete/config/process_func.py index dac84c37..3c2da447 100644 --- a/mapchete/config/process_func.py +++ b/mapchete/config/process_func.py @@ -52,7 +52,9 @@ def __init__(self, src, config_dir=None, run_compile=True): self.function_parameters = dict(**inspect.signature(func).parameters) def __call__(self, *args, **kwargs: Any) -> Any: - return self._load_func()(*args, **self.filter_parameters(kwargs)) + args = args + kwargs = self.filter_parameters(kwargs) + return self._load_func()(*args, **kwargs) def filter_parameters(self, params): """Return function kwargs.""" diff --git a/mapchete/processing/mp.py b/mapchete/processing/mp.py index abb2b022..8f35101d 100644 --- a/mapchete/processing/mp.py +++ b/mapchete/processing/mp.py @@ -1,6 +1,9 @@ +import numpy.ma as ma + from mapchete.commons import clip as commons_clip from mapchete.commons import contours as commons_contours from mapchete.commons import hillshade as commons_hillshade +from mapchete.protocols import InputTileProtocol from mapchete.validate import deprecated_kwargs @@ -77,7 +80,7 @@ def read(self, **kwargs): ) @deprecated_kwargs - def open(self, input_id, **kwargs): + def open(self, input_id, **kwargs) -> InputTileProtocol: """ Open input data. @@ -96,7 +99,9 @@ def open(self, input_id, **kwargs): raise ValueError("%s not found in config as input" % input_id) return self.input[input_id] - def hillshade(self, elevation, azimuth=315.0, altitude=45.0, z=1.0, scale=1.0): + def hillshade( + self, elevation, azimuth=315.0, altitude=45.0, z=1.0, scale=1.0 + ) -> ma.MaskedArray: """ Calculate hillshading from elevation data. @@ -122,7 +127,7 @@ def hillshade(self, elevation, azimuth=315.0, altitude=45.0, z=1.0, scale=1.0): elevation, self.tile, azimuth, altitude, z, scale ) - def contours(self, elevation, interval=100, field="elev", base=0): + def contours(self, elevation, interval=100, field="elev", base=0) -> ma.MaskedArray: """ Extract contour lines from elevation data. @@ -146,7 +151,7 @@ def contours(self, elevation, interval=100, field="elev", base=0): elevation, self.tile, interval=interval, field=field, base=base ) - def clip(self, array, geometries, inverted=False, clip_buffer=0): + def clip(self, array, geometries, inverted=False, clip_buffer=0) -> ma.MaskedArray: """ Clip array by geometry. diff --git a/mapchete/processing/tasks.py b/mapchete/processing/tasks.py index 6ad5d730..b4e5820f 100644 --- a/mapchete/processing/tasks.py +++ b/mapchete/processing/tasks.py @@ -348,14 +348,20 @@ def _execute(self, dependencies: Optional[Dict[str, TaskInfo]] = None) -> Any: task_key=task_key, result=task_result ) # Actually run process. + mp = MapcheteProcess( + tile=self.tile, + params=self.process_func_params, + input=self.input, + output_params=self.output_params, + ) + # this contains key: params mapping, where under param.annotation we can inspect for target type + extended_kwargs = dict( + self.process_func_params, + mp=mp, + **{k: v for k, v in self.input.items()}, + ) process_data = self.process( - MapcheteProcess( - tile=self.tile, - params=self.process_func_params, - input=self.input, - output_params=self.output_params, - ), - **self.process_func_params, + **extended_kwargs, ) except MapcheteNodataTile: raise diff --git a/mapchete/protocols.py b/mapchete/protocols.py index 28bfd6ce..38dcddc1 100644 --- a/mapchete/protocols.py +++ b/mapchete/protocols.py @@ -1,9 +1,24 @@ -from typing import Protocol, Tuple, runtime_checkable +from __future__ import annotations +from typing import ( + Any, + Callable, + List, + NoReturn, + Optional, + Protocol, + Tuple, + Union, + runtime_checkable, +) + +import numpy.ma as ma from affine import Affine from rasterio.crs import CRS +from shapely.geometry.base import BaseGeometry -from mapchete.types import Bounds +from mapchete.tile import BufferedTilePyramid +from mapchete.types import Bounds, BoundsLike, CRSLike, ResamplingLike, TileLike class ObserverProtocol(Protocol): @@ -21,3 +36,90 @@ class GridProtocol(Protocol): shape: Tuple[int, int] bounds: Bounds crs: CRS + + +class InputTileProtocol(Protocol): + preprocessing_tasks_results: dict = {} + input_key: str + + def read(self, **kwargs) -> Any: + """Read from input.""" + ... + + def is_empty(self) -> bool: + """Checks if input is empty here.""" + ... + + def set_preprocessing_task_result(self, task_key: str, result: Any) -> NoReturn: + ... + + def __enter__(self) -> InputTileProtocol: + """Required for 'with' statement.""" + return self + + def __exit__(self, *args): + """Clean up.""" + + +class RasterInput(InputTileProtocol): + def read( + self, + indexes: Optional[Union[List[int], int]] = None, + resampling: Optional[ResamplingLike] = None, + **kwargs, + ) -> ma.MaskedArray: + """Read resampled array from input.""" + ... + + +class VectorInput(InputTileProtocol): + def read( + self, validity_check: bool = True, clip_to_crs_bounds: bool = False, **kwargs + ) -> List[dict]: + """Read reprojected and clipped vector features from input.""" + ... + + +RasterInputGroup = List[RasterInput] +VectorInputGroup = List[VectorInput] + + +class InputDataProtocol(Protocol): + input_key: str + pyramid: BufferedTilePyramid + pixelbuffer: int = 0 + crs: CRSLike + preprocessing_tasks: dict = {} + preprocessing_tasks_results: dict = {} + + def open(self, tile: TileLike, **kwargs) -> InputTileProtocol: + ... + + def bbox(self, out_crs: Optional[CRSLike] = None) -> BaseGeometry: + ... + + def exists(self) -> bool: + ... + + def cleanup(self) -> NoReturn: + ... + + def add_preprocessing_task( + self, + func: Callable, + fargs: Optional[tuple] = None, + fkwargs: Optional[dict] = None, + key: Optional[str] = None, + geometry: Optional[BaseGeometry] = None, + bounds: Optional[BoundsLike] = None, + ) -> NoReturn: + ... + + def get_preprocessing_task_result(self, task_key: str) -> Any: + ... + + def set_preprocessing_task_result(self, task_key: str, result: Any) -> NoReturn: + ... + + def preprocessing_task_finished(self, task_key: str) -> bool: + ... diff --git a/mapchete/types.py b/mapchete/types.py index 32d1955e..2cc6477f 100644 --- a/mapchete/types.py +++ b/mapchete/types.py @@ -7,6 +7,7 @@ from fiona.crs import CRS as FionaCRS from pydantic import BaseModel from rasterio.crs import CRS as RasterioCRS +from rasterio.enums import Resampling from rasterio.transform import array_bounds, from_bounds from shapely.geometry import shape from shapely.geometry.base import BaseGeometry @@ -22,6 +23,7 @@ CRSLike = Union[FionaCRS, RasterioCRS] NodataVal = Optional[float] NodataVals = Union[List[NodataVal], NodataVal] +ResamplingLike = Union[Resampling, str] class Bounds(list): diff --git a/test/conftest.py b/test/conftest.py index 2a2f6bbc..39833c0d 100644 --- a/test/conftest.py +++ b/test/conftest.py @@ -612,6 +612,15 @@ def custom_grid_json(): # example mapchete configurations +@pytest.fixture +def typed_raster_input(mp_tmpdir): + """Fixture for typed_raster_input.mapchete.""" + with ProcessFixture( + TESTDATA_DIR / "typed_raster_input.mapchete", output_tempdir=mp_tmpdir + ) as example: + yield example + + @pytest.fixture def custom_grid(mp_tmpdir): """Fixture for custom_grid.mapchete.""" diff --git a/test/test_config.py b/test/test_config.py index 7a31b22e..5775e177 100644 --- a/test/test_config.py +++ b/test/test_config.py @@ -560,3 +560,8 @@ def test_dask_specs(dask_specs): assert isinstance( mp.config.parsed_config.dask_specs.adapt_options, DaskAdaptOptions ) + + +def test_typed_raster_input(typed_raster_input): + with mapchete.open(typed_raster_input.path) as mp: + list(mp.execute(concurrency=None)) diff --git a/test/testdata/typed_raster_input.mapchete b/test/testdata/typed_raster_input.mapchete new file mode 100644 index 00000000..3d11c26a --- /dev/null +++ b/test/testdata/typed_raster_input.mapchete @@ -0,0 +1,13 @@ +process: typed_raster_input.py +zoom_levels: + min: 0 + max: 5 +pyramid: + grid: geodetic +input: + raster: cleantopo_br.tif +output: + dtype: uint16 + bands: 1 + format: GTiff + path: tmp/typed_raster_input diff --git a/test/testdata/typed_raster_input.py b/test/testdata/typed_raster_input.py new file mode 100644 index 00000000..37be7946 --- /dev/null +++ b/test/testdata/typed_raster_input.py @@ -0,0 +1,19 @@ +"""Example process file.""" + +import numpy.ma as ma + +from mapchete import RasterInput + + +def execute( + raster: RasterInput, +) -> ma.MaskedArray: + """User defined process.""" + # Reading and writing data works like this: + if raster.is_empty(): + # This assures a transparent tile instead of a pink error tile + # is returned when using mapchete serve. + return "empty" + + data = raster.read(resampling="bilinear") + return data diff --git a/test/testdata/typed_raster_input_group.py b/test/testdata/typed_raster_input_group.py new file mode 100644 index 00000000..c61fc0cc --- /dev/null +++ b/test/testdata/typed_raster_input_group.py @@ -0,0 +1,18 @@ +"""Example process file.""" + +import numpy.ma as ma + +from mapchete import RasterInputGroup + + +def execute( + rasters: RasterInputGroup, +) -> ma.MaskedArray: + """User defined process.""" + for raster in rasters: + if raster.is_empty(): + return "empty" + + data = raster.read(resampling="bilinear") + + return data diff --git a/test/testdata/typed_vector_input.py b/test/testdata/typed_vector_input.py new file mode 100644 index 00000000..6dd07a14 --- /dev/null +++ b/test/testdata/typed_vector_input.py @@ -0,0 +1,19 @@ +"""Example process file.""" + +from typing import List + +from mapchete import VectorInput + + +def execute( + vector: VectorInput, +) -> List[dict]: + """User defined process.""" + # Reading and writing data works like this: + if vector.is_empty(): + # This assures a transparent tile instead of a pink error tile + # is returned when using mapchete serve. + return "empty" + + data = vector.read(validity_check=False) + return data diff --git a/test/testdata/typed_vector_input_group.py b/test/testdata/typed_vector_input_group.py new file mode 100644 index 00000000..646ffed8 --- /dev/null +++ b/test/testdata/typed_vector_input_group.py @@ -0,0 +1,18 @@ +"""Example process file.""" + +import numpy.ma as ma + +from mapchete import VectorInputGroup + + +def execute( + vectors: VectorInputGroup, +) -> ma.MaskedArray: + """User defined process.""" + for vector in vectors: + if vector.is_empty(): + return "empty" + + data = vector.read(resampling="bilinear") + + return data From cf3b9772a83656b3ceb6a09a51b4eb201c1b9a64 Mon Sep 17 00:00:00 2001 From: Joachim Ungar Date: Mon, 26 Feb 2024 07:18:57 +0100 Subject: [PATCH 02/28] use ABC for input base classes --- mapchete/__init__.py | 6 +- mapchete/config/base.py | 6 +- mapchete/config/models.py | 18 ++--- mapchete/formats/base.py | 142 +++++++++++++++++++--------------- mapchete/formats/protocols.py | 96 +++++++++++++++++++++++ mapchete/processing/mp.py | 2 +- mapchete/protocols.py | 104 +------------------------ mapchete/types.py | 2 + 8 files changed, 197 insertions(+), 179 deletions(-) create mode 100644 mapchete/formats/protocols.py diff --git a/mapchete/__init__.py b/mapchete/__init__.py index 1344a9e9..677ec29c 100644 --- a/mapchete/__init__.py +++ b/mapchete/__init__.py @@ -7,14 +7,14 @@ from mapchete.config import MapcheteConfig from mapchete.executor import Executor, MFuture from mapchete.formats import read_output_metadata -from mapchete.path import MPath, fs_from_path -from mapchete.processing import Mapchete, MapcheteProcess -from mapchete.protocols import ( +from mapchete.formats.protocols import ( RasterInput, RasterInputGroup, VectorInput, VectorInputGroup, ) +from mapchete.path import MPath, fs_from_path +from mapchete.processing import Mapchete, MapcheteProcess from mapchete.tile import count_tiles from mapchete.timer import Timer from mapchete.types import MPathLike diff --git a/mapchete/config/base.py b/mapchete/config/base.py index 8167decb..615f0216 100644 --- a/mapchete/config/base.py +++ b/mapchete/config/base.py @@ -140,7 +140,7 @@ def __init__( # (2) check user process self.config_dir = self.parsed_config.config_dir - if self.mode != "readonly": + if self.mode != ProcessingMode.READONLY: if self.parsed_config.process is None: raise MapcheteConfigError( f"process must be provided on {self.mode} mode" @@ -205,6 +205,10 @@ def __init__( self._params_at_zoom = raw_conf_at_zoom( self.parsed_config, self.init_zoom_levels ) + # TODO: check execute function parameters and provide warnings in case parameters + # have been omitted, are not defined in the config, or have the wrong type + # if self.process: + # breakpoint() # (6) determine process area and process boundaries both from config as well # as from initialization. diff --git a/mapchete/config/models.py b/mapchete/config/models.py index f9b012e6..36cb7262 100644 --- a/mapchete/config/models.py +++ b/mapchete/config/models.py @@ -1,7 +1,7 @@ from typing import List, Optional, Type, Union from distributed import Client -from pydantic import BaseModel, ConfigDict, Field, NonNegativeInt, field_validator +from pydantic import BaseModel, Field, NonNegativeInt, field_validator from shapely.geometry.base import BaseGeometry from mapchete.types import Bounds, BoundsLike, MPathLike, ZoomLevels, ZoomLevelsLike @@ -53,6 +53,14 @@ class DaskSpecs(BaseModel): adapt_options: DaskAdaptOptions = DaskAdaptOptions() +class DaskSettings(BaseModel): + process_graph: bool = True + max_submitted_tasks: int = 500 + chunksize: int = 100 + scheduler: Optional[str] = None + client: Optional[Type[Client]] = None + + class ProcessConfig(BaseModel, arbitrary_types_allowed=True): pyramid: PyramidConfig output: dict @@ -68,11 +76,3 @@ class ProcessConfig(BaseModel, arbitrary_types_allowed=True): bounds_crs: Optional[Union[dict, str]] = None process_parameters: dict = Field(default_factory=dict) dask_specs: Optional[DaskSpecs] = None - - -class DaskSettings(BaseModel): - process_graph: bool = True - max_submitted_tasks: int = 500 - chunksize: int = 100 - scheduler: Optional[str] = None - client: Optional[Type[Client]] = None diff --git a/mapchete/formats/base.py b/mapchete/formats/base.py index 5558f5cb..87104417 100644 --- a/mapchete/formats/base.py +++ b/mapchete/formats/base.py @@ -4,19 +4,22 @@ When writing a new driver, please inherit from these classes and implement the respective interfaces. """ - import logging import types import warnings +from abc import ABC, abstractmethod from itertools import chain +from typing import Any, NoReturn, Optional import numpy as np import numpy.ma as ma from shapely.geometry import shape +from shapely.geometry.base import BaseGeometry from mapchete.config import get_hash from mapchete.errors import MapcheteNodataTile, MapcheteProcessOutputError from mapchete.formats import write_output_metadata +from mapchete.formats.protocols import InputDataProtocol, InputTileProtocol from mapchete.io import fs_from_path, path_exists from mapchete.io.raster import ( create_mosaic, @@ -26,7 +29,8 @@ ) from mapchete.io.vector import read_vector_window from mapchete.processing.tasks import Task -from mapchete.tile import BufferedTilePyramid +from mapchete.tile import BufferedTile, BufferedTilePyramid +from mapchete.types import CRSLike logger = logging.getLogger(__name__) @@ -34,7 +38,63 @@ DEFAULT_TILE_PATH_SCHEMA = "{zoom}/{row}/{col}.{extension}" -class InputData(object): +class InputTile(InputTileProtocol, ABC): + """ + Target Tile representation of input data. + + Parameters + ---------- + tile : ``Tile`` + kwargs : keyword arguments + driver specific parameters + """ + + preprocessing_tasks_results = {} + input_key = None + + def __init__(self, tile: BufferedTile, **kwargs): + """Initialize.""" + + @abstractmethod + def read(self, **kwargs) -> Any: + """ + Read reprojected & resampled input data. + + Returns + ------- + data : array or list + NumPy array for raster data or feature list for vector data + """ + ... + + @abstractmethod + def is_empty(self) -> bool: + """ + Check if there is data within this tile. + + Returns + ------- + is empty : bool + """ + ... + + def set_preprocessing_task_result( + self, task_key: str, result: Any = None + ) -> NoReturn: + """ + Adds a preprocessing task result. + """ + self.preprocessing_tasks_results[task_key] = result + + def __enter__(self): + """Required for 'with' statement.""" + return self + + def __exit__(self, t, v, tb): + """Clean up.""" + + +class InputData(InputDataProtocol, ABC): """ Template class handling geographic input data. @@ -53,9 +113,15 @@ class InputData(object): object describing the process coordinate reference system """ + input_key: str + pyramid: BufferedTilePyramid + pixelbuffer: int + crs: CRSLike + preprocessing_tasks: dict + preprocessing_tasks_results: dict METADATA = {"driver_name": None, "data_type": None, "mode": "r"} - def __init__(self, input_params, input_key=None, **kwargs): + def __init__(self, input_params: dict, input_key: str, **kwargs): """Initialize relevant input information.""" self.input_key = input_key self.pyramid = input_params.get("pyramid") @@ -69,7 +135,8 @@ def __init__(self, input_params, input_key=None, **kwargs): "storage_options", {} ) - def open(self, tile, **kwargs): + @abstractmethod + def open(self, tile: BufferedTile, **kwargs) -> InputTileProtocol: """ Return InputTile object. @@ -82,9 +149,10 @@ def open(self, tile, **kwargs): input tile : ``InputTile`` tile view of input data """ - raise NotImplementedError + ... - def bbox(self, out_crs=None): + @abstractmethod + def bbox(self, out_crs: Optional[CRSLike] = None) -> BaseGeometry: """ Return data bounding box. @@ -98,9 +166,9 @@ def bbox(self, out_crs=None): bounding box : geometry Shapely geometry object """ - raise NotImplementedError + ... - def exists(self): + def exists(self) -> bool: """ Check if data or file even exists. @@ -108,9 +176,9 @@ def exists(self): ------- file exists : bool """ - raise NotImplementedError + ... - def cleanup(self): + def cleanup(self) -> NoReturn: """Optional cleanup function called when Mapchete exits.""" def add_preprocessing_task( @@ -181,58 +249,6 @@ def preprocessing_task_finished(self, task_key): return task_key in self.preprocessing_tasks_results -class InputTile(object): - """ - Target Tile representation of input data. - - Parameters - ---------- - tile : ``Tile`` - kwargs : keyword arguments - driver specific parameters - """ - - preprocessing_tasks_results = {} - input_key = None - - def __init__(self, tile, **kwargs): - """Initialize.""" - - def read(self, **kwargs): - """ - Read reprojected & resampled input data. - - Returns - ------- - data : array or list - NumPy array for raster data or feature list for vector data - """ - raise NotImplementedError - - def is_empty(self): - """ - Check if there is data within this tile. - - Returns - ------- - is empty : bool - """ - raise NotImplementedError - - def set_preprocessing_task_result(self, task_key=None, result=None): - """ - Adds a preprocessing task result. - """ - self.preprocessing_tasks_results[task_key] = result - - def __enter__(self): - """Required for 'with' statement.""" - return self - - def __exit__(self, t, v, tb): - """Clean up.""" - - class OutputDataBaseFunctions: write_in_parent_process = False diff --git a/mapchete/formats/protocols.py b/mapchete/formats/protocols.py new file mode 100644 index 00000000..03286178 --- /dev/null +++ b/mapchete/formats/protocols.py @@ -0,0 +1,96 @@ +from __future__ import annotations + +from typing import Any, Callable, List, NoReturn, Optional, Protocol + +import numpy.ma as ma +from shapely.geometry.base import BaseGeometry + +from mapchete.tile import BufferedTilePyramid +from mapchete.types import BandIndexes, BoundsLike, CRSLike, ResamplingLike, TileLike + + +class InputTileProtocol(Protocol): + preprocessing_tasks_results: dict = {} + input_key: str + + def read(self, **kwargs) -> Any: + """Read from input.""" + ... + + def is_empty(self) -> bool: + """Checks if input is empty here.""" + ... + + def set_preprocessing_task_result(self, task_key: str, result: Any) -> NoReturn: + ... + + def __enter__(self) -> InputTileProtocol: + """Required for 'with' statement.""" + return self + + def __exit__(self, *args): + """Clean up.""" + + +class RasterInput(InputTileProtocol): + def read( + self, + indexes: Optional[BandIndexes] = None, + resampling: Optional[ResamplingLike] = None, + **kwargs, + ) -> ma.MaskedArray: + """Read resampled array from input.""" + ... + + +class VectorInput(InputTileProtocol): + def read( + self, validity_check: bool = True, clip_to_crs_bounds: bool = False, **kwargs + ) -> List[dict]: + """Read reprojected and clipped vector features from input.""" + ... + + +RasterInputGroup = List[RasterInput] +VectorInputGroup = List[VectorInput] + + +class InputDataProtocol(Protocol): + input_key: str + pyramid: BufferedTilePyramid + pixelbuffer: int = 0 + crs: CRSLike + preprocessing_tasks: dict = {} + preprocessing_tasks_results: dict = {} + + def open(self, tile: TileLike, **kwargs) -> InputTileProtocol: + ... + + def bbox(self, out_crs: Optional[CRSLike] = None) -> BaseGeometry: + ... + + def exists(self) -> bool: + ... + + def cleanup(self) -> NoReturn: + ... + + def add_preprocessing_task( + self, + func: Callable, + fargs: Optional[tuple] = None, + fkwargs: Optional[dict] = None, + key: Optional[str] = None, + geometry: Optional[BaseGeometry] = None, + bounds: Optional[BoundsLike] = None, + ) -> NoReturn: + ... + + def get_preprocessing_task_result(self, task_key: str) -> Any: + ... + + def set_preprocessing_task_result(self, task_key: str, result: Any) -> NoReturn: + ... + + def preprocessing_task_finished(self, task_key: str) -> bool: + ... diff --git a/mapchete/processing/mp.py b/mapchete/processing/mp.py index 8f35101d..09ce2f7f 100644 --- a/mapchete/processing/mp.py +++ b/mapchete/processing/mp.py @@ -3,7 +3,7 @@ from mapchete.commons import clip as commons_clip from mapchete.commons import contours as commons_contours from mapchete.commons import hillshade as commons_hillshade -from mapchete.protocols import InputTileProtocol +from mapchete.formats.protocols import InputTileProtocol from mapchete.validate import deprecated_kwargs diff --git a/mapchete/protocols.py b/mapchete/protocols.py index 38dcddc1..a49596cb 100644 --- a/mapchete/protocols.py +++ b/mapchete/protocols.py @@ -1,24 +1,11 @@ from __future__ import annotations -from typing import ( - Any, - Callable, - List, - NoReturn, - Optional, - Protocol, - Tuple, - Union, - runtime_checkable, -) +from typing import Protocol, Tuple, runtime_checkable -import numpy.ma as ma from affine import Affine from rasterio.crs import CRS -from shapely.geometry.base import BaseGeometry -from mapchete.tile import BufferedTilePyramid -from mapchete.types import Bounds, BoundsLike, CRSLike, ResamplingLike, TileLike +from mapchete.types import Bounds class ObserverProtocol(Protocol): @@ -36,90 +23,3 @@ class GridProtocol(Protocol): shape: Tuple[int, int] bounds: Bounds crs: CRS - - -class InputTileProtocol(Protocol): - preprocessing_tasks_results: dict = {} - input_key: str - - def read(self, **kwargs) -> Any: - """Read from input.""" - ... - - def is_empty(self) -> bool: - """Checks if input is empty here.""" - ... - - def set_preprocessing_task_result(self, task_key: str, result: Any) -> NoReturn: - ... - - def __enter__(self) -> InputTileProtocol: - """Required for 'with' statement.""" - return self - - def __exit__(self, *args): - """Clean up.""" - - -class RasterInput(InputTileProtocol): - def read( - self, - indexes: Optional[Union[List[int], int]] = None, - resampling: Optional[ResamplingLike] = None, - **kwargs, - ) -> ma.MaskedArray: - """Read resampled array from input.""" - ... - - -class VectorInput(InputTileProtocol): - def read( - self, validity_check: bool = True, clip_to_crs_bounds: bool = False, **kwargs - ) -> List[dict]: - """Read reprojected and clipped vector features from input.""" - ... - - -RasterInputGroup = List[RasterInput] -VectorInputGroup = List[VectorInput] - - -class InputDataProtocol(Protocol): - input_key: str - pyramid: BufferedTilePyramid - pixelbuffer: int = 0 - crs: CRSLike - preprocessing_tasks: dict = {} - preprocessing_tasks_results: dict = {} - - def open(self, tile: TileLike, **kwargs) -> InputTileProtocol: - ... - - def bbox(self, out_crs: Optional[CRSLike] = None) -> BaseGeometry: - ... - - def exists(self) -> bool: - ... - - def cleanup(self) -> NoReturn: - ... - - def add_preprocessing_task( - self, - func: Callable, - fargs: Optional[tuple] = None, - fkwargs: Optional[dict] = None, - key: Optional[str] = None, - geometry: Optional[BaseGeometry] = None, - bounds: Optional[BoundsLike] = None, - ) -> NoReturn: - ... - - def get_preprocessing_task_result(self, task_key: str) -> Any: - ... - - def set_preprocessing_task_result(self, task_key: str, result: Any) -> NoReturn: - ... - - def preprocessing_task_finished(self, task_key: str) -> bool: - ... diff --git a/mapchete/types.py b/mapchete/types.py index 2cc6477f..3fc88328 100644 --- a/mapchete/types.py +++ b/mapchete/types.py @@ -24,6 +24,8 @@ NodataVal = Optional[float] NodataVals = Union[List[NodataVal], NodataVal] ResamplingLike = Union[Resampling, str] +BandIndex = int +BandIndexes = Union[BandIndex, List[BandIndex]] class Bounds(list): From 01de95540791845b48429f43db53a3b96a462040 Mon Sep 17 00:00:00 2001 From: Joachim Ungar Date: Mon, 26 Feb 2024 20:36:55 +0100 Subject: [PATCH 03/28] deprecate mapchete.commons module --- mapchete/__init__.py | 2 + mapchete/commons/__init__.py | 8 - mapchete/commons/clip.py | 72 --------- mapchete/commons/contours.py | 69 --------- mapchete/commons/hillshade.py | 142 ----------------- mapchete/config/base.py | 10 +- mapchete/config/models.py | 6 + mapchete/config/parse.py | 12 +- mapchete/config/process_func.py | 36 ++++- mapchete/formats/protocols.py | 6 +- mapchete/io/__init__.py | 2 + mapchete/io/_misc.py | 21 ++- mapchete/io/raster/array.py | 74 ++++++++- mapchete/processes/contours.py | 139 ++++++++++++----- mapchete/processes/hillshade.py | 266 ++++++++++++++++++++++---------- mapchete/processing/mp.py | 27 ++-- 16 files changed, 449 insertions(+), 443 deletions(-) delete mode 100644 mapchete/commons/__init__.py delete mode 100644 mapchete/commons/clip.py delete mode 100644 mapchete/commons/contours.py delete mode 100644 mapchete/commons/hillshade.py diff --git a/mapchete/__init__.py b/mapchete/__init__.py index 677ec29c..0aab4291 100644 --- a/mapchete/__init__.py +++ b/mapchete/__init__.py @@ -5,6 +5,7 @@ from fsspec import AbstractFileSystem from mapchete.config import MapcheteConfig +from mapchete.errors import MapcheteNodataTile from mapchete.executor import Executor, MFuture from mapchete.formats import read_output_metadata from mapchete.formats.protocols import ( @@ -25,6 +26,7 @@ "MapcheteProcess", "Timer", "Executor", + "MapcheteNodataTile", "MFuture", "RasterInput", "RasterInputGroup", diff --git a/mapchete/commons/__init__.py b/mapchete/commons/__init__.py deleted file mode 100644 index 2f176fd0..00000000 --- a/mapchete/commons/__init__.py +++ /dev/null @@ -1,8 +0,0 @@ -""" -Useful user functions. - -* ``clip()``: Clips raster data with vector geometries. -* ``contours()``: Extract contour lines from elevation raster. -* ``hillshade()``: Generate hillshade from elevation raster. - -""" diff --git a/mapchete/commons/clip.py b/mapchete/commons/clip.py deleted file mode 100644 index f50f5fab..00000000 --- a/mapchete/commons/clip.py +++ /dev/null @@ -1,72 +0,0 @@ -"""Clip array using vector data.""" -import numpy as np -import numpy.ma as ma -from rasterio.features import geometry_mask -from shapely.ops import unary_union - -from mapchete.io.vector import to_shape - - -def clip_array_with_vector( - array, array_affine, geometries, inverted=False, clip_buffer=0 -): - """ - Clip input array with a vector list. - - Parameters - ---------- - array : array - input raster data - array_affine : Affine - Affine object describing the raster's geolocation - geometries : iterable - iterable of dictionaries, where every entry has a 'geometry' and - 'properties' key. - inverted : bool - invert clip (default: False) - clip_buffer : integer - buffer (in pixels) geometries before clipping - - Returns - ------- - clipped array : array - """ - # buffer input geometries and clean up - buffered_geometries = [] - for feature in geometries: - feature_geom = to_shape(feature["geometry"]) - if feature_geom.is_empty: - continue - if feature_geom.geom_type == "GeometryCollection": - # for GeometryCollections apply buffer to every subgeometry - # and make union - buffered_geom = unary_union( - [g.buffer(clip_buffer) for g in feature_geom.geoms] - ) - else: - buffered_geom = feature_geom.buffer(clip_buffer) - if not buffered_geom.is_empty: - buffered_geometries.append(buffered_geom) - - # mask raster by buffered geometries - if buffered_geometries: - if array.ndim == 2: - return ma.masked_array( - array, - geometry_mask( - buffered_geometries, array.shape, array_affine, invert=inverted - ), - ) - elif array.ndim == 3: - mask = geometry_mask( - buffered_geometries, - (array.shape[1], array.shape[2]), - array_affine, - invert=inverted, - ) - return ma.masked_array(array, mask=np.stack([mask for band in array])) - - # if no geometries, return unmasked array - else: - fill = False if inverted else True - return ma.masked_array(array, mask=np.full(array.shape, fill, dtype=bool)) diff --git a/mapchete/commons/contours.py b/mapchete/commons/contours.py deleted file mode 100644 index 2a7feffe..00000000 --- a/mapchete/commons/contours.py +++ /dev/null @@ -1,69 +0,0 @@ -"""Contour line extraction using matplotlib.""" - -from shapely.geometry import LineString, mapping - - -def extract_contours(array, tile, interval=100, field="elev", base=0): - """ - Extract contour lines from an array. - - Parameters - ---------- - array : array - Input elevation data. - tile : Tile - Tile covering the array. - interval : integer - Elevation value interval when drawing contour lines. - field : string - Output field name containing elevation value. - base : integer - Elevation base value the intervals are computed from. - - Returns - ------- - contours : iterable - contours as GeoJSON-like pairs of properties and geometry - """ - import matplotlib.pyplot as plt - - levels = _get_contour_values(array.min(), array.max(), interval=interval, base=base) - if not levels: - return [] - contours = plt.contour(array, levels) - index = 0 - out_contours = [] - for level in range(len(contours.collections)): - elevation = levels[index] - index += 1 - paths = contours.collections[level].get_paths() - for path in paths: - out_coords = [ - ( - tile.left + (y * tile.pixel_x_size), - tile.top - (x * tile.pixel_y_size), - ) - for x, y in zip(path.vertices[:, 1], path.vertices[:, 0]) - ] - if len(out_coords) >= 2: - out_contours.append( - dict( - properties={field: elevation}, - geometry=mapping(LineString(out_coords)), - ) - ) - return out_contours - - -def _get_contour_values(min_val, max_val, base=0, interval=100): - """Return a list of values between min and max within an interval.""" - i = base - out = [] - if min_val < base: - while i >= min_val: - i -= interval - while i <= max_val: - if i >= min_val: - out.append(i) - i += interval - return out diff --git a/mapchete/commons/hillshade.py b/mapchete/commons/hillshade.py deleted file mode 100644 index 8bd0020c..00000000 --- a/mapchete/commons/hillshade.py +++ /dev/null @@ -1,142 +0,0 @@ -""" -Calculate hillshade and slopeshade. - -Original code is from: -https://github.com/migurski/DEM-Tools/blob/master/Hillup/data/__init__.py#L288-L318 - -License ------------------------ -Copyright (c) 2011, Michal Migurski, Nelson Minar - -All rights reserved. - -Redistribution and use in source and binary forms, with or without -modification, are permitted provided that the followg conditions are met: - -- Redistributions of source code must retain the above copyright notice, - this list of conditions and the followg disclaimer. -- Redistributions in binary form must reproduce the above copyright notice, - this list of conditions and the followg disclaimer in the documentation - and/or other materials provided with the distribution. -- Neither the name of the project nor the names of its contributors may be - used to endorse or promote products derived from this software without - specific prior written permission. - -THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND -ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED -WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE -DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE -FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL -DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR -SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER -CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, -OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -""" -import math -import warnings -from itertools import product - -import numpy as np -import numpy.ma as ma - - -def calculate_slope_aspect(elevation, xres, yres, z=1.0, scale=1.0): - """ - Calculate slope and aspect map. - - Return a pair of arrays 2 pixels smaller than the input elevation array. - - Slope is returned in radians, from 0 for sheer face to pi/2 for - flat ground. Aspect is returned in radians, counterclockwise from -pi - at north around to pi. - - Logic here is borrowed from hillshade.cpp: - http://www.perrygeo.net/wordpress/?p=7 - - Parameters - ---------- - elevation : array - input elevation data - xres : float - column width - yres : float - row height - z : float - vertical exaggeration factor - scale : float - scale factor of pixel size units versus height units (insert 112000 - when having elevation values in meters in a geodetic projection) - - Returns - ------- - slope shade : array - """ - with warnings.catch_warnings(): - # this is to filter out division by zero warnings - warnings.simplefilter("ignore", category=RuntimeWarning) - z = float(z) - scale = float(scale) - height, width = elevation.shape[0] - 2, elevation.shape[1] - 2 - w = [ - z * elevation[row : (row + height), col : (col + width)] - for (row, col) in product(range(3), range(3)) - ] - x = ((w[0] + w[3] + w[3] + w[6]) - (w[2] + w[5] + w[5] + w[8])) / ( - 8.0 * xres * scale - ) - y = ((w[6] + w[7] + w[7] + w[8]) - (w[0] + w[1] + w[1] + w[2])) / ( - 8.0 * yres * scale - ) - # in radians, from 0 to pi/2 - slope = math.pi / 2 - np.arctan(np.sqrt(x * x + y * y)) - # in radians counterclockwise, from -pi at north back to pi - aspect = np.arctan2(x, y) - return slope, aspect - - -def hillshade( - elevation, - tile, - azimuth=315.0, - altitude=45.0, - z=1.0, - scale=1.0, -): - """ - Return hillshaded numpy array. - - Parameters - ---------- - elevation : array - Input elevation data. - tile : Tile - Tile covering the array. - azimuth : float - Light source direction in degrees. (default: 315, top left) - altitude : float - Light source altitude angle in degrees. (default: 45) - z : float - Vertical DEM exaggeration factor. (default: 1) - scale : float - Scale factor of pixel size units versus height units (insert 112000 - when having elevation values in meters in a geodetic projection). - """ - elevation = elevation[0] if elevation.ndim == 3 else elevation - azimuth = float(azimuth) - altitude = float(altitude) - z = float(z) - scale = float(scale) - xres = tile.pixel_x_size - yres = -tile.pixel_y_size - slope, aspect = calculate_slope_aspect(elevation, xres, yres, z=z, scale=scale) - deg2rad = math.pi / 180.0 - # shaded has values between -1.0 and +1.0 - shaded = np.sin(altitude * deg2rad) * np.sin(slope) + np.cos( - altitude * deg2rad - ) * np.cos(slope) * np.cos((azimuth - 90.0) * deg2rad - aspect) - # stretch to 0 - 255 and add one pixel padding using the edge values - return ma.masked_array( - data=np.pad(np.clip(shaded * 255.0, 1, 255).astype("uint8"), 1, mode="edge"), - mask=elevation.mask, - ) diff --git a/mapchete/config/base.py b/mapchete/config/base.py index 615f0216..3c3be453 100644 --- a/mapchete/config/base.py +++ b/mapchete/config/base.py @@ -17,6 +17,7 @@ guess_geometry, parse_config, raw_conf_at_zoom, + zoom_parameters, ) from mapchete.config.process_func import ProcessFunc from mapchete.enums import ProcessingMode @@ -207,8 +208,13 @@ def __init__( ) # TODO: check execute function parameters and provide warnings in case parameters # have been omitted, are not defined in the config, or have the wrong type - # if self.process: - # breakpoint() + if self.process: + self.process.analyze_parameters( + { + zoom: zoom_parameters(self.parsed_config, zoom) + for zoom in self.init_zoom_levels + } + ) # (6) determine process area and process boundaries both from config as well # as from initialization. diff --git a/mapchete/config/models.py b/mapchete/config/models.py index 36cb7262..9dd5ccfc 100644 --- a/mapchete/config/models.py +++ b/mapchete/config/models.py @@ -1,3 +1,4 @@ +from collections import OrderedDict from typing import List, Optional, Type, Union from distributed import Client @@ -76,3 +77,8 @@ class ProcessConfig(BaseModel, arbitrary_types_allowed=True): bounds_crs: Optional[Union[dict, str]] = None process_parameters: dict = Field(default_factory=dict) dask_specs: Optional[DaskSpecs] = None + + +class ZoomParameters(BaseModel): + input: OrderedDict = Field(default_factory=OrderedDict) + process_parameters: OrderedDict = Field(default_factory=OrderedDict) diff --git a/mapchete/config/parse.py b/mapchete/config/parse.py index 66d1342f..9c6b27cd 100644 --- a/mapchete/config/parse.py +++ b/mapchete/config/parse.py @@ -10,7 +10,7 @@ from shapely.geometry.base import BaseGeometry from shapely.ops import unary_union -from mapchete.config.models import ProcessConfig +from mapchete.config.models import ProcessConfig, ZoomParameters from mapchete.errors import GeometryTypeError, MapcheteConfigError from mapchete.io.vector import clean_geometry_type, fiona_open, reproject_geometry from mapchete.path import MPath @@ -146,6 +146,16 @@ def raw_conf_at_zoom(config: ProcessConfig, zooms: ZoomLevels) -> OrderedDict: return OrderedDict(params_per_zoom) +def zoom_parameters(config: ProcessConfig, zoom: int) -> ZoomParameters: + """Return parameter dictionary per zoom level.""" + params = dict() + for name, element in config.model_dump().items(): + out_element = element_at_zoom(name, element, zoom) + if out_element is not None: + params[name] = out_element + return ZoomParameters(**params) + + def element_at_zoom(name: str, element: Any, zoom: int) -> Any: """ Return the element filtered by zoom level. diff --git a/mapchete/config/process_func.py b/mapchete/config/process_func.py index 3c2da447..f9d6f099 100644 --- a/mapchete/config/process_func.py +++ b/mapchete/config/process_func.py @@ -3,9 +3,11 @@ import logging import py_compile import sys +import warnings from tempfile import NamedTemporaryFile -from typing import Any +from typing import Any, Dict, NoReturn +from mapchete.config.models import ZoomParameters from mapchete.errors import ( MapcheteConfigError, MapcheteProcessImportError, @@ -56,6 +58,38 @@ def __call__(self, *args, **kwargs: Any) -> Any: kwargs = self.filter_parameters(kwargs) return self._load_func()(*args, **kwargs) + def analyze_parameters( + self, parameters_per_zoom: Dict[int, ZoomParameters] + ) -> NoReturn: + for zoom, config_parameters in parameters_per_zoom.items(): + # make sure parameters with no defaults are given, except of magical "mp" object + for name, param in self.function_parameters.items(): + if param.default == inspect.Parameter.empty and name not in [ + "mp", + "kwargs", + ]: + if ( + name not in config_parameters.input + and name not in config_parameters.process_parameters + ): + raise MapcheteConfigError( + f"zoom {zoom}: parameter '{name}' is required by process function but not provided in the process configuration" + ) + # make sure there is no intersection between process parameters and input keys + param_intersection = set(config_parameters.input.keys()).intersection( + set(config_parameters.process_parameters.keys()) + ) + if param_intersection: + raise MapcheteConfigError( + f"zoom {zoom}: parameters {', '.join(list(param_intersection))} are provided as both input names as well as process parameter names" + ) + # warn if there are process parameters not available in the process + for param_name in config_parameters.process_parameters.keys(): + if param_name not in self.function_parameters: + warnings.warn( + f"zoom {zoom}: parameter '{param_name}' is set in the process configuration but not a process function parameter" + ) + def filter_parameters(self, params): """Return function kwargs.""" return { diff --git a/mapchete/formats/protocols.py b/mapchete/formats/protocols.py index 03286178..78c30c27 100644 --- a/mapchete/formats/protocols.py +++ b/mapchete/formats/protocols.py @@ -5,13 +5,15 @@ import numpy.ma as ma from shapely.geometry.base import BaseGeometry -from mapchete.tile import BufferedTilePyramid +from mapchete.protocols import GridProtocol +from mapchete.tile import BufferedTile, BufferedTilePyramid from mapchete.types import BandIndexes, BoundsLike, CRSLike, ResamplingLike, TileLike -class InputTileProtocol(Protocol): +class InputTileProtocol(GridProtocol): preprocessing_tasks_results: dict = {} input_key: str + tile: BufferedTile def read(self, **kwargs) -> Any: """Read from input.""" diff --git a/mapchete/io/__init__.py b/mapchete/io/__init__.py index 728267a8..126cca65 100644 --- a/mapchete/io/__init__.py +++ b/mapchete/io/__init__.py @@ -2,6 +2,7 @@ from mapchete.io._json import read_json, write_json from mapchete.io._misc import ( + MatchingMethod, copy, get_best_zoom_level, get_boto3_bucket, @@ -29,6 +30,7 @@ "get_best_zoom_level", "get_segmentize_value", "tile_to_zoom_level", + "MatchingMethod", "path_is_remote", "path_exists", "tiles_exist", diff --git a/mapchete/io/_misc.py b/mapchete/io/_misc.py index fac850ee..3569cfd4 100644 --- a/mapchete/io/_misc.py +++ b/mapchete/io/_misc.py @@ -1,4 +1,5 @@ import logging +from enum import Enum import rasterio from rasterio.warp import calculate_default_transform @@ -7,7 +8,7 @@ from mapchete.io._geometry_operations import reproject_geometry, segmentize_geometry from mapchete.path import MPath -from mapchete.tile import BufferedTilePyramid +from mapchete.tile import BufferedTile, BufferedTilePyramid logger = logging.getLogger(__name__) @@ -75,7 +76,17 @@ def get_segmentize_value(input_file=None, tile_pyramid=None): return pixelsize * tile_pyramid.tile_size -def tile_to_zoom_level(tile, dst_pyramid=None, matching_method="gdal", precision=8): +class MatchingMethod(str, Enum): + gdal = "gdal" + min = "min" + + +def tile_to_zoom_level( + tile: BufferedTile, + dst_pyramid: BufferedTilePyramid, + matching_method: MatchingMethod = MatchingMethod.gdal, + precision: int = 8, +): """ Determine the best zoom level in target TilePyramid from given Tile. @@ -83,7 +94,7 @@ def tile_to_zoom_level(tile, dst_pyramid=None, matching_method="gdal", precision ---------- tile : BufferedTile dst_pyramid : BufferedTilePyramid - matching_method : str ('gdal' or 'min') + matching_method : MatchingMethod ('gdal' or 'min') gdal: Uses GDAL's standard method. Here, the target resolution is calculated by averaging the extent's pixel sizes over both x and y axes. This approach returns a zoom level which may not have the best quality but will speed up @@ -120,7 +131,7 @@ def width_height(bounds): if tile.tp.crs == dst_pyramid.crs: return tile.zoom else: - if matching_method == "gdal": + if matching_method == MatchingMethod.gdal: # use rasterio/GDAL method to calculate default warp target properties # enabling CHECK_WITH_INVERT_PROJ fixes #269, otherwise this function would # return a non-optimal zoom level for reprojection @@ -130,7 +141,7 @@ def width_height(bounds): ) # this is the resolution the tile would have in destination CRS tile_resolution = round(transform[0], precision) - elif matching_method == "min": + elif matching_method == MatchingMethod.min: # calculate the minimum pixel size from the four tile corner pixels l, b, r, t = tile.bounds x = tile.pixel_x_size diff --git a/mapchete/io/raster/array.py b/mapchete/io/raster/array.py index cf3a3036..2b24b491 100644 --- a/mapchete/io/raster/array.py +++ b/mapchete/io/raster/array.py @@ -1,16 +1,19 @@ import itertools import logging import warnings -from typing import Iterable, Optional, Tuple, Union +from typing import Iterable, List, Optional, Tuple, Union import numpy as np import numpy.ma as ma from affine import Affine from numpy.typing import DTypeLike from rasterio.enums import Resampling +from rasterio.features import geometry_mask from rasterio.warp import reproject from rasterio.windows import from_bounds +from shapely.ops import unary_union +from mapchete.io.vector import to_shape from mapchete.protocols import GridProtocol from mapchete.types import BoundsLike, CRSLike, NodataVal @@ -285,3 +288,72 @@ def prepare_masked_array( return ma.masked_values(data.astype(dtype, copy=False), nodata, copy=False) else: return ma.filled(data.astype(dtype, copy=False), nodata) + + +def clip_array_with_vector( + array: np.ndarray, + array_affine: Affine, + geometries: List[dict], + inverted: bool = False, + clip_buffer: int = 0, +): + """ + Clip input array with a vector list. + + Parameters + ---------- + array : array + input raster data + array_affine : Affine + Affine object describing the raster's geolocation + geometries : iterable + iterable of dictionaries, where every entry has a 'geometry' and + 'properties' key. + inverted : bool + invert clip (default: False) + clip_buffer : integer + buffer (in pixels) geometries before clipping + + Returns + ------- + clipped array : array + """ + # buffer input geometries and clean up + buffered_geometries = [] + for feature in geometries: + feature_geom = to_shape(feature["geometry"]) + if feature_geom.is_empty: + continue + if feature_geom.geom_type == "GeometryCollection": + # for GeometryCollections apply buffer to every subgeometry + # and make union + buffered_geom = unary_union( + [g.buffer(clip_buffer) for g in feature_geom.geoms] + ) + else: + buffered_geom = feature_geom.buffer(clip_buffer) + if not buffered_geom.is_empty: + buffered_geometries.append(buffered_geom) + + # mask raster by buffered geometries + if buffered_geometries: + if array.ndim == 2: + return ma.masked_array( + array, + geometry_mask( + buffered_geometries, array.shape, array_affine, invert=inverted + ), + ) + elif array.ndim == 3: + mask = geometry_mask( + buffered_geometries, + (array.shape[1], array.shape[2]), + array_affine, + invert=inverted, + ) + return ma.masked_array(array, mask=np.stack([mask for band in array])) + + # if no geometries, return unmasked array + else: + fill = False if inverted else True + return ma.masked_array(array, mask=np.full(array.shape, fill, dtype=bool)) diff --git a/mapchete/processes/contours.py b/mapchete/processes/contours.py index fef917a8..93c0cb55 100644 --- a/mapchete/processes/contours.py +++ b/mapchete/processes/contours.py @@ -1,24 +1,32 @@ +"""Contour line extraction using matplotlib.""" import logging +from typing import List, Optional -from shapely.geometry import mapping, shape +import numpy as np +from shapely.geometry import LineString, mapping, shape from shapely.ops import unary_union +from mapchete import MapcheteNodataTile, RasterInput, VectorInput +from mapchete.io import MatchingMethod +from mapchete.tile import BufferedTile +from mapchete.types import ResamplingLike + logger = logging.getLogger(__name__) def execute( - mp, - resampling="nearest", - interval=100, - field="elev", - base=0, - td_matching_method="gdal", - td_matching_max_zoom=None, - td_matching_precision=8, - td_fallback_to_higher_zoom=False, + dem: RasterInput, + clip: Optional[VectorInput], + resampling: ResamplingLike = "nearest", + interval: float = 100, + field: str = "elev", + base: float = 0, + td_matching_method: MatchingMethod = MatchingMethod.gdal, + td_matching_max_zoom: Optional[int] = None, + td_matching_precision: int = 8, + td_fallback_to_higher_zoom: bool = False, clip_pixelbuffer=0, - **kwargs -): +) -> List[dict]: """ Generate hillshade from DEM. @@ -65,48 +73,47 @@ def execute( list of GeoJSON-like features """ # read clip geometry - if "clip" in mp.params["input"]: - clip_geom = mp.open("clip").read() + if clip: + clip_geom = [] if not clip_geom: logger.debug("no clip data over tile") - return "empty" - else: - clip_geom = [] + raise MapcheteNodataTile - with mp.open( - "dem", - ) as dem: - logger.debug("reading input raster") - dem_data = dem.read( - 1, - resampling=resampling, - matching_method=td_matching_method, - matching_max_zoom=td_matching_max_zoom, - matching_precision=td_matching_precision, - fallback_to_higher_zoom=td_fallback_to_higher_zoom, - ) - if dem_data.mask.all(): - logger.debug("raster empty") - return "empty" + if dem.is_empty(): + raise MapcheteNodataTile + + logger.debug("reading input raster") + dem_data = dem.read( + 1, + resampling=resampling, + matching_method=td_matching_method, + matching_max_zoom=td_matching_max_zoom, + matching_precision=td_matching_precision, + fallback_to_higher_zoom=td_fallback_to_higher_zoom, + ) + if dem_data.mask.all(): + logger.debug("raster empty") + raise MapcheteNodataTile logger.debug("calculate hillshade") - contours = mp.contours( + contour_lines = contours( dem_data, + dem.tile, interval=interval, field=field, base=base, ) - if clip_geom: + if clip: logger.debug("clipping output with geometry") # use inverted clip geometry to extract contours - clip_geom = mp.tile.bbox.difference( + clip_geom = dem.tile.bbox.difference( unary_union([shape(i["geometry"]) for i in clip_geom]).buffer( - clip_pixelbuffer * mp.tile.pixel_x_size + clip_pixelbuffer * dem.tile.pixel_x_size ) ) out_contours = [] - for contour in contours: + for contour in contour_lines: out_geom = shape(contour["geometry"]).intersection(clip_geom) if not out_geom.is_empty: out_contours.append( @@ -117,4 +124,60 @@ def execute( ) return out_contours else: - return contours + return contour_lines + + +def contours( + array: np.ndarray, + tile: BufferedTile, + interval: float = 100, + field: str = "elev", + base: float = 0, +) -> List[dict]: + """ + Extract contour lines from an array. + """ + import matplotlib.pyplot as plt + + levels = _get_contour_values(array.min(), array.max(), interval=interval, base=base) + if not levels: + return [] + contours = plt.contour(array, levels) + index = 0 + out_contours = [] + for level in range(len(contours.collections)): + elevation = levels[index] + index += 1 + paths = contours.collections[level].get_paths() + for path in paths: + out_coords = [ + ( + tile.left + (y * tile.pixel_x_size), + tile.top - (x * tile.pixel_y_size), + ) + for x, y in zip(path.vertices[:, 1], path.vertices[:, 0]) + ] + if len(out_coords) >= 2: + out_contours.append( + dict( + properties={field: elevation}, + geometry=mapping(LineString(out_coords)), + ) + ) + return out_contours + + +def _get_contour_values( + min_val: float, max_val: float, base: float = 0, interval: float = 100 +): + """Return a list of values between min and max within an interval.""" + i = base + out = [] + if min_val < base: + while i >= min_val: + i -= interval + while i <= max_val: + if i >= min_val: + out.append(i) + i += interval + return out diff --git a/mapchete/processes/hillshade.py b/mapchete/processes/hillshade.py index ef959d58..117b456d 100644 --- a/mapchete/processes/hillshade.py +++ b/mapchete/processes/hillshade.py @@ -1,36 +1,179 @@ +""" +Calculate hillshade and slopeshade. + +Original code is from: +https://github.com/migurski/DEM-Tools/blob/master/Hillup/data/__init__.py#L288-L318 + +License +----------------------- +Copyright (c) 2011, Michal Migurski, Nelson Minar + +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the followg conditions are met: + +- Redistributions of source code must retain the above copyright notice, + this list of conditions and the followg disclaimer. +- Redistributions in binary form must reproduce the above copyright notice, + this list of conditions and the followg disclaimer in the documentation + and/or other materials provided with the distribution. +- Neither the name of the project nor the names of its contributors may be + used to endorse or promote products derived from this software without + specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND +ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED +WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE +FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +""" import logging +import math +import warnings +from itertools import product +from typing import Optional, Tuple + +import numpy as np +import numpy.ma as ma +from affine import Affine + +from mapchete import MapcheteNodataTile, RasterInput, VectorInput +from mapchete.io import MatchingMethod +from mapchete.types import ResamplingLike logger = logging.getLogger(__name__) def execute( - mp, - resampling="nearest", - azimuth=315.0, - altitude=45.0, - z=1.0, - scale=1.0, - td_matching_method="gdal", - td_matching_max_zoom=None, - td_matching_precision=8, - td_fallback_to_higher_zoom=False, - clip_pixelbuffer=0, - **kwargs -): + dem: RasterInput, + clip: Optional[VectorInput], + resampling: ResamplingLike = "nearest", + azimuth: float = 315.0, + altitude: float = 45.0, + z: float = 1.0, + scale: float = 1.0, + td_matching_method: MatchingMethod = MatchingMethod.gdal, + td_matching_max_zoom: Optional[int] = None, + td_matching_precision: int = 8, + td_fallback_to_higher_zoom: bool = False, +) -> ma.MaskedArray: """ - Extract contour lines from DEM. + Calculate hillshade from elevation. + """ + # read clip geometry + if clip is None: + clip_geom = [] + else: + clip_geom = clip.read() + if not clip_geom: + logger.debug("no clip data over tile") + raise MapcheteNodataTile + + if dem.is_empty(): + raise MapcheteNodataTile + + logger.debug("reading input raster") + elevation_data = dem.read( + resampling=resampling, + matching_method=td_matching_method, + matching_max_zoom=td_matching_max_zoom, + matching_precision=td_matching_precision, + fallback_to_higher_zoom=td_fallback_to_higher_zoom, + ) + + if elevation_data.mask.all(): + raise MapcheteNodataTile + + logger.debug("calculate hillshade") + return hillshade( + elevation_data, + dem.affine, + azimuth=azimuth, + altitude=altitude, + z=z, + scale=scale, + ) + + +def calculate_slope_aspect( + elevation: np.ndarray, xres: float, yres: float, z: float = 1.0, scale: float = 1.0 +) -> Tuple[np.ndarray, np.ndarray]: + """ + Calculate slope and aspect map. + + Return a pair of arrays 2 pixels smaller than the input elevation array. - Inputs - ------ - dem - Input DEM. - clip (optional) - Vector data used to clip output. + Slope is returned in radians, from 0 for sheer face to pi/2 for + flat ground. Aspect is returned in radians, counterclockwise from -pi + at north around to pi. + + Logic here is borrowed from hillshade.cpp: + http://www.perrygeo.net/wordpress/?p=7 + + Parameters + ---------- + elevation : array + input elevation data + xres : float + column width + yres : float + row height + z : float + vertical exaggeration factor + scale : float + scale factor of pixel size units versus height units (insert 112000 + when having elevation values in meters in a geodetic projection) + + Returns + ------- + slope shade : array + """ + with warnings.catch_warnings(): + # this is to filter out division by zero warnings + warnings.simplefilter("ignore", category=RuntimeWarning) + z = float(z) + scale = float(scale) + height, width = elevation.shape[0] - 2, elevation.shape[1] - 2 + w = [ + z * elevation[row : (row + height), col : (col + width)] + for (row, col) in product(range(3), range(3)) + ] + x = ((w[0] + w[3] + w[3] + w[6]) - (w[2] + w[5] + w[5] + w[8])) / ( + 8.0 * xres * scale + ) + y = ((w[6] + w[7] + w[7] + w[8]) - (w[0] + w[1] + w[1] + w[2])) / ( + 8.0 * yres * scale + ) + # in radians, from 0 to pi/2 + slope = math.pi / 2 - np.arctan(np.sqrt(x * x + y * y)) + # in radians counterclockwise, from -pi at north back to pi + aspect = np.arctan2(x, y) + return slope, aspect + + +def hillshade( + elevation: ma.MaskedArray, + affine: Affine, + azimuth: float = 315.0, + altitude: float = 45.0, + z: float = 1.0, + scale: float = 1.0, +) -> ma.MaskedArray: + """ + Return hillshaded numpy array. Parameters ---------- - resampling : str (default: 'nearest') - Resampling used when reading from TileDirectory. + elevation : array + Input elevation data. + tile : Tile + Tile covering the array. azimuth : float Light source direction in degrees. (default: 315, top left) altitude : float @@ -40,67 +183,22 @@ def execute( scale : float Scale factor of pixel size units versus height units (insert 112000 when having elevation values in meters in a geodetic projection). - td_matching_method : str ('gdal' or 'min') (default: 'gdal') - gdal: Uses GDAL's standard method. Here, the target resolution is - calculated by averaging the extent's pixel sizes over both x and y - axes. This approach returns a zoom level which may not have the - best quality but will speed up reading significantly. - min: Returns the zoom level which matches the minimum resolution of the - extents four corner pixels. This approach returns the zoom level - with the best possible quality but with low performance. If the - tile extent is outside of the destination pyramid, a - TopologicalError will be raised. - td_matching_max_zoom : int (optional, default: None) - If set, it will prevent reading from zoom levels above the maximum. - td_matching_precision : int (default: 8) - Round resolutions to n digits before comparing. - td_fallback_to_higher_zoom : bool (default: False) - In case no data is found at zoom level, try to read data from higher - zoom levels. Enabling this setting can lead to many IO requests in - areas with no data. - clip_pixelbuffer : int - Use pixelbuffer when clipping output by geometry. (default: 0) - - Output - ------ - np.ndarray """ - # read clip geometry - if "clip" in mp.params["input"]: - clip_geom = mp.open("clip").read() - if not clip_geom: - logger.debug("no clip data over tile") - return "empty" - else: - clip_geom = [] - - with mp.open( - "dem", - ) as dem: - logger.debug("reading input raster") - dem_data = dem.read( - resampling=resampling, - matching_method=td_matching_method, - matching_max_zoom=td_matching_max_zoom, - matching_precision=td_matching_precision, - fallback_to_higher_zoom=td_fallback_to_higher_zoom, - ) - if dem_data.mask.all(): - logger.debug("raster empty") - return "empty" - - logger.debug("calculate hillshade") - hillshade = mp.hillshade( - dem_data, - azimuth=azimuth, - altitude=altitude, - z=z, - scale=scale, + elevation = elevation[0] if elevation.ndim == 3 else elevation + azimuth = float(azimuth) + altitude = float(altitude) + z = float(z) + scale = float(scale) + xres = affine[0] + yres = affine[4] + slope, aspect = calculate_slope_aspect(elevation, xres, yres, z=z, scale=scale) + deg2rad = math.pi / 180.0 + # shaded has values between -1.0 and +1.0 + shaded = np.sin(altitude * deg2rad) * np.sin(slope) + np.cos( + altitude * deg2rad + ) * np.cos(slope) * np.cos((azimuth - 90.0) * deg2rad - aspect) + # stretch to 0 - 255 and add one pixel padding using the edge values + return ma.masked_array( + data=np.pad(np.clip(shaded * 255.0, 1, 255).astype(np.uint8), 1, mode="edge"), + mask=elevation.mask, ) - - if clip_geom: - logger.debug("clipping output with geometry") - # apply original nodata mask and clip - return mp.clip(hillshade, clip_geom, clip_buffer=clip_pixelbuffer) - else: - return hillshade diff --git a/mapchete/processing/mp.py b/mapchete/processing/mp.py index 09ce2f7f..8b38d401 100644 --- a/mapchete/processing/mp.py +++ b/mapchete/processing/mp.py @@ -1,8 +1,5 @@ import numpy.ma as ma -from mapchete.commons import clip as commons_clip -from mapchete.commons import contours as commons_contours -from mapchete.commons import hillshade as commons_hillshade from mapchete.formats.protocols import InputTileProtocol from mapchete.validate import deprecated_kwargs @@ -99,9 +96,7 @@ def open(self, input_id, **kwargs) -> InputTileProtocol: raise ValueError("%s not found in config as input" % input_id) return self.input[input_id] - def hillshade( - self, elevation, azimuth=315.0, altitude=45.0, z=1.0, scale=1.0 - ) -> ma.MaskedArray: + def hillshade(self, *_, **__) -> ma.MaskedArray: """ Calculate hillshading from elevation data. @@ -123,11 +118,11 @@ def hillshade( ------- hillshade : array """ - return commons_hillshade.hillshade( - elevation, self.tile, azimuth, altitude, z, scale + raise DeprecationWarning( + "Run hillshade via mp is deprecated. Call the hillshade method from mapchete.processes.hillshade." ) - def contours(self, elevation, interval=100, field="elev", base=0) -> ma.MaskedArray: + def contours(self, *_, **__) -> ma.MaskedArray: """ Extract contour lines from elevation data. @@ -147,11 +142,11 @@ def contours(self, elevation, interval=100, field="elev", base=0) -> ma.MaskedAr contours : iterable contours as GeoJSON-like pairs of properties and geometry """ - return commons_contours.extract_contours( - elevation, self.tile, interval=interval, field=field, base=base + raise DeprecationWarning( + "MapcheteProcess.contours() is deprecated. Call the contours method from mapchete.processes.contours." ) - def clip(self, array, geometries, inverted=False, clip_buffer=0) -> ma.MaskedArray: + def clip(self, *_, **__) -> ma.MaskedArray: """ Clip array by geometry. @@ -170,10 +165,6 @@ def clip(self, array, geometries, inverted=False, clip_buffer=0) -> ma.MaskedArr ------- clipped array : array """ - return commons_clip.clip_array_with_vector( - array, - self.tile.affine, - geometries, - inverted=inverted, - clip_buffer=clip_buffer * self.tile.pixel_x_size, + raise DeprecationWarning( + "MapcheteProcess.clip() is deprecated. Call the clip method from mapchete.io.raster.array.clip_array_with_vector()." ) From 30350bd3faf4351f6a75ce02691a3c5baee401ee Mon Sep 17 00:00:00 2001 From: Joachim Ungar Date: Tue, 27 Feb 2024 10:39:03 +0100 Subject: [PATCH 04/28] add test to verify that all process config parameters can be set from environment --- pytest.ini | 1 + test/conftest.py | 9 +++++++++ test/test_config.py | 9 ++++++++- test/testdata/env_input_path.mapchete | 13 +++++++++++++ 4 files changed, 31 insertions(+), 1 deletion(-) create mode 100644 test/testdata/env_input_path.mapchete diff --git a/pytest.ini b/pytest.ini index e865bb36..5f98ac1a 100644 --- a/pytest.ini +++ b/pytest.ini @@ -5,4 +5,5 @@ markers = env = D:AK1=foo D:AK2=bar + D:FILE1=dummy2.tif addopts = --durations 20 --verbose --nf --cov=mapchete \ No newline at end of file diff --git a/test/conftest.py b/test/conftest.py index 39833c0d..958110d7 100644 --- a/test/conftest.py +++ b/test/conftest.py @@ -738,6 +738,15 @@ def env_storage_options_mapchete(mp_tmpdir): yield example +@pytest.fixture +def env_input_path_mapchete(mp_tmpdir): + """Fixture for env_input_path.mapchete.""" + with ProcessFixture( + TESTDATA_DIR / "env_input_path.mapchete", output_tempdir=mp_tmpdir + ) as example: + yield example + + @pytest.fixture def example_custom_process_mapchete(mp_tmpdir): """Fixture for example.mapchete.""" diff --git a/test/test_config.py b/test/test_config.py index 5775e177..d4d7dd5f 100644 --- a/test/test_config.py +++ b/test/test_config.py @@ -482,13 +482,20 @@ def test_custom_process(example_custom_process_mapchete): # pytest-env must be installed -def test_env_params(env_storage_options_mapchete): +def test_env_storage_options(env_storage_options_mapchete): with mapchete.open(env_storage_options_mapchete.dict) as mp: inp = mp.config.params_at_zoom(5) assert inp["input"]["file1"].storage_options.get("access_key") == "foo" assert mp.config.output.storage_options.get("access_key") == "bar" +# pytest-env must be installed +def test_env_params(env_input_path_mapchete): + with mapchete.open(env_input_path_mapchete.dict) as mp: + inp = mp.config.params_at_zoom(5) + assert inp["input"]["file1"].path.endswith("dummy2.tif") + + def test_process_config_pyramid_settings(): conf = ProcessConfig( pyramid=dict( diff --git a/test/testdata/env_input_path.mapchete b/test/testdata/env_input_path.mapchete new file mode 100644 index 00000000..b07082fd --- /dev/null +++ b/test/testdata/env_input_path.mapchete @@ -0,0 +1,13 @@ +process: ../example_process.py +zoom_levels: 5 +pyramid: + grid: geodetic +input: + file1: + format: raster_file + path: ${FILE1} +output: + dtype: float32 + bands: 1 + format: GTiff + path: tmp/gtiff From 8eaeca0477a15a81cff87a686c308e0397a0751a Mon Sep 17 00:00:00 2001 From: Joachim Ungar Date: Tue, 27 Feb 2024 14:54:24 +0100 Subject: [PATCH 05/28] fine tune processes; adapt tests --- mapchete/config/process_func.py | 1 + mapchete/formats/default/_fiona_base.py | 3 +- mapchete/formats/default/raster_file.py | 3 +- mapchete/formats/default/tile_directory.py | 3 +- mapchete/formats/default/vector_file.py | 3 +- mapchete/processes/clip.py | 52 ++++++++++ mapchete/processes/contours.py | 2 +- mapchete/processes/convert.py | 85 ++++++++-------- mapchete/processes/hillshade.py | 4 +- test/test_commons.py | 96 ------------------ test/test_processes.py | 108 ++++++++++++--------- test/testdata/dem_to_hillshade.mapchete | 4 +- test/testdata/typed_raster_input.py | 4 +- 13 files changed, 168 insertions(+), 200 deletions(-) create mode 100644 mapchete/processes/clip.py delete mode 100644 test/test_commons.py diff --git a/mapchete/config/process_func.py b/mapchete/config/process_func.py index f9d6f099..9e85ef27 100644 --- a/mapchete/config/process_func.py +++ b/mapchete/config/process_func.py @@ -67,6 +67,7 @@ def analyze_parameters( if param.default == inspect.Parameter.empty and name not in [ "mp", "kwargs", + "__", ]: if ( name not in config_parameters.input diff --git a/mapchete/formats/default/_fiona_base.py b/mapchete/formats/default/_fiona_base.py index 36aea28a..deaa7a0a 100644 --- a/mapchete/formats/default/_fiona_base.py +++ b/mapchete/formats/default/_fiona_base.py @@ -6,6 +6,7 @@ import types from mapchete.formats import base +from mapchete.formats.protocols import VectorInput from mapchete.io import MPath, fiona_open from mapchete.io.vector import write_vector_window from mapchete.tile import BufferedTile @@ -168,7 +169,7 @@ def write(self, process_tile, data): ) -class InputTile(base.InputTile): +class InputTile(base.InputTile, VectorInput): """ Target Tile representation of input data. diff --git a/mapchete/formats/default/raster_file.py b/mapchete/formats/default/raster_file.py index a1937c96..0d1eb450 100644 --- a/mapchete/formats/default/raster_file.py +++ b/mapchete/formats/default/raster_file.py @@ -16,6 +16,7 @@ from mapchete import io from mapchete.formats import base +from mapchete.formats.protocols import RasterInput from mapchete.io.raster import ( convert_raster, rasterio_open, @@ -216,7 +217,7 @@ def cleanup(self): self._cached_path.rm(ignore_errors=True) -class InputTile(base.InputTile): +class InputTile(base.InputTile, RasterInput): """ Target Tile representation of input data. diff --git a/mapchete/formats/default/tile_directory.py b/mapchete/formats/default/tile_directory.py index 43c83947..f3c7a277 100644 --- a/mapchete/formats/default/tile_directory.py +++ b/mapchete/formats/default/tile_directory.py @@ -13,6 +13,7 @@ load_output_writer, read_output_metadata, ) +from mapchete.formats.protocols import RasterInput from mapchete.io import MPath, tile_to_zoom_level from mapchete.io.vector import reproject_geometry from mapchete.tile import BufferedTilePyramid @@ -227,7 +228,7 @@ def _get_tiles_paths( ] -class InputTile(base.InputTile): +class InputTile(base.InputTile, RasterInput): """ Target Tile representation of input data. diff --git a/mapchete/formats/default/vector_file.py b/mapchete/formats/default/vector_file.py index 5e0a1661..dedefc31 100644 --- a/mapchete/formats/default/vector_file.py +++ b/mapchete/formats/default/vector_file.py @@ -11,6 +11,7 @@ from shapely.geometry import Point, box from mapchete.formats import base +from mapchete.formats.protocols import VectorInput from mapchete.io import fiona_open from mapchete.io.vector import ( IndexedFeatures, @@ -196,7 +197,7 @@ def cleanup(self): self._cached_path.rm(ignore_errors=True) -class InputTile(base.InputTile): +class InputTile(base.InputTile, VectorInput): """ Target Tile representation of input data. diff --git a/mapchete/processes/clip.py b/mapchete/processes/clip.py new file mode 100644 index 00000000..bc94ca36 --- /dev/null +++ b/mapchete/processes/clip.py @@ -0,0 +1,52 @@ +import logging +from typing import Optional + +import numpy.ma as ma + +from mapchete import MapcheteNodataTile, RasterInput, VectorInput +from mapchete.io import MatchingMethod +from mapchete.io.raster.array import clip_array_with_vector +from mapchete.types import BandIndexes, ResamplingLike + +logger = logging.getLogger(__name__) + + +def execute( + inp: RasterInput, + clip: VectorInput, + resampling: ResamplingLike = "nearest", + band_indexes: Optional[BandIndexes] = None, + td_matching_method: MatchingMethod = MatchingMethod.gdal, + td_matching_max_zoom: Optional[int] = None, + td_matching_precision: int = 8, + td_fallback_to_higher_zoom: bool = False, + clip_pixelbuffer: int = 0, + **kwargs, +) -> ma.MaskedArray: + """ + Clip raster with vector data. + + """ + # read clip geometry + clip_geom = clip.read() + if not clip_geom: + logger.debug("no clip data over tile") + raise MapcheteNodataTile + + if inp.is_empty(): + raise MapcheteNodataTile + + logger.debug("reading input data") + input_data = inp.read( + indexes=band_indexes, + resampling=resampling, + matching_method=td_matching_method, + matching_max_zoom=td_matching_max_zoom, + matching_precision=td_matching_precision, + fallback_to_higher_zoom=td_fallback_to_higher_zoom, + ) + logger.debug("clipping output with geometry") + # apply original nodata mask and clip + return clip_array_with_vector( + input_data, inp.tile.affine, clip_geom, clip_buffer=clip_pixelbuffer + ) diff --git a/mapchete/processes/contours.py b/mapchete/processes/contours.py index 93c0cb55..1a4708e6 100644 --- a/mapchete/processes/contours.py +++ b/mapchete/processes/contours.py @@ -16,7 +16,7 @@ def execute( dem: RasterInput, - clip: Optional[VectorInput], + clip: Optional[VectorInput] = None, resampling: ResamplingLike = "nearest", interval: float = 100, field: str = "elev", diff --git a/mapchete/processes/convert.py b/mapchete/processes/convert.py index 8d776553..1e2a9911 100644 --- a/mapchete/processes/convert.py +++ b/mapchete/processes/convert.py @@ -1,26 +1,33 @@ import logging -import warnings +from typing import List, Optional, Union -import numpy as np +import numpy.ma as ma +from numpy.typing import DTypeLike from rasterio.dtypes import dtype_ranges +from mapchete import MapcheteNodataTile, RasterInput, VectorInput +from mapchete.io import MatchingMethod +from mapchete.io.raster.array import clip_array_with_vector +from mapchete.types import BandIndexes, ResamplingLike + logger = logging.getLogger(__name__) def execute( - mp, - resampling="nearest", - band_indexes=None, - td_matching_method="gdal", - td_matching_max_zoom=None, - td_matching_precision=8, - td_fallback_to_higher_zoom=False, - clip_pixelbuffer=0, - scale_ratio=1.0, - scale_offset=0.0, - clip_to_output_dtype=None, + inp: Union[RasterInput, VectorInput], + clip: Optional[VectorInput] = None, + resampling: ResamplingLike = "nearest", + band_indexes: Optional[BandIndexes] = None, + td_matching_method: MatchingMethod = MatchingMethod.gdal, + td_matching_max_zoom: Optional[int] = None, + td_matching_precision: int = 8, + td_fallback_to_higher_zoom: bool = False, + clip_pixelbuffer: int = 0, + scale_ratio: float = 1.0, + scale_offset: float = 0.0, + clip_to_output_dtype: Optional[DTypeLike] = None, **kwargs, -): +) -> Union[ma.MaskedArray, List[dict]]: """ Convert and optionally clip input raster or vector data. @@ -69,27 +76,19 @@ def execute( np.ndarray """ # read clip geometry - if "clip" in mp.params["input"]: - clip_geom = mp.open("clip").read() + if clip is None: + clip_geom = [] + else: + clip_geom = clip.read() if not clip_geom: logger.debug("no clip data over tile") - return "empty" - else: - clip_geom = [] + raise MapcheteNodataTile - if "raster" in mp.input: # pragma: no cover - warnings.warn( - UserWarning( - "'raster' input name in the mapchete configuration is deprecated and has to be named 'inp'" - ) - ) - inp_key = "raster" - else: - inp_key = "inp" - with mp.open(inp_key) as inp: - if inp.is_empty(): - return "empty" - logger.debug("reading input data") + if inp.is_empty(): + raise MapcheteNodataTile + + logger.debug("reading input data") + if isinstance(inp, RasterInput): input_data = inp.read( indexes=band_indexes, resampling=resampling, @@ -98,17 +97,6 @@ def execute( matching_precision=td_matching_precision, fallback_to_higher_zoom=td_fallback_to_higher_zoom, ) - if isinstance(input_data, np.ndarray): - input_type = "raster" - elif isinstance(input_data, list): - input_type = "vector" - else: # pragma: no cover - raise TypeError( - "input data type for this process has to either be a raster or a vector " - "dataset" - ) - - if input_type == "raster": if scale_offset != 0.0: logger.debug("apply scale offset %s", scale_offset) input_data = input_data.astype("float64", copy=False) + scale_offset @@ -124,13 +112,20 @@ def execute( if clip_geom: logger.debug("clipping output with geometry") # apply original nodata mask and clip - return mp.clip(input_data, clip_geom, clip_buffer=clip_pixelbuffer) + return clip_array_with_vector( + input_data, inp.tile.affine, clip_geom, clip_buffer=clip_pixelbuffer + ) else: return input_data - elif input_type == "vector": + elif isinstance(inp, VectorInput): + input_data = inp.read() if clip_geom: # pragma: no cover raise NotImplementedError("clipping vector data is not yet implemented") else: logger.debug(f"writing {len(input_data)} features") return input_data + else: + raise TypeError( + f"inp must either be of type RasterInput or VectorInput, not {type(inp)}" + ) diff --git a/mapchete/processes/hillshade.py b/mapchete/processes/hillshade.py index 117b456d..167d5953 100644 --- a/mapchete/processes/hillshade.py +++ b/mapchete/processes/hillshade.py @@ -52,7 +52,7 @@ def execute( dem: RasterInput, - clip: Optional[VectorInput], + clip: Optional[VectorInput] = None, resampling: ResamplingLike = "nearest", azimuth: float = 315.0, altitude: float = 45.0, @@ -93,7 +93,7 @@ def execute( logger.debug("calculate hillshade") return hillshade( elevation_data, - dem.affine, + dem.tile.affine, azimuth=azimuth, altitude=altitude, z=z, diff --git a/test/test_commons.py b/test/test_commons.py deleted file mode 100644 index f9870b94..00000000 --- a/test/test_commons.py +++ /dev/null @@ -1,96 +0,0 @@ -#!/usr/bin/env python -"""Test Mapchete commons module.""" - -import numpy as np -import numpy.ma as ma -from shapely.geometry import GeometryCollection, Point - -import mapchete -from mapchete import MapcheteProcess - - -def test_clip(geojson): - """Clip an array with a vector.""" - with mapchete.open(geojson.dict) as mp: - tile = next(mp.get_process_tiles(zoom=4)) - user_process = MapcheteProcess( - tile=tile, - params=mp.config.params_at_zoom(4), - input=mp.config.get_inputs_for_tile(tile), - ) - with user_process.open("file1") as vector_file: - test_array = ma.masked_array(np.ones(user_process.tile.shape)) - clipped = user_process.clip(test_array, vector_file.read()) - # default params - assert isinstance(clipped, ma.masked_array) - assert clipped.mask.any() - assert not clipped.mask.all() - # inverted clip - clipped_inverted = user_process.clip( - test_array, vector_file.read(), inverted=True - ) - assert isinstance(clipped_inverted, ma.masked_array) - assert clipped_inverted.mask.any() - assert not clipped_inverted.mask.all() - # compare results - assert (clipped + clipped_inverted).mask.all() - # using empty Geometries - geoms = [dict(geometry=Point())] - clipped = user_process.clip(test_array, geoms) - assert clipped.mask.all() - # using empty Geometries inverted - clipped = user_process.clip(test_array, geoms, inverted=True) - assert not clipped.mask.any() - # using Point Geometries - geoms = [dict(geometry=tile.bbox.centroid)] - clipped = user_process.clip(test_array, geoms) - assert clipped.mask.all() - # using Geometry Collections - geoms = [dict(geometry=GeometryCollection([tile.bbox.centroid, tile.bbox]))] - clipped = user_process.clip(test_array, geoms) - assert not clipped.mask.any() - # using 3D array - test_array = ma.masked_array(np.ones((1,) + user_process.tile.shape)) - clipped = user_process.clip(test_array, vector_file.read()) - assert isinstance(clipped, ma.masked_array) - assert clipped.mask.any() - assert not clipped.mask.all() - - -def test_contours(cleantopo_tl): - """Extract contours from array.""" - with mapchete.open(cleantopo_tl.dict) as mp: - tile = next(mp.get_process_tiles(zoom=4)) - user_process = MapcheteProcess( - tile=tile, - params=mp.config.params_at_zoom(4), - input=mp.config.get_inputs_for_tile(tile), - ) - with user_process.open("file1") as dem: - arr = dem.read(1) - # valid contours - contours = user_process.contours(arr) - assert contours - assert isinstance(contours, list) - # no contours - contours = user_process.contours(arr, interval=10000) - assert isinstance(contours, list) - assert not contours - # base bigger than values - contours = user_process.contours(arr, base=10000) - assert isinstance(contours, list) - assert contours - - -def test_hillshade(cleantopo_tl): - """Render hillshade from array.""" - with mapchete.open(cleantopo_tl.dict) as mp: - tile = next(mp.get_process_tiles(zoom=4)) - user_process = MapcheteProcess( - tile=tile, - params=mp.config.params_at_zoom(4), - input=mp.config.get_inputs_for_tile(tile), - ) - with user_process.open("file1") as dem: - shade = user_process.hillshade(dem.read()) - assert isinstance(shade, np.ndarray) diff --git a/test/test_processes.py b/test/test_processes.py index a3542d32..c818a281 100644 --- a/test/test_processes.py +++ b/test/test_processes.py @@ -2,8 +2,10 @@ import numpy as np import numpy.ma as ma +import pytest import mapchete +from mapchete import MapcheteNodataTile from mapchete.processes import contours, convert, hillshade from mapchete.processes.examples import example_process from mapchete.testing import get_process_mp @@ -22,110 +24,120 @@ def test_example_process(cleantopo_tl): def test_convert_raster(cleantopo_tl_tif, landpoly): # tile with data assert isinstance( - convert.execute(get_process_mp(input=dict(inp=cleantopo_tl_tif), zoom=5)), + convert.execute( + inp=get_process_mp(input=dict(inp=cleantopo_tl_tif), zoom=5).open("inp") + ), np.ndarray, ) # execute on empty tile - assert ( + with pytest.raises(MapcheteNodataTile): convert.execute( - get_process_mp(input=dict(inp=cleantopo_tl_tif), tile=(5, 3, 7)) + inp=get_process_mp(input=dict(inp=cleantopo_tl_tif), tile=(5, 3, 7)).open( + "inp" + ) ) - == "empty" - ) - process_mp = get_process_mp( + inp = get_process_mp( input=dict(inp=cleantopo_tl_tif, clip=landpoly), zoom=5, metatiling=8 - ) + ).open("inp") # tile with data - default = convert.execute(process_mp) + default = convert.execute(inp) assert isinstance(default, np.ndarray) # scale_offset - offset = convert.execute(process_mp, scale_offset=2) + offset = convert.execute(inp, scale_offset=2) assert isinstance(offset, np.ndarray) # scale_ratio - ratio = convert.execute(process_mp, scale_ratio=0.5) + ratio = convert.execute(inp, scale_ratio=0.5) assert isinstance(ratio, np.ndarray) # clip_to_output_dtype - clip_dtype = convert.execute( - process_mp, scale_ratio=2, clip_to_output_dtype="uint8" - ) + clip_dtype = convert.execute(inp, scale_ratio=2, clip_to_output_dtype="uint8") assert isinstance(clip_dtype, np.ndarray) + # NOTE: this was in the test suite but there is no reason why over this process tile + # the execute function should return an empty tile # execute on empty tile - assert ( - convert.execute( - get_process_mp( - input=dict(inp=cleantopo_tl_tif, clip=landpoly), - tile=(5, 0, 0), - metatiling=1, - ) - ) - == "empty" + mp = get_process_mp( + input=dict(inp=cleantopo_tl_tif, clip=landpoly), + tile=(5, 0, 0), + metatiling=1, ) + with pytest.raises(MapcheteNodataTile): + convert.execute(mp.open("inp"), mp.open("clip")) def test_convert_vector(landpoly): # execute without clip assert isinstance( - convert.execute(get_process_mp(input=dict(inp=landpoly), zoom=5, metatiling=8)), + convert.execute( + get_process_mp(input=dict(inp=landpoly), zoom=5, metatiling=8).open("inp") + ), list, ) # execute on empty tile - assert ( + with pytest.raises(MapcheteNodataTile): convert.execute( - get_process_mp(input=dict(inp=landpoly), tile=(5, 3, 7), metatiling=8) + get_process_mp(input=dict(inp=landpoly), tile=(5, 3, 7), metatiling=8).open( + "inp" + ) ) - == "empty" - ) def test_contours(cleantopo_tl_tif, landpoly): - process_mp = get_process_mp(input=dict(dem=cleantopo_tl_tif), zoom=5, metatiling=8) - output = contours.execute(process_mp) + dem = get_process_mp(input=dict(dem=cleantopo_tl_tif), zoom=5, metatiling=8).open( + "dem" + ) + output = contours.execute(dem) assert isinstance(output, list) assert output # execute on empty tile - process_mp = get_process_mp( + dem = get_process_mp( input=dict(dem=cleantopo_tl_tif), tile=(5, 3, 7), metatiling=8 - ) - assert contours.execute(process_mp) == "empty" + ).open("dem") + with pytest.raises(MapcheteNodataTile): + contours.execute(dem) - process_mp = get_process_mp( + dem = get_process_mp( input=dict(dem=cleantopo_tl_tif, clip=landpoly), zoom=5, metatiling=8 - ) - output = contours.execute(process_mp) + ).open("dem") + output = contours.execute(dem) assert isinstance(output, list) assert output - process_mp = get_process_mp( + dem = get_process_mp( input=dict(dem=cleantopo_tl_tif, clip=landpoly), tile=(5, 3, 7), metatiling=8 - ) - assert contours.execute(process_mp) == "empty" + ).open("dem") + with pytest.raises(MapcheteNodataTile): + contours.execute(dem) def test_hillshade(cleantopo_tl_tif, landpoly): - process_mp = get_process_mp(input=dict(dem=cleantopo_tl_tif), zoom=5, metatiling=8) - assert isinstance(hillshade.execute(process_mp), np.ndarray) + dem = get_process_mp(input=dict(dem=cleantopo_tl_tif), zoom=5, metatiling=8).open( + "dem" + ) + assert isinstance(hillshade.execute(dem), np.ndarray) # execute on empty tile - process_mp = get_process_mp( + dem = get_process_mp( input=dict(dem=cleantopo_tl_tif), tile=(5, 3, 7), metatiling=8 - ) - assert hillshade.execute(process_mp) == "empty" + ).open("dem") + with pytest.raises(MapcheteNodataTile): + hillshade.execute(dem) - process_mp = get_process_mp( + dem = get_process_mp( input=dict(dem=cleantopo_tl_tif, clip=landpoly), zoom=5, metatiling=8 - ) - assert isinstance(hillshade.execute(process_mp), np.ndarray) + ).open("dem") + assert isinstance(hillshade.execute(dem), np.ndarray) + # execute on empty tile - process_mp = get_process_mp( + mp = get_process_mp( input=dict(dem=cleantopo_tl_tif, clip=landpoly), tile=(5, 3, 7), metatiling=8 ) - assert hillshade.execute(process_mp) == "empty" + with pytest.raises(MapcheteNodataTile): + hillshade.execute(mp.open("dem"), mp.open("clip")) diff --git a/test/testdata/dem_to_hillshade.mapchete b/test/testdata/dem_to_hillshade.mapchete index 29c40211..9fc10730 100644 --- a/test/testdata/dem_to_hillshade.mapchete +++ b/test/testdata/dem_to_hillshade.mapchete @@ -1,4 +1,4 @@ -process: dem_to_hillshade.py +process: mapchete.processes.hillshade zoom_levels: min: 0 max: 5 @@ -6,7 +6,7 @@ pyramid: grid: geodetic pixelbuffer: 5 input: - file1: cleantopo_br.tif + dem: cleantopo_br.tif output: bands: 2 format: PNG diff --git a/test/testdata/typed_raster_input.py b/test/testdata/typed_raster_input.py index 37be7946..9c4143a6 100644 --- a/test/testdata/typed_raster_input.py +++ b/test/testdata/typed_raster_input.py @@ -2,7 +2,7 @@ import numpy.ma as ma -from mapchete import RasterInput +from mapchete import MapcheteNodataTile, RasterInput def execute( @@ -13,7 +13,7 @@ def execute( if raster.is_empty(): # This assures a transparent tile instead of a pink error tile # is returned when using mapchete serve. - return "empty" + raise MapcheteNodataTile data = raster.read(resampling="bilinear") return data From 89725ac7afa032f2bf4337661fcb80e92f195aad Mon Sep 17 00:00:00 2001 From: Joachim Ungar Date: Tue, 27 Feb 2024 15:17:09 +0100 Subject: [PATCH 06/28] remove deprecated tests --- mapchete/formats/base.py | 6 +-- mapchete/formats/default/flatgeobuf.py | 1 + mapchete/formats/default/gtiff.py | 6 ++- mapchete/formats/default/mapchete_input.py | 3 +- mapchete/formats/default/tile_directory.py | 2 +- test/test_formats.py | 43 ---------------------- 6 files changed, 11 insertions(+), 50 deletions(-) diff --git a/mapchete/formats/base.py b/mapchete/formats/base.py index 87104417..d5efb00f 100644 --- a/mapchete/formats/base.py +++ b/mapchete/formats/base.py @@ -121,9 +121,9 @@ class InputData(InputDataProtocol, ABC): preprocessing_tasks_results: dict METADATA = {"driver_name": None, "data_type": None, "mode": "r"} - def __init__(self, input_params: dict, input_key: str, **kwargs): + def __init__(self, input_params: dict, input_key: Optional[str] = None, **kwargs): """Initialize relevant input information.""" - self.input_key = input_key + self.input_key = input_key or get_hash(input_params) self.pyramid = input_params.get("pyramid") self.pixelbuffer = input_params.get("pixelbuffer") self.crs = self.pyramid.crs if self.pyramid else None @@ -249,7 +249,7 @@ def preprocessing_task_finished(self, task_key): return task_key in self.preprocessing_tasks_results -class OutputDataBaseFunctions: +class OutputDataBaseFunctions(ABC): write_in_parent_process = False def __init__(self, output_params, readonly=False, **kwargs): diff --git a/mapchete/formats/default/flatgeobuf.py b/mapchete/formats/default/flatgeobuf.py index 5d0d90e4..4ec21cbe 100644 --- a/mapchete/formats/default/flatgeobuf.py +++ b/mapchete/formats/default/flatgeobuf.py @@ -21,6 +21,7 @@ import warnings from mapchete.formats.default import _fiona_base +from mapchete.formats.protocols import VectorInput METADATA = {"driver_name": "FlatGeobuf", "data_type": "vector", "mode": "rw"} diff --git a/mapchete/formats/default/gtiff.py b/mapchete/formats/default/gtiff.py index 2e0cdbc2..53fd8dae 100644 --- a/mapchete/formats/default/gtiff.py +++ b/mapchete/formats/default/gtiff.py @@ -29,6 +29,7 @@ compression method (default: lzw): lzw, jpeg, packbits, deflate, CCITTRLE, CCITTFAX3, CCITTFAX4, lzma """ +from __future__ import annotations import logging import math @@ -48,6 +49,7 @@ from mapchete.config.base import _OUTPUT_PARAMETERS, snap_bounds from mapchete.errors import MapcheteConfigError from mapchete.formats import base +from mapchete.formats.protocols import RasterInput from mapchete.io import MPath, path_exists, path_is_remote from mapchete.io.profiles import DEFAULT_PROFILES from mapchete.io.raster import ( @@ -197,7 +199,7 @@ def for_web(self, data): ) @deprecated_kwargs - def open(self, tile, process, **kwargs): + def open(self, tile, process, **kwargs) -> InputTile: """ Open process output as input for other process. @@ -649,7 +651,7 @@ def _window_in_out_file(window, rio_file): ) -class InputTile(base.InputTile): +class InputTile(base.InputTile, RasterInput): """ Target Tile representation of input data. diff --git a/mapchete/formats/default/mapchete_input.py b/mapchete/formats/default/mapchete_input.py index 613466d5..5d44f6c9 100644 --- a/mapchete/formats/default/mapchete_input.py +++ b/mapchete/formats/default/mapchete_input.py @@ -3,6 +3,7 @@ from mapchete import Mapchete from mapchete.config import MapcheteConfig from mapchete.formats import base +from mapchete.formats.protocols import InputTileProtocol from mapchete.io.vector import reproject_geometry METADATA = { @@ -49,7 +50,7 @@ def __init__(self, input_params, **kwargs): self.path = input_params["path"] self.process = Mapchete(MapcheteConfig(self.path, mode="readonly")) - def open(self, tile, **kwargs): + def open(self, tile, **kwargs) -> InputTileProtocol: """ Return InputTile object. diff --git a/mapchete/formats/default/tile_directory.py b/mapchete/formats/default/tile_directory.py index f3c7a277..7cfe1c02 100644 --- a/mapchete/formats/default/tile_directory.py +++ b/mapchete/formats/default/tile_directory.py @@ -53,7 +53,7 @@ class InputData(base.InputData): METADATA = METADATA - def __init__(self, input_params, **kwargs): + def __init__(self, input_params: dict, **kwargs): """Initialize.""" super().__init__(input_params, **kwargs) self._read_as_tiledir_func = None diff --git a/test/test_formats.py b/test/test_formats.py index 208666d2..3ead3bfb 100644 --- a/test/test_formats.py +++ b/test/test_formats.py @@ -5,14 +5,12 @@ import pytest from rasterio.crs import CRS -from tilematrix import TilePyramid import mapchete from mapchete import errors from mapchete.formats import ( available_input_formats, available_output_formats, - base, driver_from_extension, driver_from_file, dump_metadata, @@ -105,47 +103,6 @@ def test_mapchete_input(mapchete_input): assert not mp_input.is_empty() -def test_base_format_classes(): - """Base format classes.""" - # InputData - tp = TilePyramid("geodetic") - tmp = base.InputData(dict(pyramid=tp, pixelbuffer=0)) - assert tmp.pyramid - assert tmp.pixelbuffer == 0 - assert tmp.crs - with pytest.raises(NotImplementedError): - tmp.open(None) - with pytest.raises(NotImplementedError): - tmp.bbox() - with pytest.raises(NotImplementedError): - tmp.exists() - - # InputTile - tmp = base.InputTile(None) - with pytest.raises(NotImplementedError): - tmp.read() - with pytest.raises(NotImplementedError): - tmp.is_empty() - - # OutputDataWriter - tmp = base.OutputDataWriter(dict(pixelbuffer=0, grid="geodetic", metatiling=1)) - assert tmp.pyramid - assert tmp.pixelbuffer == 0 - assert tmp.crs - with pytest.raises(NotImplementedError): - tmp.read(None) - with pytest.raises(NotImplementedError): - tmp.write(None, None) - with pytest.raises(NotImplementedError): - tmp.is_valid_with_config(None) - with pytest.raises(NotImplementedError): - tmp.for_web(None) - with pytest.raises(NotImplementedError): - tmp.empty(None) - with pytest.raises(NotImplementedError): - tmp.open(None, None) - - @pytest.mark.integration def test_http_rasters(files_bounds, http_raster): """Raster file on remote server with http:// or https:// URLs.""" From 71f8326245f81ae10b5ab84cc2c99b6134873e83 Mon Sep 17 00:00:00 2001 From: Joachim Ungar Date: Thu, 29 Feb 2024 18:08:31 +0100 Subject: [PATCH 07/28] rely on protocols rather than ABC for driver classes --- mapchete/commands/_convert.py | 4 +- mapchete/config/process_func.py | 4 +- mapchete/formats/base.py | 211 +++++++++--------------- mapchete/formats/default/gtiff.py | 1 - mapchete/formats/default/raster_file.py | 3 +- mapchete/formats/default/vector_file.py | 3 +- mapchete/formats/loaders.py | 17 +- mapchete/formats/models.py | 14 +- mapchete/formats/protocols.py | 102 +++++++++++- mapchete/processing/base.py | 30 ++-- mapchete/processing/tasks.py | 10 +- 11 files changed, 222 insertions(+), 177 deletions(-) diff --git a/mapchete/commands/_convert.py b/mapchete/commands/_convert.py index f99d62dd..eaabc1a3 100644 --- a/mapchete/commands/_convert.py +++ b/mapchete/commands/_convert.py @@ -3,7 +3,7 @@ from contextlib import AbstractContextManager from multiprocessing import cpu_count from pprint import pformat -from typing import List, NoReturn, Optional, Tuple, Type, Union +from typing import List, Optional, Tuple, Type, Union import tilematrix from rasterio.crs import CRS @@ -63,7 +63,7 @@ def convert( retry_on_exception: Tuple[Type[Exception], Type[Exception]] = Exception, cancel_on_exception: Type[Exception] = JobCancelledError, retries: int = 0, -) -> NoReturn: +) -> None: """ Convert mapchete outputs or other geodata. diff --git a/mapchete/config/process_func.py b/mapchete/config/process_func.py index 9e85ef27..8a3a614c 100644 --- a/mapchete/config/process_func.py +++ b/mapchete/config/process_func.py @@ -5,7 +5,7 @@ import sys import warnings from tempfile import NamedTemporaryFile -from typing import Any, Dict, NoReturn +from typing import Any, Dict from mapchete.config.models import ZoomParameters from mapchete.errors import ( @@ -60,7 +60,7 @@ def __call__(self, *args, **kwargs: Any) -> Any: def analyze_parameters( self, parameters_per_zoom: Dict[int, ZoomParameters] - ) -> NoReturn: + ) -> None: for zoom, config_parameters in parameters_per_zoom.items(): # make sure parameters with no defaults are given, except of magical "mp" object for name, param in self.function_parameters.items(): diff --git a/mapchete/formats/base.py b/mapchete/formats/base.py index d5efb00f..9061a50a 100644 --- a/mapchete/formats/base.py +++ b/mapchete/formats/base.py @@ -7,20 +7,22 @@ import logging import types import warnings -from abc import ABC, abstractmethod from itertools import chain -from typing import Any, NoReturn, Optional +from typing import Any, List, Optional, Tuple import numpy as np import numpy.ma as ma +from pydantic import NonNegativeInt from shapely.geometry import shape from shapely.geometry.base import BaseGeometry from mapchete.config import get_hash from mapchete.errors import MapcheteNodataTile, MapcheteProcessOutputError from mapchete.formats import write_output_metadata + +# from mapchete.formats.models import BaseInputParams from mapchete.formats.protocols import InputDataProtocol, InputTileProtocol -from mapchete.io import fs_from_path, path_exists +from mapchete.io import path_exists from mapchete.io.raster import ( create_mosaic, extract_from_array, @@ -28,6 +30,7 @@ read_raster_window, ) from mapchete.io.vector import read_vector_window +from mapchete.path import MPath from mapchete.processing.tasks import Task from mapchete.tile import BufferedTile, BufferedTilePyramid from mapchete.types import CRSLike @@ -38,7 +41,7 @@ DEFAULT_TILE_PATH_SCHEMA = "{zoom}/{row}/{col}.{extension}" -class InputTile(InputTileProtocol, ABC): +class InputTile(InputTileProtocol): """ Target Tile representation of input data. @@ -49,38 +52,17 @@ class InputTile(InputTileProtocol, ABC): driver specific parameters """ - preprocessing_tasks_results = {} - input_key = None + preprocessing_tasks_results: dict + input_key: str + tile: BufferedTile - def __init__(self, tile: BufferedTile, **kwargs): + def __init__(self, tile: BufferedTile, input_key: str, **kwargs): """Initialize.""" + self.tile = tile + self.input_key = input_key + self.preprocessing_tasks_results = {} - @abstractmethod - def read(self, **kwargs) -> Any: - """ - Read reprojected & resampled input data. - - Returns - ------- - data : array or list - NumPy array for raster data or feature list for vector data - """ - ... - - @abstractmethod - def is_empty(self) -> bool: - """ - Check if there is data within this tile. - - Returns - ------- - is empty : bool - """ - ... - - def set_preprocessing_task_result( - self, task_key: str, result: Any = None - ) -> NoReturn: + def set_preprocessing_task_result(self, task_key: str, result: Any = None) -> None: """ Adds a preprocessing task result. """ @@ -94,7 +76,7 @@ def __exit__(self, t, v, tb): """Clean up.""" -class InputData(InputDataProtocol, ABC): +class InputData(InputDataProtocol): """ Template class handling geographic input data. @@ -135,50 +117,7 @@ def __init__(self, input_params: dict, input_key: Optional[str] = None, **kwargs "storage_options", {} ) - @abstractmethod - def open(self, tile: BufferedTile, **kwargs) -> InputTileProtocol: - """ - Return InputTile object. - - Parameters - ---------- - tile : ``Tile`` - - Returns - ------- - input tile : ``InputTile`` - tile view of input data - """ - ... - - @abstractmethod - def bbox(self, out_crs: Optional[CRSLike] = None) -> BaseGeometry: - """ - Return data bounding box. - - Parameters - ---------- - out_crs : ``rasterio.crs.CRS`` - rasterio CRS object (default: CRS of process pyramid) - - Returns - ------- - bounding box : geometry - Shapely geometry object - """ - ... - - def exists(self) -> bool: - """ - Check if data or file even exists. - - Returns - ------- - file exists : bool - """ - ... - - def cleanup(self) -> NoReturn: + def cleanup(self) -> None: """Optional cleanup function called when Mapchete exits.""" def add_preprocessing_task( @@ -249,10 +188,13 @@ def preprocessing_task_finished(self, task_key): return task_key in self.preprocessing_tasks_results -class OutputDataBaseFunctions(ABC): +class OutputDataBase: write_in_parent_process = False + pixelbuffer: NonNegativeInt + pyramid: BufferedTilePyramid + crs: CRSLike - def __init__(self, output_params, readonly=False, **kwargs): + def __init__(self, output_params: dict, readonly: bool = False, **kwargs): """Initialize.""" self.pixelbuffer = output_params["pixelbuffer"] if "type" in output_params: # pragma: no cover @@ -268,54 +210,9 @@ def __init__(self, output_params, readonly=False, **kwargs): ) self.crs = self.pyramid.crs self.storage_options = output_params.get("storage_options") - self.fs = self._fs = output_params.get( - "fs", fs_from_path(output_params.get("path", "")) - ) - self.fs_kwargs = self._fs_kwargs = output_params.get("fs_kwargs") or {} - self.tile_path_schema = output_params.get( - "tile_path_schema", DEFAULT_TILE_PATH_SCHEMA - ) - - @property - def stac_path(self): - """Return path to STAC JSON file.""" - return self.path.joinpath(f"{self.stac_item_id}.json") - - @property - def stac_item_id(self): - """ - Return STAC item ID according to configuration. - Defaults to path basename. - """ - return self.output_params.get("stac", {}).get("id") or self.path.stem - - @property - def stac_item_metadata(self): - """Custom STAC metadata.""" - return self.output_params.get("stac", {}) - - @property - def stac_asset_type(self): # pragma: no cover - """Asset MIME type.""" - raise ValueError("no MIME type set for this output") - - def is_valid_with_config(self, config): - """ - Check if output format is valid with other process parameters. - - Parameters - ---------- - config : dictionary - output configuration parameters - - Returns - ------- - is_valid : bool - """ - raise NotImplementedError() - - def get_path(self, tile): + # TODO: move to path based output + def get_path(self, tile: BufferedTile) -> MPath: """ Determine target file path. @@ -335,7 +232,10 @@ def get_path(self, tile): extension=self.file_extension.lstrip("."), ) - def extract_subset(self, input_data_tiles=None, out_tile=None): + # TODO: split up into vector and raster based output (mixin classes) + def extract_subset( + self, input_data_tiles: List[Tuple[BufferedTile, Any]], out_tile: BufferedTile + ) -> Any: """ Extract subset from multiple tiles. input_data_tiles : list of (``Tile``, process data) tuples @@ -368,7 +268,38 @@ def prepare(self, **kwargs): pass -class OutputDataReader(OutputDataBaseFunctions): +class OutputSTACMixin: + """Adds STAC related features.""" + + path: MPath + output_params: dict + + @property + def stac_path(self) -> MPath: + """Return path to STAC JSON file.""" + return self.path / f"{self.stac_item_id}.json" + + @property + def stac_item_id(self) -> str: + """ + Return STAC item ID according to configuration. + + Defaults to path basename. + """ + return self.output_params.get("stac", {}).get("id") or self.path.stem + + @property + def stac_item_metadata(self): + """Custom STAC metadata.""" + return self.output_params.get("stac", {}) + + @property + def stac_asset_type(self): # pragma: no cover + """Asset MIME type.""" + raise ValueError("no MIME type set for this output") + + +class OutputDataReader(OutputDataBase): def read(self, output_tile): """ Read existing process output. @@ -532,10 +463,15 @@ def close(self, exc_type=None, exc_value=None, exc_traceback=None): """Gets called if process is closed.""" -class TileDirectoryOutputReader(OutputDataReader): +class TileDirectoryOutputReader(OutputDataReader, OutputSTACMixin): + tile_path_schema: str = DEFAULT_TILE_PATH_SCHEMA + def __init__(self, output_params, readonly=False): """Initialize.""" super().__init__(output_params, readonly=readonly) + self.tile_path_schema = output_params.get( + "tile_path_schema", DEFAULT_TILE_PATH_SCHEMA + ) if not readonly: write_output_metadata( {k: v for k, v in output_params.items() if k not in ["stac"]} @@ -559,12 +495,13 @@ def tiles_exist(self, process_tile=None, output_tile=None): if process_tile and output_tile: # pragma: no cover raise ValueError("just one of 'process_tile' and 'output_tile' allowed") if process_tile: - return any( - path_exists(self.get_path(tile), fs=self._fs) - for tile in self.pyramid.intersecting(process_tile) - ) + for tile in self.pyramid.intersecting(process_tile): + if self.get_path(tile).exists(): + return True + else: + return False if output_tile: - return path_exists(self.get_path(output_tile), fs=self._fs) + return self.get_path(output_tile).exists() def _read_as_tiledir( self, @@ -618,7 +555,7 @@ class TileDirectoryOutputWriter(OutputDataWriter, TileDirectoryOutputReader): pass -class SingleFileOutputReader(OutputDataReader): +class SingleFileOutputReader(OutputDataReader, OutputSTACMixin): def __init__(self, output_params, readonly=False): """Initialize.""" super().__init__(output_params, readonly=readonly) @@ -647,7 +584,7 @@ class SingleFileOutputWriter(OutputDataWriter, SingleFileOutputReader): def is_numpy_or_masked_array(data): - return isinstance(data, (np.ndarray, ma.core.MaskedArray)) + return isinstance(data, (np.ndarray, ma.MaskedArray)) def is_numpy_or_masked_array_with_tags(data): diff --git a/mapchete/formats/default/gtiff.py b/mapchete/formats/default/gtiff.py index 53fd8dae..4d39a17c 100644 --- a/mapchete/formats/default/gtiff.py +++ b/mapchete/formats/default/gtiff.py @@ -375,7 +375,6 @@ def write(self, process_tile, data): out_grid=out_tile, out_path=out_path, tags=tags, - fs=self.fs, ) @property diff --git a/mapchete/formats/default/raster_file.py b/mapchete/formats/default/raster_file.py index 0d1eb450..7b38a4c0 100644 --- a/mapchete/formats/default/raster_file.py +++ b/mapchete/formats/default/raster_file.py @@ -243,11 +243,10 @@ def __init__( self, tile, input_data, in_memory_raster=None, cache_task_key=None, **kwargs ): """Initialize.""" - self.tile = tile + super().__init__(tile, input_key=input_data.input_key, **kwargs) self.bbox = input_data.bbox(out_crs=self.tile.crs) self.profile = input_data.profile self.cache_task_key = cache_task_key - self.input_key = input_data.input_key if input_data._memory_cache_active: self._memory_cache_active = True self._in_memory_raster = in_memory_raster diff --git a/mapchete/formats/default/vector_file.py b/mapchete/formats/default/vector_file.py index dedefc31..f68737c1 100644 --- a/mapchete/formats/default/vector_file.py +++ b/mapchete/formats/default/vector_file.py @@ -221,11 +221,10 @@ def __init__( self, tile, input_data, in_memory_features=None, cache_task_key=None, **kwargs ): """Initialize.""" - self.tile = tile + super().__init__(tile, input_key=input_data.input_key, **kwargs) self._cache = {} self.bbox = input_data.bbox(out_crs=self.tile.crs) self.cache_task_key = cache_task_key - self.input_key = input_data.input_key if input_data._memory_cache_active: self._memory_cache_active = True self._in_memory_features = in_memory_features diff --git a/mapchete/formats/loaders.py b/mapchete/formats/loaders.py index 5a646abc..a6461d62 100644 --- a/mapchete/formats/loaders.py +++ b/mapchete/formats/loaders.py @@ -1,14 +1,19 @@ import logging -from typing import Dict, Optional +from typing import Optional from mapchete.errors import MapcheteDriverError +from mapchete.formats.protocols import ( + InputDataProtocol, + OutputDataReaderProtocol, + OutputDataWriterProtocol, +) from mapchete.formats.tools import driver_from_file from mapchete.registered import drivers logger = logging.getLogger(__name__) -def load_output_reader(output_params: Dict) -> "OutputDataReader": +def load_output_reader(output_params: dict) -> OutputDataReaderProtocol: """ Return OutputReader class of driver. @@ -35,8 +40,8 @@ def load_output_reader(output_params: Dict) -> "OutputDataReader": def load_output_writer( - output_params: Dict, readonly: bool = False -) -> "OutputDataWriter": + output_params: dict, readonly: bool = False +) -> OutputDataWriterProtocol: """ Return output class of driver. @@ -65,8 +70,8 @@ def load_output_writer( def load_input_reader( - input_params: Dict, readonly: bool = False, input_key: Optional[str] = None -) -> "InputData": + input_params: dict, readonly: bool = False, input_key: Optional[str] = None +) -> InputDataProtocol: """ Return input class of driver. diff --git a/mapchete/formats/models.py b/mapchete/formats/models.py index 441f8b6d..2dfdab6c 100644 --- a/mapchete/formats/models.py +++ b/mapchete/formats/models.py @@ -1,8 +1,10 @@ from typing import List -from pydantic import BaseModel, Field +from pydantic import BaseModel, Field, NonNegativeInt from mapchete.enums import DataType +from mapchete.tile import BufferedTilePyramid +from mapchete.types import NodataVal class DriverMetadata(BaseModel): @@ -10,3 +12,13 @@ class DriverMetadata(BaseModel): data_type: DataType mode: str file_extensions: List[str] = Field(default_factory=list) + + +class BaseInputParams(BaseModel, arbitrary_types_allowed=True): + pyramid: BufferedTilePyramid + pixelbuffer: NonNegativeInt = 0 + + +class BaseOutputParams(BaseModel): + stac: dict = Field(default_factory=dict) + nodata: NodataVal diff --git a/mapchete/formats/protocols.py b/mapchete/formats/protocols.py index 78c30c27..1abf8fa2 100644 --- a/mapchete/formats/protocols.py +++ b/mapchete/formats/protocols.py @@ -1,17 +1,23 @@ from __future__ import annotations -from typing import Any, Callable, List, NoReturn, Optional, Protocol +from types import TracebackType +from typing import Any, Callable, List, Optional, Protocol, Tuple, Type +import numpy as np import numpy.ma as ma +from pydantic import NonNegativeInt from shapely.geometry.base import BaseGeometry +from mapchete.path import MPath + +# from mapchete.processing.mp import MapcheteProcess from mapchete.protocols import GridProtocol from mapchete.tile import BufferedTile, BufferedTilePyramid from mapchete.types import BandIndexes, BoundsLike, CRSLike, ResamplingLike, TileLike class InputTileProtocol(GridProtocol): - preprocessing_tasks_results: dict = {} + preprocessing_tasks_results: dict input_key: str tile: BufferedTile @@ -23,7 +29,7 @@ def is_empty(self) -> bool: """Checks if input is empty here.""" ... - def set_preprocessing_task_result(self, task_key: str, result: Any) -> NoReturn: + def set_preprocessing_task_result(self, task_key: str, result: Any) -> None: ... def __enter__(self) -> InputTileProtocol: @@ -66,15 +72,19 @@ class InputDataProtocol(Protocol): preprocessing_tasks_results: dict = {} def open(self, tile: TileLike, **kwargs) -> InputTileProtocol: + """Return an input instance for a given process tile.""" ... def bbox(self, out_crs: Optional[CRSLike] = None) -> BaseGeometry: + """Return geometry of input bounding box.""" ... def exists(self) -> bool: + """Check whether data exists.""" ... - def cleanup(self) -> NoReturn: + def cleanup(self) -> None: + """Optional cleanup code after processing.""" ... def add_preprocessing_task( @@ -85,14 +95,94 @@ def add_preprocessing_task( key: Optional[str] = None, geometry: Optional[BaseGeometry] = None, bounds: Optional[BoundsLike] = None, - ) -> NoReturn: + ) -> None: ... def get_preprocessing_task_result(self, task_key: str) -> Any: ... - def set_preprocessing_task_result(self, task_key: str, result: Any) -> NoReturn: + def set_preprocessing_task_result(self, task_key: str, result: Any) -> None: ... def preprocessing_task_finished(self, task_key: str) -> bool: ... + + +class OutputDataReaderProtocol(Protocol): + """Minimum interface for any output reader class.""" + + pixelbuffer: NonNegativeInt + pyramid: BufferedTilePyramid + crs: CRSLike + + def tiles_exist( + self, + process_tile: Optional[BufferedTile] = None, + output_tile: Optional[BufferedTile] = None, + ) -> bool: + ... + + def extract_subset( + self, input_data_tiles: List[Tuple[BufferedTile, Any]], out_tile: BufferedTile + ) -> Any: + ... + + def read(self, output_tile: BufferedTile) -> Any: + ... + + def empty(self, process_tile: BufferedTile) -> Any: + ... + + def open(self, tile: BufferedTile, process: "MapcheteProcess") -> InputTileProtocol: + ... + + def for_web(self, data) -> np.ndarray: + ... + + +class FileSystemOutputDataReaderProtocol: + """Minimum interface for any filesystem storage based output reader class.""" + + def get_path(self, tile: BufferedTile) -> MPath: + ... + + # STAC functionality # + ###################### + + @property + def stac_path(self) -> MPath: + ... + + @property + def stac_item_id(self) -> str: + ... + + @property + def stac_item_metadata(self) -> dict: + ... + + @property + def stac_asset_type(self) -> str: + ... + + +class OutputDataWriterProtocol(OutputDataReaderProtocol): + def write(self, process_tile: BufferedTile, data: Any) -> None: + ... + + def output_is_valid(self, process_data: Any) -> bool: + ... + + def output_cleaned(self, process_data: Any) -> Any: + ... + + def streamline_output(self, process_data: Any) -> Any: + ... + + def close( + self, + exctype: Optional[Type[BaseException]], + excinst: Optional[BaseException], + exctb: Optional[TracebackType], + ) -> None: + ... diff --git a/mapchete/processing/base.py b/mapchete/processing/base.py index b7bafc8f..f630e155 100644 --- a/mapchete/processing/base.py +++ b/mapchete/processing/base.py @@ -5,7 +5,7 @@ import os import threading from contextlib import ExitStack -from typing import Any, Iterator, List, Optional, Tuple, Union +from typing import Any, Generator, Iterator, List, Optional, Tuple, Union from cachetools import LRUCache from shapely.geometry import Polygon, base @@ -92,7 +92,7 @@ def __init__( def get_process_tiles( self, zoom: Optional[int] = None, batch_by: Optional[str] = None - ) -> Iterator[BufferedTile]: + ) -> Generator[BufferedTile, None, None]: """ Yield process tiles. @@ -113,6 +113,8 @@ def get_process_tiles( BufferedTile objects """ logger.debug("get process tiles...") + # if batch_by is None and hasattr(self.config.output_reader, "tile_path_schema"): + # batch_by = batch_sort_property(self.config.output_reader.tile_path_schema) if zoom or zoom == 0: for tile in self.config.process_pyramid.tiles_from_geom( self.config.area_at_zoom(zoom), @@ -130,8 +132,8 @@ def get_process_tiles( def skip_tiles( self, - tiles: Iterator[BufferedTile] = None, - tiles_batches: Iterator[Iterator[BufferedTile]] = None, + tiles: Optional[Iterator[BufferedTile]] = None, + tiles_batches: Optional[Iterator[Iterator[BufferedTile]]] = None, ) -> Iterator[Tuple[BufferedTile, bool]]: """ Quickly determine whether tiles can be skipped for processing. @@ -161,9 +163,11 @@ def skip_tiles( for batch in tiles_batches: for tile in batch: yield (tile, False) - else: + elif tiles: for tile in tiles: yield (tile, False) + else: # pragma: no cover + raise TypeError("either tiles or tiles_batches required") def tasks( self, @@ -204,7 +208,7 @@ def execute( tile: Optional[TileLike] = None, executor: Optional[ExecutorBase] = None, concurrency: Concurrency = Concurrency.none, - workers: int = os.cpu_count(), + workers: Optional[int] = os.cpu_count(), propagate_results: bool = False, dask_settings: DaskSettings = DaskSettings(), remember_preprocessing_results: bool = False, @@ -772,15 +776,15 @@ def _tile_task_batches( overview_parents = set() for i, zoom in enumerate(zoom_levels.descending()): tile_tasks = [] - + if hasattr(process.config.output_reader, "tile_path_schema"): + batch_by = batch_sort_property( + process.config.output_reader.tile_path_schema + ) + else: + batch_by = "row" for tile in _filter_skipable( process=process, - tiles_batches=process.get_process_tiles( - zoom, - batch_by=batch_sort_property( - process.config.output_reader.tile_path_schema - ), - ), + tiles_batches=process.get_process_tiles(zoom, batch_by=batch_by), overview_tiles=( overview_parents if process.config.baselevels and i else None ), diff --git a/mapchete/processing/tasks.py b/mapchete/processing/tasks.py index b4e5820f..fa863940 100644 --- a/mapchete/processing/tasks.py +++ b/mapchete/processing/tasks.py @@ -52,7 +52,7 @@ class Task(ABC): id: str func: Callable fargs: Tuple - fkwargs: Tuple + fkwargs: dict dependencies: Dict[str, TaskInfo] result_key_name: str geometry: Optional[Union[base.BaseGeometry, dict]] = None @@ -61,17 +61,17 @@ class Task(ABC): def __init__( self, + func: Callable, id: Optional[str] = None, - func: Optional[Callable] = None, fargs: Optional[Tuple] = None, - fkwargs: Optional[Tuple] = None, + fkwargs: Optional[dict] = None, dependencies: Optional[Dict[str, TaskInfo]] = None, result_key_name: Optional[str] = None, geometry: Optional[Union[base.BaseGeometry, dict]] = None, bounds: Optional[BoundsLike] = None, ): - self.id = id or uuid4().hex self.func = func + self.id = id or uuid4().hex self.fargs = fargs or () self.fkwargs = fkwargs or {} self.dependencies = dependencies or {} @@ -248,7 +248,7 @@ def __init__( self.mode = config.mode self.output_reader = config.output_reader if config.baselevels else None self._dependencies = dict() - super().__init__(id=self.id, geometry=tile.bbox) + super().__init__(self.func, id=self.id, geometry=tile.bbox) def __repr__(self): # pragma: no cover return f"TileTask(id={self.id}, tile={self.tile}, bounds={self.bounds})" From bf5a34cc5aae26dbfce32c933ec6b3a5588004b4 Mon Sep 17 00:00:00 2001 From: Joachim Ungar Date: Wed, 27 Mar 2024 09:27:47 +0100 Subject: [PATCH 08/28] update tests --- test/test_processing_tasks.py | 24 ++++++++++++++---------- 1 file changed, 14 insertions(+), 10 deletions(-) diff --git a/test/test_processing_tasks.py b/test/test_processing_tasks.py index d9ea55e9..bf25c62b 100644 --- a/test/test_processing_tasks.py +++ b/test/test_processing_tasks.py @@ -11,27 +11,31 @@ from mapchete.tile import BufferedTilePyramid +def dummy_func(*args, **kwargs): + return + + def test_task_geo_interface(): - task = Task(bounds=(0, 1, 2, 3)) + task = Task(dummy_func, bounds=(0, 1, 2, 3)) assert not shape(task).is_empty def test_task_errors(): # provide geometry and bounds at the same time with pytest.raises(ValueError): - Task(geometry="foo", bounds="bar") + Task(dummy_func, geometry="foo", bounds="bar") # task has no geo information with pytest.raises(NoTaskGeometry): - Task().__geo_interface__ + Task(dummy_func).__geo_interface__ # invalid dependencies with pytest.raises(TypeError): - Task().add_dependencies("invalid") + Task(dummy_func).add_dependencies("invalid") def test_task_dict(): - task_dict = Task(bounds=(0, 1, 2, 3)).to_dict() + task_dict = Task(dummy_func, bounds=(0, 1, 2, 3)).to_dict() assert "geometry" in task_dict assert "properties" in task_dict assert "id" in task_dict @@ -39,7 +43,7 @@ def test_task_dict(): def test_task_dependencies(): - task = Task() + task = Task(dummy_func) task.add_dependencies({"foo": "bar"}) assert "foo" in task.dependencies @@ -62,7 +66,7 @@ def test_task_batches(): for task in batch: assert isinstance(task, Task) - other_task = Task(bounds=(0, 1, 2, 3)) + other_task = Task(dummy_func, bounds=(0, 1, 2, 3)) assert len(batch.intersection(other_task)) == 10 assert len(batch.intersection((0, 1, 2, 3))) == 10 @@ -96,7 +100,7 @@ def test_tile_task_batch(dem_to_hillshade): tile_task = TileTask(tile=other_tile, config=dem_to_hillshade.mp().config) assert len(tile_task_batch.intersection(tile_task)) == 4 - task = Task(bounds=other_tile.bounds) + task = Task(dummy_func, bounds=other_tile.bounds) assert len(tile_task_batch.intersection(task)) == 4 assert len(tile_task_batch.intersection(task.bounds)) == 4 @@ -142,12 +146,12 @@ def test_task_batches_mixed_geometries(): assert isinstance(task, Task) # other task intersecting with all tasks - other_task = Task(bounds=(0, 1, 2, 3)) + other_task = Task(dummy_func, bounds=(0, 1, 2, 3)) assert len(batch.intersection(other_task)) == 20 assert len(batch.intersection((0, 1, 2, 3))) == 20 # other task not intersecting with spatial tasks - other_task = Task(bounds=(3, 4, 5, 6)) + other_task = Task(dummy_func, bounds=(3, 4, 5, 6)) assert len(batch.intersection(other_task)) == 10 assert len(batch.intersection((3, 4, 5, 6))) == 10 From 9828af863d545e289ad74e0de56747c4133b88c1 Mon Sep 17 00:00:00 2001 From: Joachim Ungar Date: Wed, 27 Mar 2024 10:12:10 +0100 Subject: [PATCH 09/28] add Empty exception as alias for MapcheteNodataTile; fix typing concerns in processes --- mapchete/__init__.py | 3 ++- mapchete/errors.py | 4 ++++ mapchete/io/raster/array.py | 12 ++++++++---- mapchete/processes/clip.py | 6 +++--- mapchete/processes/contours.py | 12 +++++++----- mapchete/processes/convert.py | 17 ++++++++++------- mapchete/processes/hillshade.py | 9 +++++---- 7 files changed, 39 insertions(+), 24 deletions(-) diff --git a/mapchete/__init__.py b/mapchete/__init__.py index 0aab4291..9b943f00 100644 --- a/mapchete/__init__.py +++ b/mapchete/__init__.py @@ -5,7 +5,7 @@ from fsspec import AbstractFileSystem from mapchete.config import MapcheteConfig -from mapchete.errors import MapcheteNodataTile +from mapchete.errors import Empty, MapcheteNodataTile from mapchete.executor import Executor, MFuture from mapchete.formats import read_output_metadata from mapchete.formats.protocols import ( @@ -26,6 +26,7 @@ "MapcheteProcess", "Timer", "Executor", + "Empty", "MapcheteNodataTile", "MFuture", "RasterInput", diff --git a/mapchete/errors.py b/mapchete/errors.py index e20c221e..a59e8e03 100644 --- a/mapchete/errors.py +++ b/mapchete/errors.py @@ -37,6 +37,10 @@ class MapcheteNodataTile(Exception): """Indicates an empty tile.""" +class Empty(MapcheteNodataTile): + """Short alias for MapcheteNodataTile.""" + + class GeometryTypeError(TypeError): """Raised when geometry type does not fit.""" diff --git a/mapchete/io/raster/array.py b/mapchete/io/raster/array.py index 2b24b491..04ff6b62 100644 --- a/mapchete/io/raster/array.py +++ b/mapchete/io/raster/array.py @@ -133,9 +133,11 @@ def resample_from_array( data=np.stack(array), mask=np.stack( [ - band.mask - if isinstance(band, ma.masked_array) - else np.where(band == nodata, True, False) + ( + band.mask + if isinstance(band, ma.masked_array) + else np.where(band == nodata, True, False) + ) for band in array ] ), @@ -296,7 +298,7 @@ def clip_array_with_vector( geometries: List[dict], inverted: bool = False, clip_buffer: int = 0, -): +) -> ma.MaskedArray: """ Clip input array with a vector list. @@ -352,6 +354,8 @@ def clip_array_with_vector( invert=inverted, ) return ma.masked_array(array, mask=np.stack([mask for band in array])) + else: # pragma: no cover + raise ValueError("array has to be 2D or 3D") # if no geometries, return unmasked array else: diff --git a/mapchete/processes/clip.py b/mapchete/processes/clip.py index bc94ca36..54749c1e 100644 --- a/mapchete/processes/clip.py +++ b/mapchete/processes/clip.py @@ -3,7 +3,7 @@ import numpy.ma as ma -from mapchete import MapcheteNodataTile, RasterInput, VectorInput +from mapchete import Empty, RasterInput, VectorInput from mapchete.io import MatchingMethod from mapchete.io.raster.array import clip_array_with_vector from mapchete.types import BandIndexes, ResamplingLike @@ -31,10 +31,10 @@ def execute( clip_geom = clip.read() if not clip_geom: logger.debug("no clip data over tile") - raise MapcheteNodataTile + raise Empty if inp.is_empty(): - raise MapcheteNodataTile + raise Empty logger.debug("reading input data") input_data = inp.read( diff --git a/mapchete/processes/contours.py b/mapchete/processes/contours.py index 1a4708e6..991c7b42 100644 --- a/mapchete/processes/contours.py +++ b/mapchete/processes/contours.py @@ -1,4 +1,5 @@ """Contour line extraction using matplotlib.""" + import logging from typing import List, Optional @@ -6,7 +7,7 @@ from shapely.geometry import LineString, mapping, shape from shapely.ops import unary_union -from mapchete import MapcheteNodataTile, RasterInput, VectorInput +from mapchete import Empty, RasterInput, VectorInput from mapchete.io import MatchingMethod from mapchete.tile import BufferedTile from mapchete.types import ResamplingLike @@ -77,10 +78,10 @@ def execute( clip_geom = [] if not clip_geom: logger.debug("no clip data over tile") - raise MapcheteNodataTile + raise Empty if dem.is_empty(): - raise MapcheteNodataTile + raise Empty logger.debug("reading input raster") dem_data = dem.read( @@ -93,7 +94,7 @@ def execute( ) if dem_data.mask.all(): logger.debug("raster empty") - raise MapcheteNodataTile + raise Empty logger.debug("calculate hillshade") contour_lines = contours( @@ -150,12 +151,13 @@ def contours( index += 1 paths = contours.collections[level].get_paths() for path in paths: + breakpoint() out_coords = [ ( tile.left + (y * tile.pixel_x_size), tile.top - (x * tile.pixel_y_size), ) - for x, y in zip(path.vertices[:, 1], path.vertices[:, 0]) + for x, y in np.asarray(path.vertices) ] if len(out_coords) >= 2: out_contours.append( diff --git a/mapchete/processes/convert.py b/mapchete/processes/convert.py index 1e2a9911..ae0ec8fe 100644 --- a/mapchete/processes/convert.py +++ b/mapchete/processes/convert.py @@ -1,11 +1,12 @@ import logging from typing import List, Optional, Union +import numpy as np import numpy.ma as ma from numpy.typing import DTypeLike from rasterio.dtypes import dtype_ranges -from mapchete import MapcheteNodataTile, RasterInput, VectorInput +from mapchete import Empty, RasterInput, VectorInput from mapchete.io import MatchingMethod from mapchete.io.raster.array import clip_array_with_vector from mapchete.types import BandIndexes, ResamplingLike @@ -25,9 +26,9 @@ def execute( clip_pixelbuffer: int = 0, scale_ratio: float = 1.0, scale_offset: float = 0.0, - clip_to_output_dtype: Optional[DTypeLike] = None, + clip_to_output_dtype: Optional[str] = None, **kwargs, -) -> Union[ma.MaskedArray, List[dict]]: +) -> Union[np.ndarray, List[dict]]: """ Convert and optionally clip input raster or vector data. @@ -82,10 +83,10 @@ def execute( clip_geom = clip.read() if not clip_geom: logger.debug("no clip data over tile") - raise MapcheteNodataTile + raise Empty if inp.is_empty(): - raise MapcheteNodataTile + raise Empty logger.debug("reading input data") if isinstance(inp, RasterInput): @@ -104,8 +105,10 @@ def execute( logger.debug("apply scale ratio %s", scale_ratio) input_data = input_data.astype("float64", copy=False) * scale_ratio if ( - scale_offset != 0.0 or scale_ratio != 1.0 - ) and clip_to_output_dtype in dtype_ranges: + clip_to_output_dtype + and (scale_offset != 0.0 or scale_ratio != 1.0) + and clip_to_output_dtype in dtype_ranges + ): logger.debug("clip to output dtype ranges") input_data.clip(*dtype_ranges[clip_to_output_dtype], out=input_data) diff --git a/mapchete/processes/hillshade.py b/mapchete/processes/hillshade.py index 167d5953..a4214688 100644 --- a/mapchete/processes/hillshade.py +++ b/mapchete/processes/hillshade.py @@ -33,6 +33,7 @@ OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. """ + import logging import math import warnings @@ -43,7 +44,7 @@ import numpy.ma as ma from affine import Affine -from mapchete import MapcheteNodataTile, RasterInput, VectorInput +from mapchete import Empty, RasterInput, VectorInput from mapchete.io import MatchingMethod from mapchete.types import ResamplingLike @@ -73,10 +74,10 @@ def execute( clip_geom = clip.read() if not clip_geom: logger.debug("no clip data over tile") - raise MapcheteNodataTile + raise Empty if dem.is_empty(): - raise MapcheteNodataTile + raise Empty logger.debug("reading input raster") elevation_data = dem.read( @@ -88,7 +89,7 @@ def execute( ) if elevation_data.mask.all(): - raise MapcheteNodataTile + raise Empty logger.debug("calculate hillshade") return hillshade( From 186c39c3cedf85273cd529206ddbe7447a44e6aa Mon Sep 17 00:00:00 2001 From: Joachim Ungar Date: Wed, 27 Mar 2024 10:34:37 +0100 Subject: [PATCH 10/28] run 'ruff check . --fix' --- mapchete/cli/default/cp.py | 1 - mapchete/commands/_index.py | 1 - mapchete/config/base.py | 4 ++-- mapchete/executor/dask.py | 1 - mapchete/formats/base.py | 2 -- mapchete/formats/default/flatgeobuf.py | 1 - mapchete/formats/tools.py | 1 - mapchete/index.py | 1 - mapchete/io/raster/convert.py | 2 -- mapchete/io/vector.py | 4 ++-- mapchete/path.py | 1 - mapchete/processes/convert.py | 2 -- mapchete/processing/base.py | 9 +-------- mapchete/processing/tasks.py | 2 +- test/test_config.py | 1 - test/test_deprecated.py | 1 - test/test_formats_flatgeobuf.py | 1 - test/test_processes.py | 1 - test/test_processing_tasks.py | 3 --- 19 files changed, 6 insertions(+), 33 deletions(-) diff --git a/mapchete/cli/default/cp.py b/mapchete/cli/default/cp.py index bd677b03..93948c72 100644 --- a/mapchete/cli/default/cp.py +++ b/mapchete/cli/default/cp.py @@ -1,5 +1,4 @@ import click -import tqdm from mapchete import commands from mapchete.cli import options diff --git a/mapchete/commands/_index.py b/mapchete/commands/_index.py index ac81fec0..a8e4ea8f 100644 --- a/mapchete/commands/_index.py +++ b/mapchete/commands/_index.py @@ -13,7 +13,6 @@ from mapchete.config.parse import bounds_from_opts, raw_conf from mapchete.enums import InputType from mapchete.index import zoom_index_gen -from mapchete.path import MPath from mapchete.types import MPathLike, Progress logger = logging.getLogger(__name__) diff --git a/mapchete/config/base.py b/mapchete/config/base.py index 3c3be453..c3e1f7ee 100644 --- a/mapchete/config/base.py +++ b/mapchete/config/base.py @@ -949,12 +949,12 @@ def _unflatten_tree(flat): # there are more branches else: # create new dict - if not path[0] in tree: + if path[0] not in tree: tree[path[0]] = _unflatten_tree({"/".join(path[1:]): value}) # add keys to existing dict else: branch = _unflatten_tree({"/".join(path[1:]): value}) - if not path[1] in tree[path[0]]: + if path[1] not in tree[path[0]]: tree[path[0]][path[1]] = branch[path[1]] else: tree[path[0]][path[1]].update(branch[path[1]]) diff --git a/mapchete/executor/dask.py b/mapchete/executor/dask.py index 18907152..7d5d4da5 100644 --- a/mapchete/executor/dask.py +++ b/mapchete/executor/dask.py @@ -12,7 +12,6 @@ from mapchete.executor.future import MFuture from mapchete.executor.types import Result from mapchete.pretty import pretty_bytes -from mapchete.timer import Timer logger = logging.getLogger(__name__) diff --git a/mapchete/formats/base.py b/mapchete/formats/base.py index 9061a50a..76d965c5 100644 --- a/mapchete/formats/base.py +++ b/mapchete/formats/base.py @@ -14,7 +14,6 @@ import numpy.ma as ma from pydantic import NonNegativeInt from shapely.geometry import shape -from shapely.geometry.base import BaseGeometry from mapchete.config import get_hash from mapchete.errors import MapcheteNodataTile, MapcheteProcessOutputError @@ -22,7 +21,6 @@ # from mapchete.formats.models import BaseInputParams from mapchete.formats.protocols import InputDataProtocol, InputTileProtocol -from mapchete.io import path_exists from mapchete.io.raster import ( create_mosaic, extract_from_array, diff --git a/mapchete/formats/default/flatgeobuf.py b/mapchete/formats/default/flatgeobuf.py index 4ec21cbe..5d0d90e4 100644 --- a/mapchete/formats/default/flatgeobuf.py +++ b/mapchete/formats/default/flatgeobuf.py @@ -21,7 +21,6 @@ import warnings from mapchete.formats.default import _fiona_base -from mapchete.formats.protocols import VectorInput METADATA = {"driver_name": "FlatGeobuf", "data_type": "vector", "mode": "rw"} diff --git a/mapchete/formats/tools.py b/mapchete/formats/tools.py index 7d26c713..f64848ba 100644 --- a/mapchete/formats/tools.py +++ b/mapchete/formats/tools.py @@ -15,7 +15,6 @@ from shapely.geometry.base import BaseGeometry from mapchete.errors import MapcheteConfigError, MapcheteDriverError -from mapchete.formats.models import DriverMetadata from mapchete.io import MPath, fiona_open, rasterio_open from mapchete.registered import drivers from mapchete.tile import BufferedTilePyramid diff --git a/mapchete/index.py b/mapchete/index.py index 568bd450..39217732 100644 --- a/mapchete/index.py +++ b/mapchete/index.py @@ -341,7 +341,6 @@ class VRTFileWriter: def __init__(self, out_path=None, output=None, out_pyramid=None): # see if lxml is installed before checking all output tiles - from lxml.builder import ElementMaker self.path = out_path self._tp = out_pyramid diff --git a/mapchete/io/raster/convert.py b/mapchete/io/raster/convert.py index 02e74986..ae0aa93d 100644 --- a/mapchete/io/raster/convert.py +++ b/mapchete/io/raster/convert.py @@ -2,8 +2,6 @@ from mapchete.io import copy from mapchete.io.raster.open import rasterio_open -from mapchete.io.raster.read import read_raster_window -from mapchete.io.raster.referenced_raster import ReferencedRaster from mapchete.path import MPath logger = logging.getLogger(__name__) diff --git a/mapchete/io/vector.py b/mapchete/io/vector.py index e9f3d473..e202fc2c 100644 --- a/mapchete/io/vector.py +++ b/mapchete/io/vector.py @@ -8,7 +8,7 @@ from typing import Any, Union import fiona -from fiona.errors import DriverError, FionaError, FionaValueError +from fiona.errors import DriverError from fiona.io import MemoryFile from rasterio.crs import CRS from retry import retry @@ -26,7 +26,7 @@ segmentize_geometry, to_shape, ) -from mapchete.path import MPath, fs_from_path, path_exists +from mapchete.path import MPath, fs_from_path from mapchete.settings import IORetrySettings from mapchete.types import Bounds from mapchete.validate import validate_bounds diff --git a/mapchete/path.py b/mapchete/path.py index 99bc91a8..ee662cd6 100644 --- a/mapchete/path.py +++ b/mapchete/path.py @@ -19,7 +19,6 @@ from rasterio.session import Session as RioSession from retry import retry -import mapchete from mapchete.executor import Executor from mapchete.settings import GDALHTTPOptions, IORetrySettings, mapchete_options from mapchete.tile import BufferedTile diff --git a/mapchete/processes/convert.py b/mapchete/processes/convert.py index ae0ec8fe..eab55782 100644 --- a/mapchete/processes/convert.py +++ b/mapchete/processes/convert.py @@ -2,8 +2,6 @@ from typing import List, Optional, Union import numpy as np -import numpy.ma as ma -from numpy.typing import DTypeLike from rasterio.dtypes import dtype_ranges from mapchete import Empty, RasterInput, VectorInput diff --git a/mapchete/processing/base.py b/mapchete/processing/base.py index f630e155..d358865c 100644 --- a/mapchete/processing/base.py +++ b/mapchete/processing/base.py @@ -14,14 +14,7 @@ from mapchete.config import DaskSettings, MapcheteConfig from mapchete.enums import Concurrency, ProcessingMode from mapchete.errors import MapcheteNodataTile, ReprojectionFailed -from mapchete.executor import ( - MULTIPROCESSING_DEFAULT_START_METHOD, - DaskExecutor, - Executor, - ExecutorBase, - MFuture, -) -from mapchete.executor.base import func_partial +from mapchete.executor import Executor, ExecutorBase, MFuture from mapchete.executor.types import Profiler from mapchete.path import batch_sort_property, tiles_exist from mapchete.processing.execute import batches, dask_graph, single_batch diff --git a/mapchete/processing/tasks.py b/mapchete/processing/tasks.py index fa863940..fec09480 100644 --- a/mapchete/processing/tasks.py +++ b/mapchete/processing/tasks.py @@ -36,7 +36,7 @@ from mapchete.processing.types import TaskInfo, default_tile_task_id from mapchete.tile import BufferedTile from mapchete.timer import Timer -from mapchete.types import Bounds, BoundsLike, TileLike, ZoomLevels, ZoomLevelsLike +from mapchete.types import Bounds, BoundsLike, TileLike, ZoomLevels from mapchete.validate import validate_bounds logger = logging.getLogger(__name__) diff --git a/test/test_config.py b/test/test_config.py index d4d7dd5f..1bbd7e08 100644 --- a/test/test_config.py +++ b/test/test_config.py @@ -4,7 +4,6 @@ import oyaml as yaml import pytest -from fiona.errors import DriverError from pydantic import ValidationError from shapely import wkt from shapely.errors import WKTReadingError diff --git a/test/test_deprecated.py b/test/test_deprecated.py index 098f3398..aeb81ec6 100644 --- a/test/test_deprecated.py +++ b/test/test_deprecated.py @@ -3,7 +3,6 @@ import pytest import mapchete -from mapchete.errors import MapcheteProcessImportError def test_parse_deprecated(deprecated_params): diff --git a/test/test_formats_flatgeobuf.py b/test/test_formats_flatgeobuf.py index 41c26282..4c613751 100644 --- a/test/test_formats_flatgeobuf.py +++ b/test/test_formats_flatgeobuf.py @@ -1,6 +1,5 @@ """Test FlatGeobuf as process output.""" import pytest -from shapely.geometry import shape import mapchete from mapchete import formats diff --git a/test/test_processes.py b/test/test_processes.py index c818a281..b25dead4 100644 --- a/test/test_processes.py +++ b/test/test_processes.py @@ -4,7 +4,6 @@ import numpy.ma as ma import pytest -import mapchete from mapchete import MapcheteNodataTile from mapchete.processes import contours, convert, hillshade from mapchete.processes.examples import example_process diff --git a/test/test_processing_tasks.py b/test/test_processing_tasks.py index bf25c62b..9520f373 100644 --- a/test/test_processing_tasks.py +++ b/test/test_processing_tasks.py @@ -2,13 +2,10 @@ import pytest from shapely.geometry import shape -from shapely.ops import unary_union from mapchete.errors import NoTaskGeometry -from mapchete.executor import Executor from mapchete.processing.tasks import Task, TaskBatch, Tasks, TileTask, TileTaskBatch from mapchete.testing import ProcessFixture -from mapchete.tile import BufferedTilePyramid def dummy_func(*args, **kwargs): From 5f95838fc50d73b9cbf1189e4e9534b5bde05dd6 Mon Sep 17 00:00:00 2001 From: Joachim Ungar Date: Wed, 27 Mar 2024 10:37:28 +0100 Subject: [PATCH 11/28] remove breakpoint --- mapchete/config/base.py | 2 ++ mapchete/processes/contours.py | 1 - 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/mapchete/config/base.py b/mapchete/config/base.py index c3e1f7ee..5da82f30 100644 --- a/mapchete/config/base.py +++ b/mapchete/config/base.py @@ -1,3 +1,5 @@ +from __future__ import annotations + import hashlib import logging import warnings diff --git a/mapchete/processes/contours.py b/mapchete/processes/contours.py index 991c7b42..19e6debf 100644 --- a/mapchete/processes/contours.py +++ b/mapchete/processes/contours.py @@ -151,7 +151,6 @@ def contours( index += 1 paths = contours.collections[level].get_paths() for path in paths: - breakpoint() out_coords = [ ( tile.left + (y * tile.pixel_x_size), From 68daeea0120c58330bb34003dd977396d81e3e50 Mon Sep 17 00:00:00 2001 From: Joachim Ungar Date: Thu, 28 Mar 2024 15:04:54 +0100 Subject: [PATCH 12/28] add examples --- examples/contours/contours.mapchete | 20 ++++++++++++ examples/custom_grid/custom_grid.mapchete | 29 +++++++++++++++++ examples/file_groups/file_groups.mapchete | 20 ++++++++++++ examples/file_groups/file_groups.py | 21 ++++++++++++ examples/hillshade/hillshade.mapchete | 21 ++++++++++++ mapchete/config/process_func.py | 8 ++--- mapchete/formats/protocols.py | 4 +-- test/conftest.py | 39 ++++++++++++++++++++++- test/example.mapchete | 1 + test/test_config.py | 8 +++-- test/test_examples.py | 24 ++++++++++++++ test/testdata/file_groups.mapchete | 4 +-- test/testdata/file_groups.py | 21 ++++++++++++ 13 files changed, 207 insertions(+), 13 deletions(-) create mode 100644 examples/contours/contours.mapchete create mode 100644 examples/custom_grid/custom_grid.mapchete create mode 100644 examples/file_groups/file_groups.mapchete create mode 100644 examples/file_groups/file_groups.py create mode 100644 examples/hillshade/hillshade.mapchete create mode 100644 test/test_examples.py create mode 100644 test/testdata/file_groups.py diff --git a/examples/contours/contours.mapchete b/examples/contours/contours.mapchete new file mode 100644 index 00000000..bae9c885 --- /dev/null +++ b/examples/contours/contours.mapchete @@ -0,0 +1,20 @@ +process: mapchete.processes.contours +zoom_levels: + min: 0 + max: 8 +pyramid: + grid: geodetic + metatiling: 1 + pixelbuffer: 2 +input: + dem: ../../test/testdata/cleantopo_tl.tif +output: + format: GeoJSON + path: output + schema: + properties: + elevation: float + geometry: LineString +process_parameters: + field: "elevation" + interval: 100 \ No newline at end of file diff --git a/examples/custom_grid/custom_grid.mapchete b/examples/custom_grid/custom_grid.mapchete new file mode 100644 index 00000000..73d338d8 --- /dev/null +++ b/examples/custom_grid/custom_grid.mapchete @@ -0,0 +1,29 @@ +process: mapchete.processes.convert +input: + inp: ../../test/testdata/custom_grid_points.geojson +output: + format: GeoJSON + path: output + schema: + properties: + name: str + geometry: Unknown +pyramid: + grid: + shape: + - 1525 + - 125 + bounds: + - 166020 + - 0 + - 934020 + - 9369600 + is_global: false + proj: +proj=utm +zone=32 +datum=WGS84 +units=m +no_defs + metatiling: 4 +zoom_levels: 3 +bounds: +- 166020 +- 0 +- 934020 +- 9369600 diff --git a/examples/file_groups/file_groups.mapchete b/examples/file_groups/file_groups.mapchete new file mode 100644 index 00000000..7065662e --- /dev/null +++ b/examples/file_groups/file_groups.mapchete @@ -0,0 +1,20 @@ +process: file_groups.py +zoom_levels: + min: 0 + max: 5 +pyramid: + grid: geodetic + pixelbuffer: 10 + metatiling: 8 +input: + group1: + file1: ../../test/testdata/cleantopo_tl.tif + file2: ../../test/testdata/cleantopo_br.tif + group2: + file1: ../../test/testdata/dummy1.tif + file2: ../../test/testdata/dummy2.tif +output: + dtype: uint16 + bands: 1 + format: GTiff + path: output diff --git a/examples/file_groups/file_groups.py b/examples/file_groups/file_groups.py new file mode 100644 index 00000000..b84f53c2 --- /dev/null +++ b/examples/file_groups/file_groups.py @@ -0,0 +1,21 @@ +"""Example process file.""" + +import numpy.ma as ma + +from mapchete import RasterInputGroup + + +def execute(group1: RasterInputGroup, group2: RasterInputGroup) -> ma.MaskedArray: + """User defined process.""" + + # read band 1 and get mean of group1 + group1 = ma.mean( + ma.stack([raster_input.read(1) for _, raster_input in group1]), axis=0 + ) + + # read band 1 and get mean of group1 + group2 = ma.mean( + ma.stack([raster_input.read(1) for _, raster_input in group2]), axis=0 + ) + + return ma.mean(ma.stack([group1, group2]), axis=0) diff --git a/examples/hillshade/hillshade.mapchete b/examples/hillshade/hillshade.mapchete new file mode 100644 index 00000000..7eea53da --- /dev/null +++ b/examples/hillshade/hillshade.mapchete @@ -0,0 +1,21 @@ +process: mapchete.processes.hillshade +zoom_levels: + min: 0 + max: 8 +pyramid: + grid: geodetic + metatiling: 1 + pixelbuffer: 2 +input: + dem: ../../test/testdata/cleantopo_tl.tif +output: + path: output + format: GTiff + dtype: uint8 + bands: 1 +process_parameters: + resampling: "nearest" + azimuth: 315.0 + altitude: 45.0 + z: 1.0 + scale: 1.0 diff --git a/mapchete/config/process_func.py b/mapchete/config/process_func.py index 8a3a614c..2841f141 100644 --- a/mapchete/config/process_func.py +++ b/mapchete/config/process_func.py @@ -50,11 +50,9 @@ def __init__(self, src, config_dir=None, run_compile=True): # this also serves as a validation step for the function logger.debug("validate process function") func = self._load_func() - self.function_parameters = dict(**inspect.signature(func).parameters) def __call__(self, *args, **kwargs: Any) -> Any: - args = args kwargs = self.filter_parameters(kwargs) return self._load_func()(*args, **kwargs) @@ -62,7 +60,7 @@ def analyze_parameters( self, parameters_per_zoom: Dict[int, ZoomParameters] ) -> None: for zoom, config_parameters in parameters_per_zoom.items(): - # make sure parameters with no defaults are given, except of magical "mp" object + # make sure parameters with no defaults are present in configuration, except of magical "mp" object for name, param in self.function_parameters.items(): if param.default == inspect.Parameter.empty and name not in [ "mp", @@ -91,11 +89,11 @@ def analyze_parameters( f"zoom {zoom}: parameter '{param_name}' is set in the process configuration but not a process function parameter" ) - def filter_parameters(self, params): + def filter_parameters(self, kwargs): """Return function kwargs.""" return { k: v - for k, v in params.items() + for k, v in kwargs.items() if k in self.function_parameters and v is not None } diff --git a/mapchete/formats/protocols.py b/mapchete/formats/protocols.py index 1abf8fa2..1bd8f459 100644 --- a/mapchete/formats/protocols.py +++ b/mapchete/formats/protocols.py @@ -59,8 +59,8 @@ def read( ... -RasterInputGroup = List[RasterInput] -VectorInputGroup = List[VectorInput] +RasterInputGroup = List[Tuple[str, RasterInput]] +VectorInputGroup = List[Tuple[str, VectorInput]] class InputDataProtocol(Protocol): diff --git a/test/conftest.py b/test/conftest.py index 958110d7..08463c6f 100644 --- a/test/conftest.py +++ b/test/conftest.py @@ -31,7 +31,8 @@ S3_ENDPOINT_URL = "localhost:9000" SCRIPT_DIR = MPath(os.path.dirname(os.path.realpath(__file__))) -TESTDATA_DIR = MPath(os.path.join(SCRIPT_DIR, "testdata/")) +EXAMPLES_DIR = SCRIPT_DIR.parent / "examples/" +TESTDATA_DIR = SCRIPT_DIR / "testdata/" HTTP_TESTDATA_DIR = MPath("http://localhost/open/") SECURE_HTTP_TESTDATA_DIR = MPath( "http://localhost/secure/", @@ -1108,3 +1109,39 @@ def threads_executor(): """ConcurrentFuturesExecutor()""" with ConcurrentFuturesExecutor(concurrency="threads") as executor: yield executor + + +@pytest.fixture +def example_contours(mp_tmpdir): + """Fixture for examples/contours/contours.mapchete.""" + with ProcessFixture( + EXAMPLES_DIR / "contours/contours.mapchete", output_tempdir=mp_tmpdir + ) as example: + yield example + + +@pytest.fixture +def example_custom_grid(mp_tmpdir): + """Fixture for examples/custom_grid/custom_grid.mapchete.""" + with ProcessFixture( + EXAMPLES_DIR / "custom_grid/custom_grid.mapchete", output_tempdir=mp_tmpdir + ) as example: + yield example + + +@pytest.fixture +def example_file_groups(mp_tmpdir): + """Fixture for examples/file_groups/file_groups.mapchete.""" + with ProcessFixture( + EXAMPLES_DIR / "file_groups/file_groups.mapchete", output_tempdir=mp_tmpdir + ) as example: + yield example + + +@pytest.fixture +def example_hillshade(mp_tmpdir): + """Fixture for examples/hillshade/hillshade.mapchete.""" + with ProcessFixture( + EXAMPLES_DIR / "hillshade/hillshade.mapchete", output_tempdir=mp_tmpdir + ) as example: + yield example diff --git a/test/example.mapchete b/test/example.mapchete index 48987ea4..527bd3ab 100644 --- a/test/example.mapchete +++ b/test/example.mapchete @@ -22,6 +22,7 @@ pyramid: input: file1: + zoom<9: testdata/dummy2.tif zoom>=10: testdata/dummy1.tif file2: testdata/dummy2.tif output: diff --git a/test/test_config.py b/test/test_config.py index 1bbd7e08..c39b7371 100644 --- a/test/test_config.py +++ b/test/test_config.py @@ -74,7 +74,8 @@ def test_config_zoom7(example_mapchete, dummy2_tif): config = MapcheteConfig(example_mapchete.dict) zoom7 = config.params_at_zoom(7) input_files = zoom7["input"] - assert input_files["file1"] is None + assert input_files["file1"] is not None + assert str(input_files["file1"].path) == dummy2_tif assert str(input_files["file2"].path) == dummy2_tif assert zoom7["process_parameters"]["some_integer_parameter"] == 12 assert zoom7["process_parameters"]["some_float_parameter"] == 5.3 @@ -233,10 +234,11 @@ def test_read_baselevels(baselevels): def test_empty_input(file_groups): - """Verify configuration gets parsed without input files.""" + """Input has to be defined if required by process.""" config = file_groups.dict config.update(input=None) - assert mapchete.open(config) + with pytest.raises(MapcheteConfigError): + mapchete.open(config) def test_read_input_groups(file_groups): diff --git a/test/test_examples.py b/test/test_examples.py new file mode 100644 index 00000000..90675bb0 --- /dev/null +++ b/test/test_examples.py @@ -0,0 +1,24 @@ +import pytest +from pytest import lazy_fixture + +from mapchete.enums import Concurrency +from mapchete.testing import ProcessFixture + + +@pytest.mark.parametrize( + "process_fixture", + [ + lazy_fixture("example_contours"), + lazy_fixture("example_custom_grid"), + lazy_fixture("example_file_groups"), + lazy_fixture("example_hillshade"), + ], +) +def test_example(process_fixture: ProcessFixture): + """Runs configuration only on first process tile""" + with process_fixture.mp() as mp: + assert list( + mp.execute( + tile=process_fixture.first_process_tile(), concurrency=Concurrency.none + ) + ) diff --git a/test/testdata/file_groups.mapchete b/test/testdata/file_groups.mapchete index 7078b075..af532f91 100644 --- a/test/testdata/file_groups.mapchete +++ b/test/testdata/file_groups.mapchete @@ -1,4 +1,4 @@ -process: ../example_process.py +process: file_groups.py zoom_levels: min: 0 max: 5 @@ -24,4 +24,4 @@ output: dtype: uint16 bands: 1 format: GTiff - path: tmp/file_groups + path: foo/bar diff --git a/test/testdata/file_groups.py b/test/testdata/file_groups.py new file mode 100644 index 00000000..757285b7 --- /dev/null +++ b/test/testdata/file_groups.py @@ -0,0 +1,21 @@ +"""Example process file.""" + +import numpy.ma as ma + +from mapchete import RasterInputGroup + + +def execute( + group1: RasterInputGroup, group2: RasterInputGroup, nested_group: RasterInputGroup +) -> ma.MaskedArray: + """User defined process.""" + + # read and get mean of group1 + group1 = ma.mean(ma.stack([raster_input.read() for raster_input in group1]), axis=0) + + # read and get mean of group1 + group2 = ma.mean(ma.stack([raster_input.read() for raster_input in group2]), axis=0) + + # TODO nested_group + + return ma.mean(ma.stack([group1, group2]), axis=0) From 3d2a457b3fad07a11675eec7a35d00455aa48724 Mon Sep 17 00:00:00 2001 From: Joachim Ungar Date: Fri, 29 Mar 2024 11:03:57 +0100 Subject: [PATCH 13/28] remove deprecated fs kwargs for clip command; add typing --- examples/contours/contours.mapchete | 1 - mapchete/__init__.py | 8 +-- mapchete/commands/_cp.py | 83 +++++++---------------------- mapchete/processes/clip.py | 13 ++--- test/test_commands.py | 2 +- 5 files changed, 28 insertions(+), 79 deletions(-) diff --git a/examples/contours/contours.mapchete b/examples/contours/contours.mapchete index bae9c885..8198000c 100644 --- a/examples/contours/contours.mapchete +++ b/examples/contours/contours.mapchete @@ -5,7 +5,6 @@ zoom_levels: pyramid: grid: geodetic metatiling: 1 - pixelbuffer: 2 input: dem: ../../test/testdata/cleantopo_tl.tif output: diff --git a/mapchete/__init__.py b/mapchete/__init__.py index 9b943f00..60017963 100644 --- a/mapchete/__init__.py +++ b/mapchete/__init__.py @@ -79,17 +79,17 @@ def open( Mapchete a Mapchete process object """ + if fs or fs_kwargs: + raise DeprecationWarning() # convert to MPath object if possible if isinstance(some_input, str): some_input = MPath.from_inp(some_input) # for TileDirectory inputs if isinstance(some_input, MPath) and some_input.suffix == "": logger.debug("assuming TileDirectory") - metadata_json = MPath.from_inp(some_input).joinpath("metadata.json") - fs_kwargs = fs_kwargs or {} - fs = fs or fs_from_path(metadata_json, **fs_kwargs) + metadata_json = MPath.from_inp(some_input) / "metadata.json" logger.debug("read metadata.json") - metadata = read_output_metadata(metadata_json, fs=fs) + metadata = read_output_metadata(metadata_json) config = dict( process=None, input=None, diff --git a/mapchete/commands/_cp.py b/mapchete/commands/_cp.py index 7a0040a9..425559ad 100644 --- a/mapchete/commands/_cp.py +++ b/mapchete/commands/_cp.py @@ -1,20 +1,20 @@ """Copy tiles between Tile Directories.""" import logging -from contextlib import AbstractContextManager from multiprocessing import cpu_count from typing import List, Optional, Tuple, Union -from rasterio.crs import CRS +from distributed import Client from shapely.geometry import Point from shapely.geometry.base import BaseGeometry import mapchete from mapchete.commands.observer import ObserverProtocol, Observers +from mapchete.enums import Concurrency from mapchete.executor import Executor from mapchete.io import MPath, copy, tiles_exist from mapchete.io.vector import reproject_geometry -from mapchete.types import Progress +from mapchete.types import BoundsLike, CRSLike, Progress, ZoomLevelsLike logger = logging.getLogger(__name__) @@ -22,74 +22,33 @@ def cp( src_tiledir: Union[str, MPath], dst_tiledir: Union[str, MPath], - zoom: Union[int, List[int]] = None, - area: Union[BaseGeometry, str, dict] = None, - area_crs: Union[CRS, str] = None, - bounds: Tuple[float] = None, - bounds_crs: Union[CRS, str] = None, - point: Tuple[float, float] = None, - point_crs: Tuple[float, float] = None, + zoom: ZoomLevelsLike, + area: Optional[Union[BaseGeometry, str, dict]] = None, + area_crs: Optional[CRSLike] = None, + bounds: Optional[BoundsLike] = None, + bounds_crs: Optional[CRSLike] = None, + point: Optional[Tuple[float, float]] = None, + point_crs: Optional[CRSLike] = None, overwrite: bool = False, - workers: Union[int, None] = None, - multi: Union[int, None] = None, - concurrency: Union[str, None] = None, - dask_scheduler: Union[str, None] = None, - dask_client=None, + workers: Optional[int] = None, + concurrency: Concurrency = Concurrency.threads, + dask_scheduler: Optional[str] = None, + dask_client: Optional[Client] = None, src_fs_opts: Union[dict, None] = None, dst_fs_opts: Union[dict, None] = None, - executor_getter: AbstractContextManager = Executor, + executor_getter: type[Executor] = Executor, observers: Optional[List[ObserverProtocol]] = None, ): """ Copy TileDirectory from source to destination. - - Parameters - ---------- - src_tiledir : str - Source TileDirectory or mapchete file. - dst_tiledir : str - Destination TileDirectory. - zoom : integer or list of integers - Single zoom, minimum and maximum zoom or a list of zoom levels. - area : str, dict, BaseGeometry - Geometry to override bounds or area provided in process configuration. Can be either a - WKT string, a GeoJSON mapping, a shapely geometry or a path to a Fiona-readable file. - area_crs : CRS or str - CRS of area (default: process CRS). - bounds : tuple - Override bounds or area provided in process configuration. - bounds_crs : CRS or str - CRS of area (default: process CRS). - point : iterable - X and y coordinates of point whose corresponding output tile bounds will be used. - point_crs : str or CRS - CRS of point (defaults to process pyramid CRS). - overwrite : bool - Overwrite existing output. - workers : int - Number of threads used to check whether tiles exist. - concurrency : str - Concurrency to be used. Could either be "processes", "threads" or "dask". - dask_scheduler : str - URL to dask scheduler if required. - dask_client : dask.distributed.Client - Reusable Client instance if required. Otherwise a new client will be created. - src_fs_opts : dict - Configuration options for source fsspec filesystem. - dst_fs_opts : dict - Configuration options for destination fsspec filesystem. """ workers = workers or cpu_count() src_fs_opts = src_fs_opts or {} dst_fs_opts = dst_fs_opts or {} - if zoom is None: # pragma: no cover - raise ValueError("zoom level(s) required") src_tiledir = MPath.from_inp(src_tiledir, storage_options=src_fs_opts) dst_tiledir = MPath.from_inp(dst_tiledir, storage_options=dst_fs_opts) - src_fs = src_tiledir.fs - dst_fs = dst_tiledir.fs all_observers = Observers(observers) # open source tile directory @@ -100,8 +59,6 @@ def cp( area_crs=area_crs, bounds=bounds, bounds_crs=bounds_crs, - fs=src_fs, - fs_kwargs=src_fs_opts, mode="readonly", ) as src_mp: tp = src_mp.config.output_pyramid @@ -109,15 +66,13 @@ def cp( # copy metadata to destination if necessary src_metadata = src_tiledir / "metadata.json" dst_metadata = dst_tiledir / "metadata.json" - if not dst_fs.exists(dst_metadata): + if not dst_tiledir.fs.exists(dst_metadata): msg = f"copy {src_metadata} to {dst_metadata}" logger.debug(msg) all_observers.notify(message=msg) copy( src_metadata, dst_metadata, - src_fs=src_fs, - dst_fs=dst_fs, overwrite=overwrite, ) @@ -128,8 +83,6 @@ def cp( area_crs=area_crs, bounds=bounds, bounds_crs=bounds_crs, - fs=dst_fs, - fs_kwargs=dst_fs_opts, mode="readonly", ) as dst_mp: with executor_getter( @@ -185,8 +138,8 @@ def cp( dst_mp, src_tiles_exist, dst_tiles_exist, - src_fs, - dst_fs, + src_tiledir.fs, + dst_tiledir.fs, overwrite, ), ), diff --git a/mapchete/processes/clip.py b/mapchete/processes/clip.py index 54749c1e..00de7e9f 100644 --- a/mapchete/processes/clip.py +++ b/mapchete/processes/clip.py @@ -28,13 +28,10 @@ def execute( """ # read clip geometry - clip_geom = clip.read() - if not clip_geom: - logger.debug("no clip data over tile") - raise Empty - - if inp.is_empty(): - raise Empty + if clip.is_empty(): + raise Empty("no clip data over tile") + elif inp.is_empty(): + raise Empty("no data over tile") logger.debug("reading input data") input_data = inp.read( @@ -48,5 +45,5 @@ def execute( logger.debug("clipping output with geometry") # apply original nodata mask and clip return clip_array_with_vector( - input_data, inp.tile.affine, clip_geom, clip_buffer=clip_pixelbuffer + input_data, inp.tile.affine, clip.read(), clip_buffer=clip_pixelbuffer ) diff --git a/test/test_commands.py b/test/test_commands.py index 2bca9d3b..d38a6244 100644 --- a/test/test_commands.py +++ b/test/test_commands.py @@ -69,7 +69,7 @@ def test_cp(mp_tmpdir, cleantopo_br, wkt_geom): out_path, os.path.join(mp_tmpdir, "nothreads"), zoom=5, - multi=1, + workers=1, observers=[task_counter], ) assert task_counter.tasks From 714334759366a581362aeef767e6480e9a6fe71a Mon Sep 17 00:00:00 2001 From: Joachim Ungar Date: Fri, 29 Mar 2024 11:09:27 +0100 Subject: [PATCH 14/28] add clip example --- examples/clip/clip.mapchete | 16 ++++++++++++++++ test/conftest.py | 9 +++++++++ test/test_examples.py | 1 + 3 files changed, 26 insertions(+) create mode 100644 examples/clip/clip.mapchete diff --git a/examples/clip/clip.mapchete b/examples/clip/clip.mapchete new file mode 100644 index 00000000..c03b57bc --- /dev/null +++ b/examples/clip/clip.mapchete @@ -0,0 +1,16 @@ +process: mapchete.processes.clip +zoom_levels: + min: 0 + max: 8 +pyramid: + grid: geodetic + metatiling: 1 +input: + inp: ../../test/testdata/cleantopo_br.tif + clip: ../../test/testdata/aoi_br.geojson + +output: + path: output + format: GTiff + dtype: uint8 + bands: 1 diff --git a/test/conftest.py b/test/conftest.py index 08463c6f..1b71186a 100644 --- a/test/conftest.py +++ b/test/conftest.py @@ -1111,6 +1111,15 @@ def threads_executor(): yield executor +@pytest.fixture +def example_clip(mp_tmpdir): + """Fixture for examples/clip/clip.mapchete.""" + with ProcessFixture( + EXAMPLES_DIR / "clip/clip.mapchete", output_tempdir=mp_tmpdir + ) as example: + yield example + + @pytest.fixture def example_contours(mp_tmpdir): """Fixture for examples/contours/contours.mapchete.""" diff --git a/test/test_examples.py b/test/test_examples.py index 90675bb0..b6bd8579 100644 --- a/test/test_examples.py +++ b/test/test_examples.py @@ -8,6 +8,7 @@ @pytest.mark.parametrize( "process_fixture", [ + lazy_fixture("example_clip"), lazy_fixture("example_contours"), lazy_fixture("example_custom_grid"), lazy_fixture("example_file_groups"), From 7c0ef039a585284baad7b246f6af352c9e3ee10e Mon Sep 17 00:00:00 2001 From: Joachim Ungar Date: Fri, 29 Mar 2024 11:57:42 +0100 Subject: [PATCH 15/28] add numpy array protocol to ReferencedRaster --- mapchete/io/raster/array.py | 15 ++++---- mapchete/io/raster/referenced_raster.py | 47 +++++++++++++++++++------ test/test_io_raster.py | 5 +++ 3 files changed, 49 insertions(+), 18 deletions(-) diff --git a/mapchete/io/raster/array.py b/mapchete/io/raster/array.py index 04ff6b62..752a90ee 100644 --- a/mapchete/io/raster/array.py +++ b/mapchete/io/raster/array.py @@ -50,16 +50,15 @@ def extract_from_array( if out_grid is None: # pragma: no cover raise ValueError("grid must be defined") - if hasattr(array, "affine") and hasattr(array, "data"): # pragma: no cover - array_transform, array = array.affine, array.data - elif hasattr(array, "transform") and hasattr(array, "data"): # pragma: no cover - array_transform, array = array.transform, array.data - elif array_transform is None: # pragma: no cover - raise ValueError("an Affine object is required") + from mapchete.io.raster.referenced_raster import ReferencedRaster + + raster = ReferencedRaster.from_array_like( + array, transform=array_transform, crs=out_grid.crs + ) # get range within array minrow, maxrow, mincol, maxcol = bounds_to_ranges( - bounds=out_grid.bounds, transform=array_transform + bounds=out_grid.bounds, transform=raster.transform ) # if output window is within input window if ( @@ -68,7 +67,7 @@ def extract_from_array( and maxrow <= array.shape[-2] and maxcol <= array.shape[-1] ): - return array[..., minrow:maxrow, mincol:maxcol] + return ma.array(raster)[..., minrow:maxrow, mincol:maxcol] # raise error if output is not fully within input else: raise ValueError("extraction fails if output shape is not within input") diff --git a/mapchete/io/raster/referenced_raster.py b/mapchete/io/raster/referenced_raster.py index 91625709..deca1b3d 100644 --- a/mapchete/io/raster/referenced_raster.py +++ b/mapchete/io/raster/referenced_raster.py @@ -1,11 +1,14 @@ +from __future__ import annotations + import logging import warnings -from typing import List, Optional, Tuple, Union +from typing import List, Optional, Union import numpy as np import numpy.ma as ma from affine import Affine -from shapely.geometry import box, mapping +from rasterio.transform import array_bounds +from shapely.geometry import mapping, shape from mapchete.io.raster.array import resample_from_array from mapchete.io.raster.open import rasterio_open @@ -13,7 +16,7 @@ from mapchete.path import MPath from mapchete.protocols import GridProtocol from mapchete.tile import BufferedTile -from mapchete.types import Bounds, CRSLike, MPathLike, NodataVal +from mapchete.types import Bounds, BoundsLike, CRSLike, MPathLike, NodataVal logger = logging.getLogger(__name__) @@ -27,7 +30,7 @@ class ReferencedRaster: data: Union[np.ndarray, ma.masked_array] transform: Affine - bounds: Union[List[float], Tuple[float], Bounds] + bounds: Bounds crs: CRSLike nodata: Optional[NodataVal] = None driver: Optional[str] = None @@ -36,10 +39,10 @@ def __init__( self, data: Union[np.ndarray, ma.masked_array], transform: Affine, - bounds: Union[List[float], Tuple[float], Bounds], crs: CRSLike, + bounds: Optional[BoundsLike] = None, nodata: Optional[NodataVal] = None, - driver: Optional[str] = None, + driver: Optional[str] = "COG", **kwargs, ): if data.ndim == 1: # pragma: no cover @@ -63,8 +66,14 @@ def __init__( self.nodata = nodata self.crs = crs self.transform = self.affine = transform - self.bounds = bounds - self.__geo_interface__ = mapping(box(*self.bounds)) + self.bounds = Bounds.from_inp( + bounds or array_bounds(self.height, self.width, self.transform) + ) + self.__geo_interface__ = mapping(shape(self.bounds)) + + @property + def __array_interface__(self) -> dict: + return self.data.__array_interface__ @property def meta(self) -> dict: @@ -158,7 +167,7 @@ def to_file( return path @staticmethod - def from_rasterio(src, masked: bool = True) -> "ReferencedRaster": + def from_rasterio(src, masked: bool = True) -> ReferencedRaster: return ReferencedRaster( data=src.read(masked=masked).copy(), transform=src.transform, @@ -167,10 +176,28 @@ def from_rasterio(src, masked: bool = True) -> "ReferencedRaster": ) @staticmethod - def from_file(path, masked: bool = True) -> "ReferencedRaster": + def from_file(path, masked: bool = True) -> ReferencedRaster: with rasterio_open(path) as src: return ReferencedRaster.from_rasterio(src, masked=masked) + @staticmethod + def from_array_like( + array_like: Union[np.ndarray, ma.MaskedArray, GridProtocol], + transform: Optional[Affine] = None, + crs: Optional[CRSLike] = None, + ) -> ReferencedRaster: + if isinstance(array_like, ReferencedRaster): + return array_like + elif isinstance(array_like, np.ndarray): + if transform is None or crs is None: + raise ValueError("array transform and CRS must be provided") + return ReferencedRaster(data=array_like, transform=transform, crs=crs) + # elif isinstance(array_like, GridProtocol): + # return ReferencedRaster( + # data=array_like, transform=array_like.transform, crs=array_like.crs + # ) + raise TypeError(f"cannot convert {array_like} to ReferencedRaster") + def read_raster( inp: MPathLike, grid: Optional[GridProtocol] = None, **kwargs diff --git a/test/test_io_raster.py b/test/test_io_raster.py index 72efb927..3537a108 100644 --- a/test/test_io_raster.py +++ b/test/test_io_raster.py @@ -811,6 +811,11 @@ def test_referencedraster_meta(s2_band): assert k in meta +def test_referencedraster_array_interface(s2_band): + rr = ReferencedRaster.from_file(s2_band) + assert isinstance(ma.array(rr), ma.MaskedArray) + + @pytest.mark.parametrize("indexes", [None, 1, [1]]) def test_referencedraster_read_band(s2_band, indexes): rr = ReferencedRaster.from_file(s2_band) From 4ae5f30c8f8dac94285f74a50b198354e48b963b Mon Sep 17 00:00:00 2001 From: Joachim Ungar Date: Tue, 2 Apr 2024 16:18:38 +0200 Subject: [PATCH 16/28] add CRS check --- mapchete/io/raster/array.py | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/mapchete/io/raster/array.py b/mapchete/io/raster/array.py index 752a90ee..75df3797 100644 --- a/mapchete/io/raster/array.py +++ b/mapchete/io/raster/array.py @@ -56,6 +56,11 @@ def extract_from_array( array, transform=array_transform, crs=out_grid.crs ) + if raster.crs != out_grid.crs: # pragma: no cover + raise ValueError( + f"source CRS {raster.crs} and destination CRS {out_grid.crs} do not match!" + ) + # get range within array minrow, maxrow, mincol, maxcol = bounds_to_ranges( bounds=out_grid.bounds, transform=raster.transform From 7883cdd23aacb3315453fcb00f667e88ef7248d4 Mon Sep 17 00:00:00 2001 From: Joachim Ungar Date: Thu, 25 Apr 2024 14:29:48 +0200 Subject: [PATCH 17/28] don't use __array_interface__ as it cannot handle array masks --- mapchete/formats/base.py | 1 + mapchete/io/raster/array.py | 2 +- mapchete/io/raster/referenced_raster.py | 7 ++----- mapchete/processing/base.py | 1 - test/test_io.py | 8 ++------ 5 files changed, 6 insertions(+), 13 deletions(-) diff --git a/mapchete/formats/base.py b/mapchete/formats/base.py index 76d965c5..11dc2476 100644 --- a/mapchete/formats/base.py +++ b/mapchete/formats/base.py @@ -4,6 +4,7 @@ When writing a new driver, please inherit from these classes and implement the respective interfaces. """ + import logging import types import warnings diff --git a/mapchete/io/raster/array.py b/mapchete/io/raster/array.py index 75df3797..d94b76ff 100644 --- a/mapchete/io/raster/array.py +++ b/mapchete/io/raster/array.py @@ -72,7 +72,7 @@ def extract_from_array( and maxrow <= array.shape[-2] and maxcol <= array.shape[-1] ): - return ma.array(raster)[..., minrow:maxrow, mincol:maxcol] + return raster.array[..., minrow:maxrow, mincol:maxcol] # raise error if output is not fully within input else: raise ValueError("extraction fails if output shape is not within input") diff --git a/mapchete/io/raster/referenced_raster.py b/mapchete/io/raster/referenced_raster.py index deca1b3d..7a37f824 100644 --- a/mapchete/io/raster/referenced_raster.py +++ b/mapchete/io/raster/referenced_raster.py @@ -29,6 +29,7 @@ class ReferencedRaster: """ data: Union[np.ndarray, ma.masked_array] + array: Union[np.ndarray, ma.masked_array] transform: Affine bounds: Bounds crs: CRSLike @@ -60,7 +61,7 @@ def __init__( transform = transform or kwargs.get("affine") if transform is None: # pragma: no cover raise ValueError("georeference given") - self.data = data + self.data = self.array = data self.driver = driver self.dtype = self.data.dtype self.nodata = nodata @@ -71,10 +72,6 @@ def __init__( ) self.__geo_interface__ = mapping(shape(self.bounds)) - @property - def __array_interface__(self) -> dict: - return self.data.__array_interface__ - @property def meta(self) -> dict: return { diff --git a/mapchete/processing/base.py b/mapchete/processing/base.py index d358865c..6cb16588 100644 --- a/mapchete/processing/base.py +++ b/mapchete/processing/base.py @@ -519,7 +519,6 @@ def get_raw_output(self, tile: TileLike, _baselevel_readonly: bool = False) -> A ) else: output_tiles = self.config.output_pyramid.intersecting(tile) - if self.config.mode == ProcessingMode.READONLY or _baselevel_readonly: if self.config.output.tiles_exist(process_tile): return self._read_existing_output(tile, output_tiles) diff --git a/test/test_io.py b/test/test_io.py index 1be47325..fd5abbd7 100644 --- a/test/test_io.py +++ b/test/test_io.py @@ -223,9 +223,7 @@ def test_tiles_exist_local(example_mapchete): # process tiles existing = set() not_existing = set() - for tile, exists in tiles_exist( - config=mp.config, process_tiles=process_tiles, multi=4 - ): + for tile, exists in tiles_exist(config=mp.config, process_tiles=process_tiles): if exists: existing.add(tile) else: @@ -237,9 +235,7 @@ def test_tiles_exist_local(example_mapchete): # output tiles existing = set() not_existing = set() - for tile, exists in tiles_exist( - config=mp.config, output_tiles=output_tiles, multi=1 - ): + for tile, exists in tiles_exist(config=mp.config, output_tiles=output_tiles): if exists: existing.add(tile) else: From be3513d426ac97f797d61d2e67fd1b8c03be531f Mon Sep 17 00:00:00 2001 From: Joachim Ungar Date: Thu, 25 Apr 2024 14:35:39 +0200 Subject: [PATCH 18/28] make typing compatible for python 3.8 --- mapchete/commands/_cp.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/mapchete/commands/_cp.py b/mapchete/commands/_cp.py index 425559ad..3fce0117 100644 --- a/mapchete/commands/_cp.py +++ b/mapchete/commands/_cp.py @@ -2,7 +2,7 @@ import logging from multiprocessing import cpu_count -from typing import List, Optional, Tuple, Union +from typing import List, Optional, Tuple, Type, Union from distributed import Client from shapely.geometry import Point @@ -36,7 +36,7 @@ def cp( dask_client: Optional[Client] = None, src_fs_opts: Union[dict, None] = None, dst_fs_opts: Union[dict, None] = None, - executor_getter: type[Executor] = Executor, + executor_getter: Type[Executor] = Executor, observers: Optional[List[ObserverProtocol]] = None, ): """ From 46982b0a62d3560bc29197ce78668b545ff6b773 Mon Sep 17 00:00:00 2001 From: Joachim Ungar Date: Thu, 25 Apr 2024 17:03:01 +0200 Subject: [PATCH 19/28] increase test coverage; add more tests for contour process --- mapchete/formats/models.py | 24 ---------------- mapchete/formats/protocols.py | 12 ++++---- mapchete/processes/contours.py | 24 +++++++++------- test/test_processes.py | 52 +++++++++++++++++++++++++--------- 4 files changed, 57 insertions(+), 55 deletions(-) delete mode 100644 mapchete/formats/models.py diff --git a/mapchete/formats/models.py b/mapchete/formats/models.py deleted file mode 100644 index 2dfdab6c..00000000 --- a/mapchete/formats/models.py +++ /dev/null @@ -1,24 +0,0 @@ -from typing import List - -from pydantic import BaseModel, Field, NonNegativeInt - -from mapchete.enums import DataType -from mapchete.tile import BufferedTilePyramid -from mapchete.types import NodataVal - - -class DriverMetadata(BaseModel): - driver_name: str - data_type: DataType - mode: str - file_extensions: List[str] = Field(default_factory=list) - - -class BaseInputParams(BaseModel, arbitrary_types_allowed=True): - pyramid: BufferedTilePyramid - pixelbuffer: NonNegativeInt = 0 - - -class BaseOutputParams(BaseModel): - stac: dict = Field(default_factory=dict) - nodata: NodataVal diff --git a/mapchete/formats/protocols.py b/mapchete/formats/protocols.py index 1bd8f459..2abe420f 100644 --- a/mapchete/formats/protocols.py +++ b/mapchete/formats/protocols.py @@ -9,14 +9,12 @@ from shapely.geometry.base import BaseGeometry from mapchete.path import MPath - -# from mapchete.processing.mp import MapcheteProcess from mapchete.protocols import GridProtocol from mapchete.tile import BufferedTile, BufferedTilePyramid from mapchete.types import BandIndexes, BoundsLike, CRSLike, ResamplingLike, TileLike -class InputTileProtocol(GridProtocol): +class InputTileProtocol(GridProtocol): # pragma: no cover preprocessing_tasks_results: dict input_key: str tile: BufferedTile @@ -40,7 +38,7 @@ def __exit__(self, *args): """Clean up.""" -class RasterInput(InputTileProtocol): +class RasterInput(InputTileProtocol): # pragma: no cover def read( self, indexes: Optional[BandIndexes] = None, @@ -51,7 +49,7 @@ def read( ... -class VectorInput(InputTileProtocol): +class VectorInput(InputTileProtocol): # pragma: no cover def read( self, validity_check: bool = True, clip_to_crs_bounds: bool = False, **kwargs ) -> List[dict]: @@ -63,7 +61,7 @@ def read( VectorInputGroup = List[Tuple[str, VectorInput]] -class InputDataProtocol(Protocol): +class InputDataProtocol(Protocol): # pragma: no cover input_key: str pyramid: BufferedTilePyramid pixelbuffer: int = 0 @@ -108,7 +106,7 @@ def preprocessing_task_finished(self, task_key: str) -> bool: ... -class OutputDataReaderProtocol(Protocol): +class OutputDataReaderProtocol(Protocol): # pragma: no cover """Minimum interface for any output reader class.""" pixelbuffer: NonNegativeInt diff --git a/mapchete/processes/contours.py b/mapchete/processes/contours.py index 19e6debf..435b9cbf 100644 --- a/mapchete/processes/contours.py +++ b/mapchete/processes/contours.py @@ -75,13 +75,15 @@ def execute( """ # read clip geometry if clip: - clip_geom = [] + clip_geom = clip.read() if not clip_geom: logger.debug("no clip data over tile") - raise Empty + raise Empty("no clip data over tile") + else: + clip_geom = [] if dem.is_empty(): - raise Empty + raise Empty("no DEM data over tile") logger.debug("reading input raster") dem_data = dem.read( @@ -92,9 +94,9 @@ def execute( matching_precision=td_matching_precision, fallback_to_higher_zoom=td_fallback_to_higher_zoom, ) - if dem_data.mask.all(): - logger.debug("raster empty") - raise Empty + if dem_data.mask.all(): # pragma: no cover + logger.debug("DEM data empty over tile") + raise Empty("DEM data empty over tile") logger.debug("calculate hillshade") contour_lines = contours( @@ -140,9 +142,9 @@ def contours( """ import matplotlib.pyplot as plt - levels = _get_contour_values(array.min(), array.max(), interval=interval, base=base) - if not levels: - return [] + levels = ( + get_contour_values(array.min(), array.max(), interval=interval, base=base) or [] + ) contours = plt.contour(array, levels) index = 0 out_contours = [] @@ -168,9 +170,9 @@ def contours( return out_contours -def _get_contour_values( +def get_contour_values( min_val: float, max_val: float, base: float = 0, interval: float = 100 -): +) -> List[float]: """Return a list of values between min and max within an interval.""" i = base out = [] diff --git a/test/test_processes.py b/test/test_processes.py index b25dead4..4a56dbf4 100644 --- a/test/test_processes.py +++ b/test/test_processes.py @@ -4,7 +4,7 @@ import numpy.ma as ma import pytest -from mapchete import MapcheteNodataTile +from mapchete import Empty, MapcheteNodataTile from mapchete.processes import contours, convert, hillshade from mapchete.processes.examples import example_process from mapchete.testing import get_process_mp @@ -87,7 +87,8 @@ def test_convert_vector(landpoly): ) -def test_contours(cleantopo_tl_tif, landpoly): +def test_contours_dem(cleantopo_tl_tif): + # not empty dem dem = get_process_mp(input=dict(dem=cleantopo_tl_tif), zoom=5, metatiling=8).open( "dem" ) @@ -95,25 +96,50 @@ def test_contours(cleantopo_tl_tif, landpoly): assert isinstance(output, list) assert output - # execute on empty tile + +def test_contours_empty_dem(cleantopo_tl_tif): + # empty dem dem = get_process_mp( - input=dict(dem=cleantopo_tl_tif), tile=(5, 3, 7), metatiling=8 + input=dict(dem=cleantopo_tl_tif), tile=(5, 3, 6), metatiling=8 ).open("dem") - with pytest.raises(MapcheteNodataTile): + with pytest.raises(Empty): contours.execute(dem) - dem = get_process_mp( + +def test_contours_clipped(cleantopo_tl_tif, landpoly): + # clipped contours + mp = get_process_mp( input=dict(dem=cleantopo_tl_tif, clip=landpoly), zoom=5, metatiling=8 - ).open("dem") - output = contours.execute(dem) + ) + dem = mp.open("dem") + clip = mp.open("clip") + output = contours.execute(dem, clip) assert isinstance(output, list) assert output - dem = get_process_mp( - input=dict(dem=cleantopo_tl_tif, clip=landpoly), tile=(5, 3, 7), metatiling=8 - ).open("dem") - with pytest.raises(MapcheteNodataTile): - contours.execute(dem) + +def test_contours_empty_clip(cleantopo_tl_tif, landpoly): + # empty clip geometry + mp = get_process_mp( + input=dict(dem=cleantopo_tl_tif, clip=landpoly), tile=(5, 3, 6), metatiling=8 + ) + dem = mp.open("dem") + clip = mp.open("clip") + with pytest.raises(Empty): + contours.execute(dem, clip) + + +@pytest.mark.parametrize( + "min_val, max_val, base, interval, control", + [ + (0, 1000, 0, 500, [0, 500, 1000]), + (10, 1000, 0, 500, [500, 1000]), + (0, 1000, 10, 500, [10, 510]), + (-100, 500, 0, 100, [-100, 0, 100, 200, 300, 400, 500]), + ], +) +def test_get_contour_values(min_val, max_val, base, interval, control): + assert contours.get_contour_values(min_val, max_val, base, interval) == control def test_hillshade(cleantopo_tl_tif, landpoly): From 8a8204c1249ead8fa035605368a1242b3edb3d0f Mon Sep 17 00:00:00 2001 From: Joachim Ungar Date: Mon, 29 Apr 2024 11:52:41 +0200 Subject: [PATCH 20/28] streamline contour generation --- mapchete/processes/contours.py | 46 ++++++++++++++-------------------- 1 file changed, 19 insertions(+), 27 deletions(-) diff --git a/mapchete/processes/contours.py b/mapchete/processes/contours.py index 435b9cbf..1727a7d0 100644 --- a/mapchete/processes/contours.py +++ b/mapchete/processes/contours.py @@ -1,7 +1,7 @@ """Contour line extraction using matplotlib.""" import logging -from typing import List, Optional +from typing import Generator, List, Optional import numpy as np from shapely.geometry import LineString, mapping, shape @@ -99,12 +99,14 @@ def execute( raise Empty("DEM data empty over tile") logger.debug("calculate hillshade") - contour_lines = contours( - dem_data, - dem.tile, - interval=interval, - field=field, - base=base, + contour_lines = list( + generate_contours( + dem_data, + dem.tile, + interval=interval, + field=field, + base=base, + ) ) if clip: @@ -130,29 +132,22 @@ def execute( return contour_lines -def contours( +def generate_contours( array: np.ndarray, tile: BufferedTile, interval: float = 100, field: str = "elev", base: float = 0, -) -> List[dict]: - """ - Extract contour lines from an array. - """ +) -> Generator[dict, None, None]: import matplotlib.pyplot as plt - levels = ( + elevations = ( get_contour_values(array.min(), array.max(), interval=interval, base=base) or [] ) - contours = plt.contour(array, levels) - index = 0 - out_contours = [] - for level in range(len(contours.collections)): - elevation = levels[index] - index += 1 - paths = contours.collections[level].get_paths() - for path in paths: + for elevation, contours in zip( + elevations, plt.contour(array, elevations).collections + ): + for path in contours.get_paths(): out_coords = [ ( tile.left + (y * tile.pixel_x_size), @@ -161,13 +156,10 @@ def contours( for x, y in np.asarray(path.vertices) ] if len(out_coords) >= 2: - out_contours.append( - dict( - properties={field: elevation}, - geometry=mapping(LineString(out_coords)), - ) + yield dict( + properties={field: elevation}, + geometry=mapping(LineString(out_coords)), ) - return out_contours def get_contour_values( From a70cf358897de3bfb8945720d3fea44228be5d12 Mon Sep 17 00:00:00 2001 From: Joachim Ungar Date: Mon, 29 Apr 2024 16:35:50 +0200 Subject: [PATCH 21/28] increase test coverage --- mapchete/__init__.py | 8 +-- mapchete/formats/base.py | 14 ++--- mapchete/formats/protocols.py | 4 +- mapchete/processing/mp.py | 6 +- test/conftest.py | 12 ++-- test/test_processes.py | 108 +++++++++++++++++++--------------- 6 files changed, 78 insertions(+), 74 deletions(-) diff --git a/mapchete/__init__.py b/mapchete/__init__.py index 60017963..67c80003 100644 --- a/mapchete/__init__.py +++ b/mapchete/__init__.py @@ -1,6 +1,6 @@ import logging import os -from typing import Optional, Union +from typing import Union from fsspec import AbstractFileSystem @@ -43,8 +43,6 @@ def open( some_input: Union[MPathLike, dict, MapcheteConfig], with_cache: bool = False, - fs: Optional[AbstractFileSystem] = None, - fs_kwargs: Optional[dict] = None, **kwargs, ) -> Mapchete: """ @@ -79,8 +77,6 @@ def open( Mapchete a Mapchete process object """ - if fs or fs_kwargs: - raise DeprecationWarning() # convert to MPath object if possible if isinstance(some_input, str): some_input = MPath.from_inp(some_input) @@ -101,8 +97,6 @@ def open( if k not in ["delimiters", "mode"] }, path=some_input, - fs=fs, - fs_kwargs=fs_kwargs, **kwargs, ), config_dir=os.getcwd(), diff --git a/mapchete/formats/base.py b/mapchete/formats/base.py index 11dc2476..81cc5669 100644 --- a/mapchete/formats/base.py +++ b/mapchete/formats/base.py @@ -299,7 +299,7 @@ def stac_asset_type(self): # pragma: no cover class OutputDataReader(OutputDataBase): - def read(self, output_tile): + def read(self, output_tile): # pragma: no cover """ Read existing process output. @@ -314,7 +314,7 @@ def read(self, output_tile): """ raise NotImplementedError() - def empty(self, process_tile): + def empty(self, process_tile): # pragma: no cover """ Return empty data. @@ -331,7 +331,7 @@ def empty(self, process_tile): """ raise NotImplementedError() - def open(self, tile, process): + def open(self, tile, process): # pragma: no cover """ Open process output as input for other process. @@ -342,7 +342,7 @@ def open(self, tile, process): """ raise NotImplementedError - def for_web(self, data): + def for_web(self, data): # pragma: no cover """ Convert data to web output (raster only). @@ -379,7 +379,7 @@ class OutputDataWriter(OutputDataReader): METADATA = {"driver_name": None, "data_type": None, "mode": "w"} use_stac = False - def write(self, process_tile, data): + def write(self, process_tile, data): # pragma: no cover """ Write data from one or more process tiles. @@ -559,7 +559,7 @@ def __init__(self, output_params, readonly=False): """Initialize.""" super().__init__(output_params, readonly=readonly) - def tiles_exist(self, process_tile=None, output_tile=None): + def tiles_exist(self, process_tile=None, output_tile=None): # pragma: no cover """ Check whether output tiles of a tile (either process or output) exists. @@ -575,7 +575,7 @@ def tiles_exist(self, process_tile=None, output_tile=None): exists : bool """ # TODO - raise NotImplementedError # pragma: no cover + raise NotImplementedError class SingleFileOutputWriter(OutputDataWriter, SingleFileOutputReader): diff --git a/mapchete/formats/protocols.py b/mapchete/formats/protocols.py index 2abe420f..48a2f76b 100644 --- a/mapchete/formats/protocols.py +++ b/mapchete/formats/protocols.py @@ -138,7 +138,7 @@ def for_web(self, data) -> np.ndarray: ... -class FileSystemOutputDataReaderProtocol: +class FileSystemOutputDataReaderProtocol: # pragma: no cover """Minimum interface for any filesystem storage based output reader class.""" def get_path(self, tile: BufferedTile) -> MPath: @@ -164,7 +164,7 @@ def stac_asset_type(self) -> str: ... -class OutputDataWriterProtocol(OutputDataReaderProtocol): +class OutputDataWriterProtocol(OutputDataReaderProtocol): # pragma: no cover def write(self, process_tile: BufferedTile, data: Any) -> None: ... diff --git a/mapchete/processing/mp.py b/mapchete/processing/mp.py index 8b38d401..2507648e 100644 --- a/mapchete/processing/mp.py +++ b/mapchete/processing/mp.py @@ -96,7 +96,7 @@ def open(self, input_id, **kwargs) -> InputTileProtocol: raise ValueError("%s not found in config as input" % input_id) return self.input[input_id] - def hillshade(self, *_, **__) -> ma.MaskedArray: + def hillshade(self, *_, **__) -> ma.MaskedArray: # pragma: no cover """ Calculate hillshading from elevation data. @@ -122,7 +122,7 @@ def hillshade(self, *_, **__) -> ma.MaskedArray: "Run hillshade via mp is deprecated. Call the hillshade method from mapchete.processes.hillshade." ) - def contours(self, *_, **__) -> ma.MaskedArray: + def contours(self, *_, **__) -> ma.MaskedArray: # pragma: no cover """ Extract contour lines from elevation data. @@ -146,7 +146,7 @@ def contours(self, *_, **__) -> ma.MaskedArray: "MapcheteProcess.contours() is deprecated. Call the contours method from mapchete.processes.contours." ) - def clip(self, *_, **__) -> ma.MaskedArray: + def clip(self, *_, **__) -> ma.MaskedArray: # pragma: no cover """ Clip array by geometry. diff --git a/test/conftest.py b/test/conftest.py index 1b71186a..811889f9 100644 --- a/test/conftest.py +++ b/test/conftest.py @@ -179,6 +179,12 @@ def wkt_geom_tl(): # example files +@pytest.fixture +def local_raster(testdata_dir): + """Fixture for HTTP raster.""" + return testdata_dir / "cleantopo/1/0/0.tif" + + @pytest.fixture def http_raster(http_testdata_dir): """Fixture for HTTP raster.""" @@ -511,12 +517,6 @@ def cleantopo_br_tif_s3(minio_testdata_bucket): return prepare_s3_testfile(minio_testdata_bucket, "cleantopo_br.tif") -@pytest.fixture -def cleantopo_tl_tif(): - """Fixture for cleantopo_tl.tif""" - return TESTDATA_DIR / "cleantopo_tl.tif" - - @pytest.fixture def dummy1_3857_tif(): """Fixture for dummy1_3857.tif""" diff --git a/test/test_processes.py b/test/test_processes.py index 4a56dbf4..57e597f7 100644 --- a/test/test_processes.py +++ b/test/test_processes.py @@ -5,7 +5,7 @@ import pytest from mapchete import Empty, MapcheteNodataTile -from mapchete.processes import contours, convert, hillshade +from mapchete.processes import clip, contours, convert, hillshade from mapchete.processes.examples import example_process from mapchete.testing import get_process_mp @@ -20,26 +20,31 @@ def test_example_process(cleantopo_tl): assert output == "empty" -def test_convert_raster(cleantopo_tl_tif, landpoly): +def test_convert_raster(local_raster, landpoly): # tile with data + tile = (8, 28, 89) assert isinstance( convert.execute( - inp=get_process_mp(input=dict(inp=cleantopo_tl_tif), zoom=5).open("inp") + inp=get_process_mp(input=dict(inp=local_raster), tile=tile).open("inp") ), np.ndarray, ) + +def test_convert_raster_empty(local_raster): + tile = (8, 28, 189) # execute on empty tile with pytest.raises(MapcheteNodataTile): convert.execute( - inp=get_process_mp(input=dict(inp=cleantopo_tl_tif), tile=(5, 3, 7)).open( - "inp" - ) + inp=get_process_mp(input=dict(inp=local_raster), tile=tile).open("inp") ) - inp = get_process_mp( - input=dict(inp=cleantopo_tl_tif, clip=landpoly), zoom=5, metatiling=8 - ).open("inp") + +def test_convert_raster_clip(local_raster, landpoly): + tile = (8, 28, 89) + inp = get_process_mp(input=dict(inp=local_raster, clip=landpoly), tile=tile).open( + "inp" + ) # tile with data default = convert.execute(inp) @@ -57,14 +62,13 @@ def test_convert_raster(cleantopo_tl_tif, landpoly): clip_dtype = convert.execute(inp, scale_ratio=2, clip_to_output_dtype="uint8") assert isinstance(clip_dtype, np.ndarray) + +def test_convert_raster_clip_empty(local_raster, landpoly): # NOTE: this was in the test suite but there is no reason why over this process tile # the execute function should return an empty tile # execute on empty tile - mp = get_process_mp( - input=dict(inp=cleantopo_tl_tif, clip=landpoly), - tile=(5, 0, 0), - metatiling=1, - ) + tile = (8, 28, 189) + mp = get_process_mp(input=dict(inp=local_raster, clip=landpoly), tile=tile) with pytest.raises(MapcheteNodataTile): convert.execute(mp.open("inp"), mp.open("clip")) @@ -87,46 +91,39 @@ def test_convert_vector(landpoly): ) -def test_contours_dem(cleantopo_tl_tif): +def test_contours_dem(local_raster): # not empty dem - dem = get_process_mp(input=dict(dem=cleantopo_tl_tif), zoom=5, metatiling=8).open( - "dem" + tile = (8, 28, 89) + output = contours.execute( + get_process_mp(input=dict(dem=local_raster), tile=tile).open("dem") ) - output = contours.execute(dem) assert isinstance(output, list) assert output -def test_contours_empty_dem(cleantopo_tl_tif): +def test_contours_empty_dem(local_raster): # empty dem - dem = get_process_mp( - input=dict(dem=cleantopo_tl_tif), tile=(5, 3, 6), metatiling=8 - ).open("dem") + tile = (8, 28, 189) + dem = get_process_mp(input=dict(dem=local_raster), tile=tile).open("dem") with pytest.raises(Empty): contours.execute(dem) -def test_contours_clipped(cleantopo_tl_tif, landpoly): +def test_contours_clipped(local_raster, landpoly): # clipped contours - mp = get_process_mp( - input=dict(dem=cleantopo_tl_tif, clip=landpoly), zoom=5, metatiling=8 - ) - dem = mp.open("dem") - clip = mp.open("clip") - output = contours.execute(dem, clip) + tile = (8, 28, 89) + mp = get_process_mp(input=dict(dem=local_raster, clip=landpoly), tile=tile) + output = contours.execute(mp.open("dem"), mp.open("clip")) assert isinstance(output, list) assert output -def test_contours_empty_clip(cleantopo_tl_tif, landpoly): +def test_contours_empty_clip(local_raster, landpoly): # empty clip geometry - mp = get_process_mp( - input=dict(dem=cleantopo_tl_tif, clip=landpoly), tile=(5, 3, 6), metatiling=8 - ) - dem = mp.open("dem") - clip = mp.open("clip") + tile = (8, 68, 35) + mp = get_process_mp(input=dict(dem=local_raster, clip=landpoly), tile=tile) with pytest.raises(Empty): - contours.execute(dem, clip) + contours.execute(mp.open("dem"), mp.open("clip")) @pytest.mark.parametrize( @@ -142,27 +139,40 @@ def test_get_contour_values(min_val, max_val, base, interval, control): assert contours.get_contour_values(min_val, max_val, base, interval) == control -def test_hillshade(cleantopo_tl_tif, landpoly): - dem = get_process_mp(input=dict(dem=cleantopo_tl_tif), zoom=5, metatiling=8).open( - "dem" - ) +def test_hillshade(local_raster): + tile = (8, 68, 35) + dem = get_process_mp(input=dict(dem=local_raster), tile=tile).open("dem") assert isinstance(hillshade.execute(dem), np.ndarray) + +def test_hillshade_empty(local_raster, landpoly): # execute on empty tile - dem = get_process_mp( - input=dict(dem=cleantopo_tl_tif), tile=(5, 3, 7), metatiling=8 - ).open("dem") + tile = (8, 28, 189) + dem = get_process_mp(input=dict(dem=local_raster), tile=tile).open("dem") with pytest.raises(MapcheteNodataTile): hillshade.execute(dem) - dem = get_process_mp( - input=dict(dem=cleantopo_tl_tif, clip=landpoly), zoom=5, metatiling=8 - ).open("dem") + +def test_hillshade_clip(local_raster, landpoly): + tile = (8, 28, 89) + dem = get_process_mp(input=dict(dem=local_raster, clip=landpoly), tile=tile).open( + "dem" + ) assert isinstance(hillshade.execute(dem), np.ndarray) + +def test_hillshade_clip_empty(local_raster, landpoly): + tile = (8, 28, 189) # execute on empty tile - mp = get_process_mp( - input=dict(dem=cleantopo_tl_tif, clip=landpoly), tile=(5, 3, 7), metatiling=8 - ) + mp = get_process_mp(input=dict(dem=local_raster, clip=landpoly), tile=tile) with pytest.raises(MapcheteNodataTile): hillshade.execute(mp.open("dem"), mp.open("clip")) + + +def test_clip(local_raster, landpoly): + tile = (8, 28, 89) + mp = get_process_mp(input=dict(inp=local_raster, clip=landpoly), tile=tile) + output = clip.execute(mp.open("inp"), mp.open("clip")) + assert isinstance(output, np.ndarray) + assert not output.mask.all() + assert output.mask.any() From b050e469117c48454c68ef98e68bb754ed2712d5 Mon Sep 17 00:00:00 2001 From: Joachim Ungar Date: Mon, 13 May 2024 14:27:29 +0200 Subject: [PATCH 22/28] dev commit --- mapchete/io/raster/array.py | 8 ++++---- mapchete/io/raster/read.py | 17 ++++++++-------- mapchete/io/raster/referenced_raster.py | 26 ++++++++++++++++--------- 3 files changed, 30 insertions(+), 21 deletions(-) diff --git a/mapchete/io/raster/array.py b/mapchete/io/raster/array.py index d94b76ff..86b0c4da 100644 --- a/mapchete/io/raster/array.py +++ b/mapchete/io/raster/array.py @@ -15,7 +15,7 @@ from mapchete.io.vector import to_shape from mapchete.protocols import GridProtocol -from mapchete.types import BoundsLike, CRSLike, NodataVal +from mapchete.types import BoundsLike, CRSLike, Grid, NodataVal logger = logging.getLogger(__name__) @@ -81,9 +81,9 @@ def extract_from_array( def resample_from_array( array: Union[np.ndarray, ma.MaskedArray, GridProtocol], array_transform: Optional[Affine] = None, - out_grid: Optional[GridProtocol] = None, + out_grid: Optional[Union[Grid, GridProtocol]] = None, in_affine: Optional[Affine] = None, - out_tile: Optional[GridProtocol] = None, + out_tile: Optional[Union[Grid, GridProtocol]] = None, in_crs: Optional[CRSLike] = None, resampling: Union[Resampling, str] = Resampling.nearest, nodataval: Optional[NodataVal] = None, @@ -125,7 +125,7 @@ def resample_from_array( elif isinstance(array, np.ndarray): array = ma.MaskedArray(array, mask=array == nodata) elif hasattr(array, "affine") and hasattr(array, "data"): # pragma: no cover - array_transform = array.affine + array_transform = getattr(array, "affine") in_crs = array.crs array = array.data elif hasattr(array, "transform") and hasattr(array, "data"): # pragma: no cover diff --git a/mapchete/io/raster/read.py b/mapchete/io/raster/read.py index c54480a7..5fed13d5 100644 --- a/mapchete/io/raster/read.py +++ b/mapchete/io/raster/read.py @@ -1,10 +1,11 @@ """Wrapper functions around rasterio and useful raster functions.""" + from __future__ import annotations import logging import warnings from contextlib import contextmanager -from typing import Iterable, List, Optional, Tuple, Union +from typing import Generator, Iterable, List, Optional, Tuple, Union import numpy as np import numpy.ma as ma @@ -37,7 +38,7 @@ @contextmanager def rasterio_read( path: MPathLike, mode: str = "r", **kwargs -) -> Union[DatasetReader, DatasetWriter]: +) -> Generator[Union[DatasetReader, DatasetWriter], None, None]: """ Wrapper around rasterio.open but rasterio.Env is set according to path properties. """ @@ -73,21 +74,21 @@ def read_raster_window( resampling = ( resampling if isinstance(resampling, Resampling) else Resampling[resampling] ) - input_files = [ + input_paths: List[MPath] = [ MPath.from_inp(input_file) for input_file in ( input_files if isinstance(input_files, list) else [input_files] ) ] - if len(input_files) == 0: # pragma: no cover + if len(input_paths) == 0: # pragma: no cover raise ValueError("no input given") - with input_files[0].rio_env(gdal_opts) as env: + with input_paths[0].rio_env(gdal_opts) as env: logger.debug( - "reading %s file(s) with GDAL options %s", len(input_files), env.options + "reading %s file(s) with GDAL options %s", len(input_paths), env.options ) return _read_raster_window( - input_files, + input_paths, grid, indexes=indexes, resampling=resampling, @@ -99,7 +100,7 @@ def read_raster_window( def _read_raster_window( - input_files: Union[MPathLike, List[MPathLike]], + input_files: List[MPath], grid: GridProtocol, indexes: Optional[Union[int, List[int]]] = None, resampling: Resampling = Resampling.nearest, diff --git a/mapchete/io/raster/referenced_raster.py b/mapchete/io/raster/referenced_raster.py index 7a37f824..38a44f83 100644 --- a/mapchete/io/raster/referenced_raster.py +++ b/mapchete/io/raster/referenced_raster.py @@ -16,7 +16,7 @@ from mapchete.path import MPath from mapchete.protocols import GridProtocol from mapchete.tile import BufferedTile -from mapchete.types import Bounds, BoundsLike, CRSLike, MPathLike, NodataVal +from mapchete.types import Bounds, BoundsLike, CRSLike, Grid, MPathLike, NodataVal logger = logging.getLogger(__name__) @@ -87,9 +87,9 @@ def meta(self) -> dict: def read( self, - indexes: Union[int, List[int]] = None, + indexes: Optional[Union[int, List[int]]] = None, tile: Optional[BufferedTile] = None, - grid: Optional[GridProtocol] = None, + grid: Optional[Union[Grid, GridProtocol]] = None, resampling: str = "nearest", ) -> np.ndarray: """Either read full array or resampled to grid.""" @@ -97,7 +97,9 @@ def read( warnings.warn( DeprecationWarning("'tile' is deprecated and should be 'grid'") ) - grid = grid or tile + grid = Grid.from_obj(tile) + elif grid: + grid = Grid.from_obj(grid) # select bands using band indexes if indexes is None or self.data.ndim == 2: band_selection = self.data @@ -120,12 +122,16 @@ def read( resampling=resampling, ) - def _get_band_indexes(self, indexes: Union[List[int], int] = None) -> List[int]: + def _get_band_indexes( + self, indexes: Optional[Union[List[int], int]] = None + ) -> List[int]: """Return valid band indexes.""" if isinstance(indexes, int): return [indexes] - else: + elif isinstance(indexes, list): return indexes + else: + return list(range(1, self.count + 1)) def _stack(self, *args) -> np.ndarray: """return stack of numpy or numpy.masked depending on array type""" @@ -138,9 +144,9 @@ def _stack(self, *args) -> np.ndarray: def to_file( self, path: MPath, - indexes: Union[int, List[int]] = None, + indexes: Optional[Union[int, List[int]]] = None, tile: Optional[BufferedTile] = None, - grid: Optional[GridProtocol] = None, + grid: Optional[Union[Grid, GridProtocol]] = None, resampling: str = "nearest", **kwargs, ) -> MPath: @@ -149,7 +155,9 @@ def to_file( warnings.warn( DeprecationWarning("'tile' is deprecated and should be 'grid'") ) - grid = grid or tile + grid = Grid.from_obj(tile) + elif grid: + grid = Grid.from_obj(grid) with rasterio_open(path, "w", **dict(self.meta, **kwargs)) as dst: src_array = self.read(indexes=indexes, grid=grid, resampling=resampling) if src_array.ndim == 2: From 712869ff9ed61a524d6aba5daa51f3b576f90a81 Mon Sep 17 00:00:00 2001 From: Joachim Ungar Date: Tue, 14 May 2024 08:59:38 +0200 Subject: [PATCH 23/28] add more tests for ReferencedRaster and disregard deprecated tile kwarg --- mapchete/io/raster/read.py | 2 +- mapchete/io/raster/referenced_raster.py | 87 ++++++++---------- test/test_io_raster.py | 117 +++++++++++++++++------- 3 files changed, 122 insertions(+), 84 deletions(-) diff --git a/mapchete/io/raster/read.py b/mapchete/io/raster/read.py index 5fed13d5..50e959f5 100644 --- a/mapchete/io/raster/read.py +++ b/mapchete/io/raster/read.py @@ -54,7 +54,7 @@ def rasterio_read( def read_raster_window( input_files: Union[MPathLike, List[MPathLike]], - grid: GridProtocol, + grid: Union[Grid, GridProtocol], indexes: Optional[Union[int, List[int]]] = None, resampling: Union[Resampling, str] = Resampling.nearest, src_nodata: NodataVal = None, diff --git a/mapchete/io/raster/referenced_raster.py b/mapchete/io/raster/referenced_raster.py index 38a44f83..a360fd9b 100644 --- a/mapchete/io/raster/referenced_raster.py +++ b/mapchete/io/raster/referenced_raster.py @@ -1,7 +1,6 @@ from __future__ import annotations import logging -import warnings from typing import List, Optional, Union import numpy as np @@ -15,7 +14,6 @@ from mapchete.io.raster.read import read_raster_window from mapchete.path import MPath from mapchete.protocols import GridProtocol -from mapchete.tile import BufferedTile from mapchete.types import Bounds, BoundsLike, CRSLike, Grid, MPathLike, NodataVal logger = logging.getLogger(__name__) @@ -88,24 +86,16 @@ def meta(self) -> dict: def read( self, indexes: Optional[Union[int, List[int]]] = None, - tile: Optional[BufferedTile] = None, grid: Optional[Union[Grid, GridProtocol]] = None, resampling: str = "nearest", ) -> np.ndarray: """Either read full array or resampled to grid.""" - if tile: # pragma: no cover - warnings.warn( - DeprecationWarning("'tile' is deprecated and should be 'grid'") - ) - grid = Grid.from_obj(tile) - elif grid: - grid = Grid.from_obj(grid) # select bands using band indexes if indexes is None or self.data.ndim == 2: band_selection = self.data else: band_selection = self._stack( - [self.data[i - 1] for i in self._get_band_indexes(indexes)] + [self.data[i - 1] for i in self.get_band_indexes(indexes)] ) # return either full array or a window resampled to grid @@ -118,11 +108,11 @@ def read( in_crs=self.crs, nodataval=self.nodata, nodata=self.nodata, - out_grid=grid, + out_grid=Grid.from_obj(grid), resampling=resampling, ) - def _get_band_indexes( + def get_band_indexes( self, indexes: Optional[Union[List[int], int]] = None ) -> List[int]: """Return valid band indexes.""" @@ -145,19 +135,12 @@ def to_file( self, path: MPath, indexes: Optional[Union[int, List[int]]] = None, - tile: Optional[BufferedTile] = None, grid: Optional[Union[Grid, GridProtocol]] = None, resampling: str = "nearest", **kwargs, ) -> MPath: """Write raster to output.""" - if tile: # pragma: no cover - warnings.warn( - DeprecationWarning("'tile' is deprecated and should be 'grid'") - ) - grid = Grid.from_obj(tile) - elif grid: - grid = Grid.from_obj(grid) + grid = Grid.from_obj(grid) if grid else None with rasterio_open(path, "w", **dict(self.meta, **kwargs)) as dst: src_array = self.read(indexes=indexes, grid=grid, resampling=resampling) if src_array.ndim == 2: @@ -172,7 +155,10 @@ def to_file( return path @staticmethod - def from_rasterio(src, masked: bool = True) -> ReferencedRaster: + def from_rasterio( + src, + masked: bool = True, + ) -> ReferencedRaster: return ReferencedRaster( data=src.read(masked=masked).copy(), transform=src.transform, @@ -181,13 +167,32 @@ def from_rasterio(src, masked: bool = True) -> ReferencedRaster: ) @staticmethod - def from_file(path, masked: bool = True) -> ReferencedRaster: - with rasterio_open(path) as src: - return ReferencedRaster.from_rasterio(src, masked=masked) + def from_file( + path: MPathLike, + grid: Optional[Union[Grid, GridProtocol]] = None, + masked: bool = True, + **kwargs, + ) -> ReferencedRaster: + path = MPath.from_inp(path) + logger.debug(f"reading {str(path)} into memory") + if grid: + grid = Grid.from_obj(grid) + data = read_raster_window(path, grid=grid, **kwargs) + return ReferencedRaster( + data=data if masked else data.filled(), + transform=grid.transform, + bounds=grid.bounds, + crs=grid.crs, + ) + with rasterio_open(path, "r") as src: + return ReferencedRaster.from_rasterio( + src, + masked=masked, + ) @staticmethod def from_array_like( - array_like: Union[np.ndarray, ma.MaskedArray, GridProtocol], + array_like: Union[np.ndarray, ma.MaskedArray, GridProtocol, ReferencedRaster], transform: Optional[Affine] = None, crs: Optional[CRSLike] = None, ) -> ReferencedRaster: @@ -197,33 +202,13 @@ def from_array_like( if transform is None or crs is None: raise ValueError("array transform and CRS must be provided") return ReferencedRaster(data=array_like, transform=transform, crs=crs) - # elif isinstance(array_like, GridProtocol): - # return ReferencedRaster( - # data=array_like, transform=array_like.transform, crs=array_like.crs - # ) raise TypeError(f"cannot convert {array_like} to ReferencedRaster") def read_raster( - inp: MPathLike, grid: Optional[GridProtocol] = None, **kwargs + inp: MPathLike, + grid: Optional[Union[Grid, GridProtocol]] = None, + masked: bool = True, + **kwargs, ) -> ReferencedRaster: - if kwargs.get("tile"): # pragma: no cover - warnings.warn(DeprecationWarning("'tile' is deprecated and should be 'grid'")) - grid = grid or kwargs.get("tile") - kwargs.pop("tile") - inp = MPath.from_inp(inp) - logger.debug(f"reading {str(inp)} into memory") - if grid: - return ReferencedRaster( - data=read_raster_window(inp, grid=grid, **kwargs), - transform=grid.transform, - bounds=grid.bounds, - crs=grid.crs, - ) - with rasterio_open(inp, "r") as src: - return ReferencedRaster( - data=src.read(masked=True), - transform=src.transform, - bounds=src.bounds, - crs=src.crs, - ) + return ReferencedRaster.from_file(inp, grid=grid, masked=masked, **kwargs) diff --git a/test/test_io_raster.py b/test/test_io_raster.py index 3537a108..0d0befaa 100644 --- a/test/test_io_raster.py +++ b/test/test_io_raster.py @@ -5,6 +5,7 @@ import numpy as np import numpy.ma as ma import pytest +from pytest_lazyfixture import lazy_fixture from rasterio.enums import Compression from shapely.geometry import box from shapely.ops import unary_union @@ -30,6 +31,7 @@ ) from mapchete.io.vector import reproject_geometry from mapchete.tile import BufferedTilePyramid +from mapchete.types import Grid def test_read_raster_window_nofile(raster_4band_tile): @@ -811,11 +813,49 @@ def test_referencedraster_meta(s2_band): assert k in meta +@pytest.mark.parametrize("masked", [True, False]) +@pytest.mark.parametrize("grid", [lazy_fixture("s2_band_tile")]) +def test_referencedraster_from_file(s2_band, masked, grid): + rr = ReferencedRaster.from_file(s2_band, grid=grid, masked=masked) + if masked: + assert isinstance(rr.array, ma.MaskedArray) + else: + assert not isinstance(rr.array, ma.MaskedArray) + assert isinstance(rr.array, np.ndarray) + if grid: + assert rr.array.shape[1:] == grid.shape + + +def test_referencedraster_from_array_like(s2_band): + rr = ReferencedRaster.from_file(s2_band) + assert ReferencedRaster.from_array_like(rr) + assert ReferencedRaster.from_array_like(rr.data, transform=rr.transform, crs=rr.crs) + + +def test_referencedraster_from_array_like_errors(s2_band): + with pytest.raises(TypeError): + ReferencedRaster.from_array_like("foo") + + rr = ReferencedRaster.from_file(s2_band) + with pytest.raises(ValueError): + ReferencedRaster.from_array_like(rr.data) + with pytest.raises(ValueError): + ReferencedRaster.from_array_like(rr.data, transform=rr.transform) + with pytest.raises(ValueError): + ReferencedRaster.from_array_like(rr.data, crs=rr.crs) + + def test_referencedraster_array_interface(s2_band): rr = ReferencedRaster.from_file(s2_band) assert isinstance(ma.array(rr), ma.MaskedArray) +@pytest.mark.parametrize("indexes", [None, 1, [1]]) +def test_referencedraster_get_band_indexes(s2_band, indexes): + rr = ReferencedRaster.from_file(s2_band) + assert rr.get_band_indexes(indexes) == [1] + + @pytest.mark.parametrize("indexes", [None, 1, [1]]) def test_referencedraster_read_band(s2_band, indexes): rr = ReferencedRaster.from_file(s2_band) @@ -825,7 +865,7 @@ def test_referencedraster_read_band(s2_band, indexes): @pytest.mark.parametrize("indexes", [None, 1, [1]]) def test_referencedraster_read_tile_band(s2_band, indexes, s2_band_tile): rr = ReferencedRaster.from_file(s2_band) - assert rr.read(indexes, tile=s2_band_tile).any() + assert rr.read(indexes, grid=s2_band_tile).any() @pytest.mark.parametrize("dims", [2, 3]) @@ -839,7 +879,7 @@ def test_referencedraster_to_file(s2_band, mp_tmpdir, dims): assert src.read(masked=True).any() -@pytest.mark.parametrize("path", [pytest.lazy_fixture("mp_tmpdir")]) +@pytest.mark.parametrize("path", [lazy_fixture("mp_tmpdir")]) @pytest.mark.parametrize("dtype", [np.uint8, np.float32]) @pytest.mark.parametrize("in_memory", [True, False]) def test_rasterio_write(path, dtype, in_memory): @@ -864,7 +904,7 @@ def test_rasterio_write(path, dtype, in_memory): @pytest.mark.integration -@pytest.mark.parametrize("path", [pytest.lazy_fixture("mp_s3_tmpdir")]) +@pytest.mark.parametrize("path", [lazy_fixture("mp_s3_tmpdir")]) @pytest.mark.parametrize("dtype", [np.uint8, np.float32]) @pytest.mark.parametrize("in_memory", [True, False]) def test_rasterio_write_remote(path, dtype, in_memory): @@ -903,7 +943,7 @@ def test_output_s3_single_gtiff_error(output_s3_single_gtiff_error): @pytest.mark.parametrize( "path", [ - pytest.lazy_fixture("raster_4band"), + lazy_fixture("raster_4band"), ], ) def test_read_raster_no_crs(path): @@ -916,10 +956,10 @@ def test_read_raster_no_crs(path): @pytest.mark.parametrize( "path", [ - pytest.lazy_fixture("raster_4band_s3"), - pytest.lazy_fixture("raster_4band_aws_s3"), - pytest.lazy_fixture("raster_4band_http"), - pytest.lazy_fixture("raster_4band_secure_http"), + lazy_fixture("raster_4band_s3"), + lazy_fixture("raster_4band_aws_s3"), + lazy_fixture("raster_4band_http"), + lazy_fixture("raster_4band_secure_http"), ], ) def test_read_raster_no_crs_remote(path): @@ -930,7 +970,7 @@ def test_read_raster_no_crs_remote(path): @pytest.mark.parametrize( "path", [ - pytest.lazy_fixture("raster_4band_aws_s3"), + lazy_fixture("raster_4band_aws_s3"), ], ) def test_read_raster_no_crs_aws_s3(path): @@ -940,7 +980,7 @@ def test_read_raster_no_crs_aws_s3(path): @pytest.mark.parametrize( "path", [ - pytest.lazy_fixture("raster_4band"), + lazy_fixture("raster_4band"), ], ) @pytest.mark.parametrize("grid", ["geodetic", "mercator"]) @@ -987,9 +1027,9 @@ def test_read_raster_window(path, grid, pixelbuffer, zoom): @pytest.mark.parametrize( "path", [ - pytest.lazy_fixture("raster_4band_s3"), - pytest.lazy_fixture("raster_4band_http"), - pytest.lazy_fixture("raster_4band_secure_http"), + lazy_fixture("raster_4band_s3"), + lazy_fixture("raster_4band_http"), + lazy_fixture("raster_4band_secure_http"), ], ) @pytest.mark.parametrize("grid", ["geodetic", "mercator"]) @@ -1003,7 +1043,7 @@ def test_read_raster_window_remote(path, grid, pixelbuffer, zoom): @pytest.mark.parametrize( "path", [ - pytest.lazy_fixture("raster_4band_aws_s3"), + lazy_fixture("raster_4band_aws_s3"), ], ) @pytest.mark.parametrize("grid", ["geodetic", "mercator"]) @@ -1016,8 +1056,8 @@ def test_read_raster_window_aws_s3(path, grid, pixelbuffer, zoom): @pytest.mark.parametrize( "path", [ - pytest.lazy_fixture("raster_4band"), - pytest.lazy_fixture("stacta"), + lazy_fixture("raster_4band"), + lazy_fixture("stacta"), ], ) def test_read_raster(path): @@ -1026,12 +1066,25 @@ def test_read_raster(path): assert not rr.data.mask.all() +@pytest.mark.parametrize("masked", [True, False]) +@pytest.mark.parametrize("grid", [lazy_fixture("s2_band_tile")]) +def test_read_raster_args(s2_band, masked, grid): + rr = read_raster(s2_band, grid=grid, masked=masked) + if masked: + assert isinstance(rr.array, ma.MaskedArray) + else: + assert not isinstance(rr.array, ma.MaskedArray) + assert isinstance(rr.array, np.ndarray) + if grid: + assert rr.array.shape[1:] == grid.shape + + @pytest.mark.aws_s3 @pytest.mark.parametrize( "path", [ - pytest.lazy_fixture("raster_4band_aws_s3"), - pytest.lazy_fixture("aws_s3_stacta"), + lazy_fixture("raster_4band_aws_s3"), + lazy_fixture("aws_s3_stacta"), ], ) def test_read_raster_remote(path): @@ -1042,10 +1095,10 @@ def test_read_raster_remote(path): @pytest.mark.parametrize( "path", [ - pytest.lazy_fixture("raster_4band_http"), - pytest.lazy_fixture("raster_4band_s3"), - pytest.lazy_fixture("http_stacta"), - pytest.lazy_fixture("secure_http_stacta"), + lazy_fixture("raster_4band_http"), + lazy_fixture("raster_4band_s3"), + lazy_fixture("http_stacta"), + lazy_fixture("secure_http_stacta"), ], ) def test_read_raster_integration(path): @@ -1055,14 +1108,14 @@ def test_read_raster_integration(path): @pytest.mark.parametrize( "path", [ - pytest.lazy_fixture("raster_4band"), - pytest.lazy_fixture("stacta"), + lazy_fixture("raster_4band"), + lazy_fixture("stacta"), ], ) def test_read_raster_tile(path): tp = BufferedTilePyramid("geodetic") tile = next(tp.tiles_from_bounds(read_raster(path).bounds, zoom=13)) - rr = read_raster(path, tile=tile) + rr = read_raster(path, grid=tile) assert isinstance(rr, ReferencedRaster) assert not rr.data.mask.all() @@ -1071,8 +1124,8 @@ def test_read_raster_tile(path): @pytest.mark.parametrize( "path", [ - pytest.lazy_fixture("raster_4band_aws_s3"), - pytest.lazy_fixture("aws_s3_stacta"), + lazy_fixture("raster_4band_aws_s3"), + lazy_fixture("aws_s3_stacta"), ], ) def test_read_raster_tile_remote(path): @@ -1083,11 +1136,11 @@ def test_read_raster_tile_remote(path): @pytest.mark.parametrize( "path", [ - pytest.lazy_fixture("raster_4band_s3"), - pytest.lazy_fixture("raster_4band_http"), - pytest.lazy_fixture("raster_4band_secure_http"), - pytest.lazy_fixture("http_stacta"), - pytest.lazy_fixture("secure_http_stacta"), + lazy_fixture("raster_4band_s3"), + lazy_fixture("raster_4band_http"), + lazy_fixture("raster_4band_secure_http"), + lazy_fixture("http_stacta"), + lazy_fixture("secure_http_stacta"), ], ) def test_read_raster_tile_integration(path): From 71903b9c20b0216a25d058b20871d8793b79edab Mon Sep 17 00:00:00 2001 From: Joachim Ungar Date: Tue, 14 May 2024 12:12:37 +0200 Subject: [PATCH 24/28] increase test coverage --- mapchete/cli/options.py | 4 +-- mapchete/io/raster/array.py | 2 +- mapchete/io/raster/referenced_raster.py | 18 ++++++++--- mapchete/processes/convert.py | 2 +- mapchete/processes/hillshade.py | 2 +- test/test_config.py | 8 +++++ test/test_io_raster.py | 43 +++++++++++++++++++++++-- test/test_processes.py | 7 ++++ 8 files changed, 74 insertions(+), 12 deletions(-) diff --git a/mapchete/cli/options.py b/mapchete/cli/options.py index 21f22849..cfcf0584 100644 --- a/mapchete/cli/options.py +++ b/mapchete/cli/options.py @@ -31,8 +31,8 @@ def _validate_zoom(ctx, param, zoom): if len(zoom_levels) > 2: raise ValueError("zooms can be maximum two items") return ZoomLevels.from_inp(zoom_levels) - except Exception as e: - raise click.BadParameter(e) + except Exception as exc: + raise click.BadParameter(str(exc)) def _validate_bounds(ctx, param, bounds): diff --git a/mapchete/io/raster/array.py b/mapchete/io/raster/array.py index 86b0c4da..9679ede0 100644 --- a/mapchete/io/raster/array.py +++ b/mapchete/io/raster/array.py @@ -301,7 +301,7 @@ def clip_array_with_vector( array_affine: Affine, geometries: List[dict], inverted: bool = False, - clip_buffer: int = 0, + clip_buffer: float = 0, ) -> ma.MaskedArray: """ Clip input array with a vector list. diff --git a/mapchete/io/raster/referenced_raster.py b/mapchete/io/raster/referenced_raster.py index a360fd9b..ae314dc3 100644 --- a/mapchete/io/raster/referenced_raster.py +++ b/mapchete/io/raster/referenced_raster.py @@ -7,6 +7,7 @@ import numpy.ma as ma from affine import Affine from rasterio.transform import array_bounds +from retry import retry from shapely.geometry import mapping, shape from mapchete.io.raster.array import resample_from_array @@ -14,6 +15,7 @@ from mapchete.io.raster.read import read_raster_window from mapchete.path import MPath from mapchete.protocols import GridProtocol +from mapchete.settings import IORetrySettings from mapchete.types import Bounds, BoundsLike, CRSLike, Grid, MPathLike, NodataVal logger = logging.getLogger(__name__) @@ -174,6 +176,7 @@ def from_file( **kwargs, ) -> ReferencedRaster: path = MPath.from_inp(path) + logger.debug(f"reading {str(path)} into memory") if grid: grid = Grid.from_obj(grid) @@ -184,11 +187,16 @@ def from_file( bounds=grid.bounds, crs=grid.crs, ) - with rasterio_open(path, "r") as src: - return ReferencedRaster.from_rasterio( - src, - masked=masked, - ) + + @retry(logger=logger, **dict(IORetrySettings())) + def _read_raster(): + with rasterio_open(path, "r") as src: + return ReferencedRaster.from_rasterio( + src, + masked=masked, + ) + + return _read_raster() @staticmethod def from_array_like( diff --git a/mapchete/processes/convert.py b/mapchete/processes/convert.py index eab55782..8d2cfa2b 100644 --- a/mapchete/processes/convert.py +++ b/mapchete/processes/convert.py @@ -126,7 +126,7 @@ def execute( else: logger.debug(f"writing {len(input_data)} features") return input_data - else: + else: # pragma: no cover raise TypeError( f"inp must either be of type RasterInput or VectorInput, not {type(inp)}" ) diff --git a/mapchete/processes/hillshade.py b/mapchete/processes/hillshade.py index a4214688..1f74ba22 100644 --- a/mapchete/processes/hillshade.py +++ b/mapchete/processes/hillshade.py @@ -88,7 +88,7 @@ def execute( fallback_to_higher_zoom=td_fallback_to_higher_zoom, ) - if elevation_data.mask.all(): + if elevation_data.mask.all(): # pragma: no cover raise Empty logger.debug("calculate hillshade") diff --git a/test/test_config.py b/test/test_config.py index c39b7371..3223f0e3 100644 --- a/test/test_config.py +++ b/test/test_config.py @@ -241,6 +241,14 @@ def test_empty_input(file_groups): mapchete.open(config) +def test_input_name_process_params(example_mapchete): + """Input has to be defined if required by process.""" + config = example_mapchete.dict + config.update(process_parameters=dict(file1="foo")) + with pytest.raises(MapcheteConfigError): + mapchete.open(config) + + def test_read_input_groups(file_groups): """Read input data groups.""" config = MapcheteConfig(file_groups.dict) diff --git a/test/test_io_raster.py b/test/test_io_raster.py index 0d0befaa..bebf54e1 100644 --- a/test/test_io_raster.py +++ b/test/test_io_raster.py @@ -7,7 +7,7 @@ import pytest from pytest_lazyfixture import lazy_fixture from rasterio.enums import Compression -from shapely.geometry import box +from shapely.geometry import GeometryCollection, box from shapely.ops import unary_union from tilematrix import Bounds @@ -29,9 +29,9 @@ resample_from_array, write_raster_window, ) +from mapchete.io.raster.array import clip_array_with_vector from mapchete.io.vector import reproject_geometry from mapchete.tile import BufferedTilePyramid -from mapchete.types import Grid def test_read_raster_window_nofile(raster_4band_tile): @@ -1145,3 +1145,42 @@ def test_read_raster_tile_remote(path): ) def test_read_raster_tile_integration(path): test_read_raster_tile(path) + + +def test_clip_array_with_vector(s2_band, s2_band_tile): + rr = ReferencedRaster.from_file(s2_band) + + geometries = [dict(geometry=s2_band_tile.bbox)] + out = clip_array_with_vector(rr.data, rr.affine, geometries) + assert out.mask.all() + + +def test_clip_array_with_vector_geometrycollection(s2_band, s2_band_tile): + rr = ReferencedRaster.from_file(s2_band) + + geometries = [dict(geometry=GeometryCollection([s2_band_tile.bbox]))] + out = clip_array_with_vector(rr.data, rr.affine, geometries) + assert out.mask.all() + + +def test_clip_array_with_vector_2dim(s2_band, s2_band_tile): + rr = ReferencedRaster.from_file(s2_band) + + geometries = [dict(geometry=s2_band_tile.bbox)] + out = clip_array_with_vector(rr.data[0], rr.affine, geometries) + assert out.mask.all() + + +@pytest.mark.parametrize("inverted", [True, False]) +@pytest.mark.parametrize("clip_buffer", [0, 0.1]) +def test_clip_array_with_vector_empty_geometries(s2_band, inverted, clip_buffer): + rr = ReferencedRaster.from_file(s2_band) + + geometries = [dict(geometry=GeometryCollection())] + out = clip_array_with_vector( + rr.data, rr.affine, geometries, inverted=inverted, clip_buffer=clip_buffer + ) + if inverted: + assert not out.mask.all() + else: + assert out.mask.all() diff --git a/test/test_processes.py b/test/test_processes.py index 57e597f7..5d77b741 100644 --- a/test/test_processes.py +++ b/test/test_processes.py @@ -176,3 +176,10 @@ def test_clip(local_raster, landpoly): assert isinstance(output, np.ndarray) assert not output.mask.all() assert output.mask.any() + + +def test_clip_empty(cleantopo_br_tif, landpoly): + tile = (8, 28, 89) + mp = get_process_mp(input=dict(inp=cleantopo_br_tif, clip=landpoly), tile=tile) + with pytest.raises(Empty): + clip.execute(mp.open("inp"), mp.open("clip")) From eea425077851d27948b2bf5ff41b358bb5179623 Mon Sep 17 00:00:00 2001 From: Joachim Ungar Date: Tue, 14 May 2024 16:23:40 +0200 Subject: [PATCH 25/28] mapchete.cli: collect all tests in test submodule --- test/cli/__init__.py | 0 test/cli/default/__init__.py | 25 + test/cli/default/test_convert.py | 655 +++++++ test/cli/default/test_cp.py | 116 ++ test/cli/default/test_create.py | 55 + test/cli/default/test_execute.py | 215 +++ test/cli/default/test_formats.py | 14 + test/cli/default/test_index.py | 267 +++ test/cli/default/test_processes.py | 6 + test/cli/default/test_rm.py | 36 + test/cli/default/test_serve.py | 61 + test/cli/default/test_stac.py | 38 + test/cli/test_main.py | 19 + test/{test_cli_mpath.py => cli/test_mpath.py} | 4 +- test/cli/test_options.py | 29 + test/test_cli.py | 1519 ----------------- 16 files changed, 1538 insertions(+), 1521 deletions(-) create mode 100644 test/cli/__init__.py create mode 100644 test/cli/default/__init__.py create mode 100644 test/cli/default/test_convert.py create mode 100644 test/cli/default/test_cp.py create mode 100644 test/cli/default/test_create.py create mode 100644 test/cli/default/test_execute.py create mode 100644 test/cli/default/test_formats.py create mode 100644 test/cli/default/test_index.py create mode 100644 test/cli/default/test_processes.py create mode 100644 test/cli/default/test_rm.py create mode 100644 test/cli/default/test_serve.py create mode 100644 test/cli/default/test_stac.py create mode 100644 test/cli/test_main.py rename test/{test_cli_mpath.py => cli/test_mpath.py} (97%) create mode 100644 test/cli/test_options.py delete mode 100644 test/test_cli.py diff --git a/test/cli/__init__.py b/test/cli/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/test/cli/default/__init__.py b/test/cli/default/__init__.py new file mode 100644 index 00000000..9f81e5f0 --- /dev/null +++ b/test/cli/default/__init__.py @@ -0,0 +1,25 @@ +import logging + +from click.testing import CliRunner + +from mapchete.cli.main import main as mapchete_cli + +logger = logging.getLogger(__name__) + + +def run_cli( + args, expected_exit_code=0, output_contains=None, raise_exc=True, cli=mapchete_cli +): + result = CliRunner(env=dict(MAPCHETE_TEST="TRUE"), mix_stderr=True).invoke( + cli, list(map(str, args)), catch_exceptions=True, standalone_mode=True + ) + if output_contains: + assert output_contains in result.output or output_contains in str( + result.exception + ) + if raise_exc and result.exception: + logger.error(result.output or result.exception) + raise result.exception + # raise ClickException(result.output or result.exception) + assert result.exit_code == expected_exit_code + return result diff --git a/test/cli/default/test_convert.py b/test/cli/default/test_convert.py new file mode 100644 index 00000000..fd3778cb --- /dev/null +++ b/test/cli/default/test_convert.py @@ -0,0 +1,655 @@ +import os +import warnings +from test.cli.default import run_cli + +import geobuf +import pytest +from rio_cogeo.cogeo import cog_validate +from shapely.geometry import shape + +import mapchete +from mapchete.io import fiona_open, rasterio_open + + +def test_geodetic(cleantopo_br_tif, mp_tmpdir): + """Automatic geodetic tile pyramid creation of raster files.""" + run_cli( + [ + "convert", + cleantopo_br_tif, + mp_tmpdir, + "--output-pyramid", + "geodetic", + "--concurrency", + "none", + ] + ) + for zoom, row, col in [(4, 15, 31), (3, 7, 15), (2, 3, 7), (1, 1, 3)]: + out_file = os.path.join(*[mp_tmpdir, str(zoom), str(row), str(col) + ".tif"]) + with rasterio_open(out_file, "r") as src: + assert src.meta["driver"] == "GTiff" + assert src.meta["dtype"] == "uint16" + data = src.read(masked=True) + assert data.mask.any() + + +def test_mercator(cleantopo_br_tif, mp_tmpdir): + """Automatic mercator tile pyramid creation of raster files.""" + run_cli( + [ + "convert", + cleantopo_br_tif, + mp_tmpdir, + "--output-pyramid", + "mercator", + "--concurrency", + "none", + ] + ) + for zoom, row, col in [(4, 15, 15), (3, 7, 7)]: + out_file = os.path.join(*[mp_tmpdir, str(zoom), str(row), str(col) + ".tif"]) + with rasterio_open(out_file, "r") as src: + assert src.meta["driver"] == "GTiff" + assert src.meta["dtype"] == "uint16" + data = src.read(masked=True) + assert data.mask.any() + + +def test_custom_grid(s2_band, mp_tmpdir, custom_grid_json): + """Automatic mercator tile pyramid creation of raster files.""" + run_cli( + [ + "convert", + s2_band, + mp_tmpdir, + "--output-pyramid", + custom_grid_json, + "--concurrency", + "none", + ] + ) + + for zoom, row, col in [(0, 5298, 631)]: + out_file = mp_tmpdir / zoom / row / col + ".tif" + with rasterio_open(out_file, "r") as src: + assert src.meta["driver"] == "GTiff" + assert src.meta["dtype"] == "uint16" + data = src.read(masked=True) + assert data.mask.any() + + +def test_png(cleantopo_br_tif, mp_tmpdir): + """Automatic PNG tile pyramid creation of raster files.""" + run_cli( + [ + "convert", + cleantopo_br_tif, + mp_tmpdir, + "--output-pyramid", + "mercator", + "--output-format", + "PNG", + "--concurrency", + "none", + ] + ) + for zoom, row, col in [(4, 15, 15), (3, 7, 7)]: + out_file = mp_tmpdir / zoom / row / col + ".png" + with warnings.catch_warnings(): + warnings.simplefilter("ignore") + with rasterio_open(out_file, "r") as src: + assert src.meta["driver"] == "PNG" + assert src.meta["dtype"] == "uint8" + data = src.read(masked=True) + assert data.mask.any() + + +def test_bidx(cleantopo_br_tif, mp_tmpdir): + """Automatic geodetic tile pyramid creation of raster files.""" + single_gtiff = mp_tmpdir / "single_out_bidx.tif" + run_cli( + [ + "convert", + cleantopo_br_tif, + single_gtiff, + "--output-pyramid", + "geodetic", + "-z", + "3", + "--bidx", + "1", + "--concurrency", + "none", + ] + ) + with rasterio_open(single_gtiff, "r") as src: + assert src.meta["driver"] == "GTiff" + assert src.meta["dtype"] == "uint16" + data = src.read(masked=True) + assert data.mask.any() + assert not src.overviews(1) + + +def test_single_gtiff(cleantopo_br_tif, mp_tmpdir): + """Automatic geodetic tile pyramid creation of raster files.""" + single_gtiff = mp_tmpdir / "single_out.tif" + run_cli( + [ + "convert", + cleantopo_br_tif, + single_gtiff, + "--output-pyramid", + "geodetic", + "-z", + "3", + "--concurrency", + "none", + ] + ) + with rasterio_open(single_gtiff, "r") as src: + assert src.meta["driver"] == "GTiff" + assert src.meta["dtype"] == "uint16" + data = src.read(masked=True) + assert data.mask.any() + assert not src.overviews(1) + + +def test_single_gtiff_cog(cleantopo_br_tif, mp_tmpdir): + """Automatic geodetic tile pyramid creation of raster files.""" + single_gtiff = mp_tmpdir / "single_out_cog.tif" + run_cli( + [ + "convert", + cleantopo_br_tif, + single_gtiff, + "--output-pyramid", + "geodetic", + "-z", + "3", + "--cog", + "--concurrency", + "none", + ] + ) + with rasterio_open(single_gtiff, "r") as src: + assert src.meta["driver"] == "GTiff" + assert src.meta["dtype"] == "uint16" + data = src.read(masked=True) + assert data.mask.any() + assert cog_validate(single_gtiff, strict=True) + + +def test_single_gtiff_overviews(cleantopo_br_tif, mp_tmpdir): + """Automatic geodetic tile pyramid creation of raster files.""" + single_gtiff = mp_tmpdir / "single_out.tif" + run_cli( + [ + "convert", + cleantopo_br_tif, + single_gtiff, + "--output-pyramid", + "geodetic", + "-z", + "7", + "--overviews", + "--overviews-resampling-method", + "bilinear", + "--workers", + "1", + "--concurrency", + "none", + ] + ) + with rasterio_open(single_gtiff, "r") as src: + assert src.meta["driver"] == "GTiff" + assert src.meta["dtype"] == "uint16" + data = src.read(masked=True) + assert data.mask.any() + assert src.overviews(1) + + +@pytest.mark.integration +def test_remote_single_gtiff(http_raster, mp_tmpdir): + """Automatic geodetic tile pyramid creation of raster files.""" + single_gtiff = mp_tmpdir / "single_out.tif" + run_cli( + [ + "convert", + http_raster, + single_gtiff, + "--output-pyramid", + "geodetic", + "-z", + "1", + "--concurrency", + "none", + ] + ) + with rasterio_open(single_gtiff, "r") as src: + assert src.meta["driver"] == "GTiff" + assert src.meta["dtype"] == "uint16" + data = src.read(masked=True) + assert data.any() + + +def test_dtype(cleantopo_br_tif, mp_tmpdir): + """Automatic tile pyramid creation using dtype scale.""" + run_cli( + [ + "convert", + cleantopo_br_tif, + mp_tmpdir, + "--output-pyramid", + "mercator", + "--output-dtype", + "uint8", + "--concurrency", + "none", + ] + ) + for zoom, row, col in [(4, 15, 15), (3, 7, 7)]: + out_file = mp_tmpdir / zoom / row / col + ".tif" + with rasterio_open(out_file, "r") as src: + assert src.meta["driver"] == "GTiff" + assert src.meta["dtype"] == "uint8" + data = src.read(masked=True) + assert data.mask.any() + + +def test_scale_ratio(cleantopo_br_tif, mp_tmpdir): + """Automatic tile pyramid creation cropping data.""" + run_cli( + [ + "convert", + cleantopo_br_tif, + mp_tmpdir, + "--output-pyramid", + "mercator", + "--output-dtype", + "uint8", + "--scale-ratio", + "0.003", + "--concurrency", + "none", + ] + ) + for zoom, row, col in [(4, 15, 15), (3, 7, 7)]: + out_file = mp_tmpdir / zoom / row / col + ".tif" + with rasterio_open(out_file, "r") as src: + assert src.meta["driver"] == "GTiff" + assert src.meta["dtype"] == "uint8" + data = src.read(masked=True) + assert data.mask.any() + assert not data.mask.all() + + +def test_scale_offset(cleantopo_br_tif, mp_tmpdir): + """Automatic tile pyramid creation cropping data.""" + run_cli( + [ + "convert", + cleantopo_br_tif, + mp_tmpdir, + "--output-pyramid", + "mercator", + "--output-dtype", + "uint8", + "--scale-offset", + "1", + "--concurrency", + "none", + ] + ) + for zoom, row, col in [(4, 15, 15), (3, 7, 7)]: + out_file = mp_tmpdir / zoom / row / col + ".tif" + with rasterio_open(out_file, "r") as src: + assert src.meta["driver"] == "GTiff" + assert src.meta["dtype"] == "uint8" + data = src.read(masked=True) + assert data.mask.any() + assert not data.mask.all() + + +def test_clip(cleantopo_br_tif, mp_tmpdir, landpoly): + """Automatic tile pyramid creation cropping data.""" + run_cli( + [ + "convert", + cleantopo_br_tif, + mp_tmpdir, + "--output-pyramid", + "geodetic", + "--clip-geometry", + landpoly, + "-v", + "--concurrency", + "none", + ], + output_contains="Process area is empty", + ) + + +@pytest.mark.parametrize( + "zoom, tiles", + [("3", [(4, 15, 15), (2, 3, 0)]), ("3,4", [(2, 3, 0)]), ("4,3", [(2, 3, 0)])], +) +def test_zoom(cleantopo_br_tif, mp_tmpdir, zoom, tiles): + """Automatic tile pyramid creation using a specific zoom.""" + run_cli( + [ + "convert", + cleantopo_br_tif, + mp_tmpdir, + "--output-pyramid", + "mercator", + "-z", + "3", + "--concurrency", + "none", + ] + ) + for zoom, row, col in tiles: + out_file = mp_tmpdir / zoom / row / col + ".tif" + assert not out_file.exists() + + +def test_mapchete(cleantopo_br, mp_tmpdir): + # prepare data + with mapchete.open(cleantopo_br.path) as mp: + list(mp.execute(zoom=[1, 4])) + run_cli( + [ + "convert", + cleantopo_br.path, + mp_tmpdir, + "--output-pyramid", + "geodetic", + "--output-metatiling", + "1", + "-d", + "--concurrency", + "none", + "--bounds", + "168.75", + "-90.0", + "180.0", + "-78.75", + ] + ) + for zoom, row, col in [(4, 15, 31), (3, 7, 15), (2, 3, 7), (1, 1, 3)]: + out_file = mp_tmpdir / zoom / row / col + ".tif" + with rasterio_open(out_file, "r") as src: + assert src.meta["driver"] == "GTiff" + assert src.meta["dtype"] == "uint16" + data = src.read(masked=True) + assert data.mask.any() + + +def test_tiledir(cleantopo_br, mp_tmpdir): + # prepare data + with mapchete.open(cleantopo_br.path) as mp: + list(mp.execute(zoom=[1, 4])) + run_cli( + [ + "convert", + os.path.join( + cleantopo_br.dict["config_dir"], cleantopo_br.dict["output"]["path"] + ), + mp_tmpdir, + "--output-pyramid", + "geodetic", + "--output-metatiling", + "1", + "--zoom", + "1,4", + "-d", + "--concurrency", + "none", + "--bounds", + "168.75", + "-90.0", + "180.0", + "-78.75", + ] + ) + for zoom, row, col in [(4, 15, 31), (3, 7, 15), (2, 3, 7), (1, 1, 3)]: + out_file = mp_tmpdir / zoom / row / col + ".tif" + with rasterio_open(out_file, "r") as src: + assert src.meta["driver"] == "GTiff" + assert src.meta["dtype"] == "uint16" + data = src.read(masked=True) + assert data.mask.any() + + +def test_geojson(landpoly, mp_tmpdir): + run_cli( + [ + "convert", + landpoly, + mp_tmpdir, + "--output-pyramid", + "geodetic", + "--zoom", + "4", + "--bounds", + "-101.25", + "78.75", + "-90.0", + "90.0", + "--concurrency", + "none", + ] + ) + zoom, row, col = (4, 0, 7) + control = 9 + out_file = mp_tmpdir / zoom / row / col + ".geojson" + with fiona_open(out_file, "r") as src: + assert len(src) == control + for f in src: + assert shape(f["geometry"]).is_valid + + +def test_geobuf(landpoly, mp_tmpdir): + # convert to geobuf + geobuf_outdir = mp_tmpdir / "geobuf" + run_cli( + [ + "convert", + landpoly, + geobuf_outdir, + "--output-pyramid", + "geodetic", + "--zoom", + "4", + "--output-format", + "Geobuf", + "--concurrency", + "none", + "--bounds", + "-101.25", + "67.5", + "-90.0", + "90.0", + ] + ) + for (zoom, row, col), control in zip([(4, 0, 7), (4, 1, 7)], [9, 32]): + out_file = geobuf_outdir / zoom / row / col + ".pbf" + with open(out_file, "rb") as src: + features = geobuf.decode(src.read())["features"] + assert len(features) == control + for f in features: + assert f["geometry"]["type"] == "Polygon" + assert shape(f["geometry"]).area + + # convert from geobuf + geojson_outdir = mp_tmpdir / "geojson" + run_cli( + [ + "convert", + geobuf_outdir, + geojson_outdir, + "--zoom", + "4", + "--output-format", + "GeoJSON", + "--concurrency", + "none", + "--bounds", + "-101.25", + "67.5", + "-90.0", + "90.0", + ] + ) + for (zoom, row, col), control in zip([(4, 0, 7), (4, 1, 7)], [9, [31, 32]]): + out_file = geojson_outdir / zoom / row / col + ".geojson" + with fiona_open(out_file, "r") as src: + if isinstance(control, list): + assert len(src) in control + else: + assert len(src) == control + + for f in src: + assert shape(f["geometry"]).is_valid + + +def test_geobuf_multipolygon(landpoly, mp_tmpdir): + run_cli( + [ + "convert", + landpoly, + mp_tmpdir, + "--output-pyramid", + "geodetic", + "--zoom", + "4", + "--output-format", + "Geobuf", + "--output-geometry-type", + "MultiPolygon", + "--concurrency", + "none", + ] + ) + for (zoom, row, col), control in zip([(4, 0, 7), (4, 1, 7)], [7, 30]): + out_file = mp_tmpdir / zoom / row / col + ".pbf" + with open(out_file, "rb") as src: + features = geobuf.decode(src.read())["features"] + assert len(features) == control + multipolygons = 0 + for f in features: + assert f["geometry"]["type"] in ["Polygon", "MultiPolygon"] + assert shape(f["geometry"]).area + if f["geometry"]["type"] == "MultiPolygon": + multipolygons += 1 + assert multipolygons + + +def test_vrt(cleantopo_br_tif, mp_tmpdir): + """Automatic geodetic tile pyramid creation of raster files.""" + run_cli( + [ + "convert", + cleantopo_br_tif, + mp_tmpdir, + "--output-pyramid", + "geodetic", + "--vrt", + "--zoom", + "1,4", + "--concurrency", + "none", + ] + ) + for zoom in [4, 3, 2, 1]: + out_file = mp_tmpdir / zoom + ".vrt" + with rasterio_open(out_file, "r") as src: + assert src.meta["driver"] == "VRT" + assert src.meta["dtype"] == "uint16" + data = src.read(masked=True) + assert data.mask.any() + + +def test_errors(s2_band_jp2, mp_tmpdir, s2_band, cleantopo_br, landpoly): + # output format required + run_cli( + ["convert", s2_band_jp2, mp_tmpdir, "--output-pyramid", "geodetic"], + expected_exit_code=1, + output_contains="Output format required.", + raise_exc=False, + ) + + # output pyramid reqired + run_cli( + ["convert", s2_band, mp_tmpdir], + expected_exit_code=1, + output_contains="Output pyramid required.", + raise_exc=False, + ) + + # prepare data for tiledir input + with mapchete.open(cleantopo_br.path) as mp: + mp.execute(zoom=[1, 4]) + tiledir_path = cleantopo_br.dict["config_dir"] / cleantopo_br.dict["output"]["path"] + + # zoom level required + run_cli( + [ + "convert", + tiledir_path, + mp_tmpdir, + "--output-pyramid", + "geodetic", + "--concurrency", + "none", + ], + expected_exit_code=1, + output_contains="Zoom levels required.", + raise_exc=False, + ) + + # incompatible formats + run_cli( + [ + "convert", + tiledir_path, + mp_tmpdir, + "--output-pyramid", + "geodetic", + "--zoom", + "5", + "--output-format", + "GeoJSON", + "--concurrency", + "none", + ], + expected_exit_code=1, + output_contains=("is incompatible with input format"), + raise_exc=False, + ) + + # unsupported output format extension + run_cli( + [ + "convert", + s2_band_jp2, + "output.jp2", + "--output-pyramid", + "geodetic", + "--zoom", + "5", + "--concurrency", + "none", + ], + expected_exit_code=1, + output_contains=("currently only single file GeoTIFFs are allowed"), + raise_exc=False, + ) + + # malformed band index + run_cli( + ["convert", s2_band_jp2, "output.tif", "--bidx", "invalid"], + expected_exit_code=2, + output_contains=("Invalid value for '--bidx'"), + raise_exc=False, + ) diff --git a/test/cli/default/test_cp.py b/test/cli/default/test_cp.py new file mode 100644 index 00000000..17e4f52e --- /dev/null +++ b/test/cli/default/test_cp.py @@ -0,0 +1,116 @@ +from test.cli.default import run_cli + +import pytest + + +def test_cp(mp_tmpdir, cleantopo_br, wkt_geom, testdata_dir): + """Using debug output.""" + # generate TileDirectory + run_cli( + [ + "execute", + cleantopo_br.path, + "-z", + "5", + "-b", + "169.19251592399996", + "-90", + "180", + "-80.18582802550002", + "--concurrency", + "none", + ] + ) + out_path = testdata_dir / cleantopo_br.dict["output"]["path"] + + # copy tiles and subset by point + run_cli( + [ + "cp", + out_path, + mp_tmpdir / "all", + "-z", + "5", + "-p", + "170", + "-85", + "--concurrency", + "none", + ] + ) + # copy tiles and subset by bounds + run_cli( + [ + "cp", + out_path, + mp_tmpdir / "all", + "-z", + "5", + "-b", + "169.19251592399996", + "-90", + "180", + "-80.18582802550002", + "--concurrency", + "none", + ] + ) + # copy all tiles + run_cli( + [ + "cp", + out_path, + mp_tmpdir / "all", + "-z", + "5", + "--concurrency", + "none", + ] + ) + # copy tiles and subset by area + run_cli( + [ + "cp", + out_path, + mp_tmpdir / "all", + "-z", + "5", + "--area", + wkt_geom, + "--concurrency", + "none", + ] + ) + # copy local tiles wit using threads + run_cli( + [ + "cp", + out_path, + mp_tmpdir / "all", + "-z", + "5", + "--concurrency", + "threads", + ] + ) + + +@pytest.mark.integration +def test_cp_http(mp_tmpdir, http_tiledir): + # copy tiles and subset by bounds + run_cli( + [ + "cp", + http_tiledir, + mp_tmpdir / "http", + "-z", + "1", + "-b", + "3.0", + "1.0", + "4.0", + "2.0", + "--concurrency", + "none", + ] + ) diff --git a/test/cli/default/test_create.py b/test/cli/default/test_create.py new file mode 100644 index 00000000..af78a114 --- /dev/null +++ b/test/cli/default/test_create.py @@ -0,0 +1,55 @@ +from test.cli.default import run_cli + +import oyaml as yaml +import pytest + + +def test_create(mp_tmpdir, cleantopo_br_tif): + """Run mapchete create and execute.""" + temp_mapchete = mp_tmpdir / "temp.mapchete" + temp_process = mp_tmpdir / "temp.py" + out_format = "GTiff" + # create from template + run_cli( + [ + "create", + "--mapchete-file", + str(temp_mapchete), + "--process-file", + str(temp_process), + "--out-format", + out_format, + "--pyramid-type", + "geodetic", + ], + expected_exit_code=0, + ) + # edit configuration + with temp_mapchete.open("r") as config_file: + config = yaml.safe_load(config_file) + config["output"].update(bands=1, dtype="uint8", path=str(mp_tmpdir)) + with temp_mapchete.open("w") as config_file: + config_file.write(yaml.dump(config, default_flow_style=False)) + + +def test_create_existing(mp_tmpdir): + """Run mapchete create and execute.""" + temp_mapchete = mp_tmpdir / "temp.mapchete" + temp_process = mp_tmpdir / "temp.py" + out_format = "GTiff" + # create files from template + args = [ + "create", + "--mapchete-file", + temp_mapchete, + "--process-file", + temp_process, + "--out-format", + out_format, + "--pyramid-type", + "geodetic", + ] + run_cli(args) + # try to create again + with pytest.raises((IOError, OSError)): # for python 2 and 3 + run_cli(args, expected_exit_code=-1) diff --git a/test/cli/default/test_execute.py b/test/cli/default/test_execute.py new file mode 100644 index 00000000..c463fe5a --- /dev/null +++ b/test/cli/default/test_execute.py @@ -0,0 +1,215 @@ +"""Test Mapchete main module and processing.""" + +import logging + +from shapely import wkt + +import mapchete +from mapchete.io import rasterio_open + +from . import run_cli + +logger = logging.getLogger(__name__) + + +def test_concurrent_processes(cleantopo_br_metatiling_1): + # """Run mapchete execute with multiple workers.""" + run_cli( + [ + "execute", + cleantopo_br_metatiling_1.path, + "--zoom", + "5", + "--workers", + "2", + "-d", + "--concurrency", + "processes", + ] + ) + + +def test_concurrent_threads(cleantopo_br_metatiling_1): + """Run mapchete execute with multiple workers.""" + run_cli( + [ + "execute", + cleantopo_br_metatiling_1.path, + "--zoom", + "5", + "--workers", + "2", + "-d", + "--concurrency", + "threads", + ], + ) + + +def test_concurrent_dask(cleantopo_br_metatiling_1): + """Run mapchete execute with multiple workers.""" + run_cli( + [ + "execute", + cleantopo_br_metatiling_1.path, + "--zoom", + "5", + "--workers", + "2", + "-d", + "--concurrency", + "dask", + ], + ) + + +def test_debug(example_mapchete): + """Using debug output.""" + run_cli( + [ + "execute", + example_mapchete.path, + "-t", + "10", + "500", + "1040", + "--debug", + "--concurrency", + "none", + ] + ) + + +def test_vrt(mp_tmpdir, cleantopo_br): + """Using debug output.""" + run_cli(["execute", cleantopo_br.path, "-z", "5", "--vrt"]) + with mapchete.open(cleantopo_br.dict) as mp: + vrt_path = mp.config.output.path / "5.vrt" + with rasterio_open(vrt_path) as src: + assert src.read().any() + + # run again, this time with custom output directory + run_cli( + [ + "execute", + cleantopo_br.path, + "-z", + "5", + "--vrt", + "--idx-out-dir", + mp_tmpdir, + "--concurrency", + "none", + ] + ) + with mapchete.open(cleantopo_br.dict) as mp: + vrt_path = mp_tmpdir / "5.vrt" + with rasterio_open(vrt_path) as src: + assert src.read().any() + + # run with single tile + run_cli( + [ + "execute", + cleantopo_br.path, + "-t", + "5", + "3", + "7", + "--vrt", + "--concurrency", + "none", + ] + ) + + # no new entries + run_cli( + [ + "execute", + cleantopo_br.path, + "-t", + "5", + "0", + "0", + "--vrt", + "--concurrency", + "none", + ] + ) + + +def test_verbose(example_mapchete): + """Using verbose output.""" + run_cli( + [ + "execute", + example_mapchete.path, + "-t", + "10", + "500", + "1040", + "--verbose", + "--concurrency", + "none", + ] + ) + + +def test_logfile(mp_tmpdir, example_mapchete): + """Using logfile.""" + logfile = mp_tmpdir / "temp.log" + run_cli( + [ + "execute", + example_mapchete.path, + "-t", + "10", + "500", + "1040", + "--logfile", + logfile, + "--concurrency", + "none", + ] + ) + assert logfile.exists() + with open(logfile) as log: + assert "DEBUG" in log.read() + + +def test_wkt_area(example_mapchete, wkt_geom): + """Using area from WKT.""" + run_cli( + ["execute", example_mapchete.path, "--area", wkt_geom, "--concurrency", "none"] + ) + + +def test_point(example_mapchete, wkt_geom): + """Using bounds from WKT.""" + g = wkt.loads(wkt_geom) + run_cli( + [ + "execute", + example_mapchete.path, + "--point", + str(g.centroid.x), + str(g.centroid.y), + "--concurrency", + "none", + ] + ) + + +def test_callback_errors(cleantopo_tl): + run_cli( + ["execute", cleantopo_tl.path, "--zoom", "4,5,7", "--concurrency", "none"], + expected_exit_code=2, + raise_exc=False, + output_contains="zooms can be maximum two items", + ) + run_cli( + ["execute", cleantopo_tl.path, "--zoom", "invalid", "--concurrency", "none"], + expected_exit_code=2, + raise_exc=False, + output_contains="zoom levels must be integer values", + ) diff --git a/test/cli/default/test_formats.py b/test/cli/default/test_formats.py new file mode 100644 index 00000000..a593a731 --- /dev/null +++ b/test/cli/default/test_formats.py @@ -0,0 +1,14 @@ +from test.cli.default import run_cli + + +def test_formats(capfd): + """Output of mapchete formats command.""" + run_cli(["formats"]) + err = capfd.readouterr()[1] + assert not err + run_cli(["formats", "-i"]) + err = capfd.readouterr()[1] + assert not err + run_cli(["formats", "-o"]) + err = capfd.readouterr()[1] + assert not err diff --git a/test/cli/default/test_index.py b/test/cli/default/test_index.py new file mode 100644 index 00000000..b7c1e3ee --- /dev/null +++ b/test/cli/default/test_index.py @@ -0,0 +1,267 @@ +from test.cli.default import run_cli + +import pytest + +import mapchete +from mapchete.io.vector import fiona_open + + +def test_geojson(cleantopo_br): + # execute process at zoom 3 + run_cli( + ["execute", cleantopo_br.path, "-z", "3", "--debug", "--concurrency", "none"] + ) + + # generate index for zoom 3 + run_cli(["index", cleantopo_br.path, "-z", "3", "--geojson", "--debug"]) + with mapchete.open(cleantopo_br.dict) as mp: + files = mp.config.output.path.ls(absolute_paths=False) + assert len(files) == 4 + with fiona_open(mp.config.output.path / "3.geojson") as src: + for f in src: + assert "location" in f["properties"] + assert len(list(src)) == 1 + + +def test_geojson_fieldname(cleantopo_br): + # execute process at zoom 3 + run_cli( + ["execute", cleantopo_br.path, "-z", "3", "--debug", "--concurrency", "none"] + ) + + # index and rename "location" to "new_fieldname" + run_cli( + [ + "index", + cleantopo_br.path, + "-z", + "3", + "--geojson", + "--debug", + "--fieldname", + "new_fieldname", + ] + ) + with mapchete.open(cleantopo_br.dict) as mp: + with fiona_open(mp.config.output.path / "3.geojson") as src: + for f in src: + assert "new_fieldname" in f["properties"] + assert len(list(src)) == 1 + + +def test_geojson_basepath(cleantopo_br): + # execute process at zoom 3 + run_cli( + ["execute", cleantopo_br.path, "-z", "3", "--debug", "--concurrency", "none"] + ) + + basepath = "http://localhost" + # index and rename "location" to "new_fieldname" + run_cli( + [ + "index", + cleantopo_br.path, + "-z", + "3", + "--geojson", + "--debug", + "--basepath", + basepath, + ] + ) + with mapchete.open(cleantopo_br.dict) as mp: + with fiona_open(mp.config.output.path / "3.geojson") as src: + for f in src: + assert f["properties"]["location"].startswith(basepath) + assert len(list(src)) == 1 + + +def test_geojson_for_gdal(cleantopo_br): + # execute process at zoom 3 + run_cli(["execute", cleantopo_br.path, "-z", "3", "--debug"]) + + basepath = "http://localhost" + # index and rename "location" to "new_fieldname" + run_cli( + [ + "index", + cleantopo_br.path, + "-z", + "3", + "--geojson", + "--debug", + "--basepath", + basepath, + "--for-gdal", + ] + ) + with mapchete.open(cleantopo_br.dict) as mp: + with fiona_open(mp.config.output.path / "3.geojson") as src: + for f in src: + assert f["properties"]["location"].startswith("/vsicurl/" + basepath) + assert len(list(src)) == 1 + + +def test_geojson_tile(cleantopo_tl): + # execute process for single tile + run_cli( + [ + "execute", + cleantopo_tl.path, + "-t", + "3", + "0", + "0", + "--debug", + "--concurrency", + "none", + ] + ) + # generate index + run_cli(["index", cleantopo_tl.path, "-t", "3", "0", "0", "--geojson", "--debug"]) + with mapchete.open(cleantopo_tl.dict) as mp: + files = mp.config.output.path.ls(absolute_paths=False) + assert len(files) == 4 + with fiona_open(mp.config.output.path / "3.geojson") as src: + assert len(list(src)) == 1 + + +def test_geojson_wkt_area(cleantopo_br, wkt_geom): + # execute process at zoom 3 + run_cli( + [ + "execute", + cleantopo_br.path, + "--debug", + "--area", + wkt_geom, + "--concurrency", + "none", + ] + ) + + # generate index for zoom 3 + run_cli(["index", cleantopo_br.path, "--geojson", "--debug", "--area", wkt_geom]) + + with mapchete.open(cleantopo_br.dict) as mp: + files = mp.config.output.path.ls(absolute_paths=False) + assert len(files) == 8 # was 7 before doing the observer pattern + assert "3.geojson" in files + + +def test_gpkg(cleantopo_br): + # execute process + run_cli( + ["execute", cleantopo_br.path, "-z", "5", "--debug", "--concurrency", "none"] + ) + + # generate index + run_cli(["index", cleantopo_br.path, "-z", "5", "--gpkg", "--debug"]) + with mapchete.open(cleantopo_br.dict) as mp: + files = mp.config.output.path.ls(absolute_paths=False) + assert "5.gpkg" in files + with fiona_open(mp.config.output.path / "5.gpkg") as src: + for f in src: + assert "location" in f["properties"] + assert len(list(src)) == 1 + + # write again and assert there is no new entry because there is already one + run_cli(["index", cleantopo_br.path, "-z", "5", "--gpkg", "--debug"]) + with mapchete.open(cleantopo_br.dict) as mp: + files = mp.config.output.path.ls(absolute_paths=False) + assert "5.gpkg" in files + with fiona_open(mp.config.output.path / "5.gpkg") as src: + for f in src: + assert "location" in f["properties"] + assert len(list(src)) == 1 + + +def test_shp(cleantopo_br): + # execute process + run_cli( + ["execute", cleantopo_br.path, "-z", "5", "--debug", "--concurrency", "none"] + ) + + # generate index + run_cli(["index", cleantopo_br.path, "-z", "5", "--shp", "--debug"]) + with mapchete.open(cleantopo_br.dict) as mp: + files = mp.config.output.path.ls(absolute_paths=False) + assert "5.shp" in files + with fiona_open(mp.config.output.path / "5.shp") as src: + for f in src: + assert "location" in f["properties"] + assert len(list(src)) == 1 + + # write again and assert there is no new entry because there is already one + run_cli(["index", cleantopo_br.path, "-z", "5", "--shp", "--debug"]) + with mapchete.open(cleantopo_br.dict) as mp: + files = mp.config.output.path.ls(absolute_paths=False) + assert "5.shp" in files + with fiona_open(mp.config.output.path / "5.shp") as src: + for f in src: + assert "location" in f["properties"] + assert len(list(src)) == 1 + + +def test_fgb(cleantopo_br): + # execute process + run_cli( + ["execute", cleantopo_br.path, "-z", "5", "--debug", "--concurrency", "none"] + ) + + # generate index + run_cli(["index", cleantopo_br.path, "-z", "5", "--fgb", "--debug"]) + with mapchete.open(cleantopo_br.dict) as mp: + files = mp.config.output.path.ls(absolute_paths=False) + assert "5.fgb" in files + with fiona_open(mp.config.output.path / "5.fgb") as src: + for f in src: + assert "location" in f["properties"] + assert len(list(src)) == 1 + + # write again and assert there is no new entry because there is already one + run_cli(["index", cleantopo_br.path, "-z", "5", "--fgb", "--debug"]) + with mapchete.open(cleantopo_br.dict) as mp: + files = mp.config.output.path.ls(absolute_paths=False) + assert "5.fgb" in files + with fiona_open(mp.config.output.path / "5.fgb") as src: + for f in src: + assert "location" in f["properties"] + assert len(list(src)) == 1 + + +def test_text(cleantopo_br): + # execute process + run_cli( + ["execute", cleantopo_br.path, "-z", "5", "--debug", "--concurrency", "none"] + ) + + # generate index + run_cli(["index", cleantopo_br.path, "-z", "5", "--txt", "--debug"]) + with mapchete.open(cleantopo_br.dict) as mp: + files = mp.config.output.path.ls(absolute_paths=False) + assert "5.txt" in files + with open(mp.config.output.path / "5.txt") as src: + lines = list(src) + assert len(lines) == 1 + for l in lines: + assert l.endswith("7.tif\n") + + # write again and assert there is no new entry because there is already one + run_cli(["index", cleantopo_br.path, "-z", "5", "--txt", "--debug"]) + with mapchete.open(cleantopo_br.dict) as mp: + files = mp.config.output.path.ls(absolute_paths=False) + assert "5.txt" in files + with open(mp.config.output.path / "5.txt") as src: + lines = list(src) + assert len(lines) == 1 + for l in lines: + assert l.endswith("7.tif\n") + + +def test_errors(cleantopo_br): + with pytest.raises(ValueError): + run_cli(["index", cleantopo_br.path, "-z", "5", "--debug"]) + + with pytest.raises(SystemExit): + run_cli(["index", "-z", "5", "--debug"]) diff --git a/test/cli/default/test_processes.py b/test/cli/default/test_processes.py new file mode 100644 index 00000000..3c2124a3 --- /dev/null +++ b/test/cli/default/test_processes.py @@ -0,0 +1,6 @@ +from test.cli.default import run_cli + + +def test_processes(): + run_cli(["processes"]) + run_cli(["processes", "-n", "mapchete.processes.examples.example_process"]) diff --git a/test/cli/default/test_rm.py b/test/cli/default/test_rm.py new file mode 100644 index 00000000..45763a4d --- /dev/null +++ b/test/cli/default/test_rm.py @@ -0,0 +1,36 @@ +from test.cli.default import run_cli + + +def test_rm(cleantopo_br): + run_cli( + [ + "execute", + cleantopo_br.path, + "-z", + "5", + "-b", + "169.19251592399996", + "-90", + "180", + "-80.18582802550002", + "--concurrency", + "none", + ] + ) + out_path = cleantopo_br.dict["output"]["path"] / 5 / 3 / "7.tif" + assert out_path.exists() + run_cli( + [ + "rm", + cleantopo_br.output_path, + "-z", + "5", + "-b", + "169.19251592399996", + "-90", + "180", + "-80.18582802550002", + "-f", + ] + ) + assert not out_path.exists() diff --git a/test/cli/default/test_serve.py b/test/cli/default/test_serve.py new file mode 100644 index 00000000..16c0cd93 --- /dev/null +++ b/test/cli/default/test_serve.py @@ -0,0 +1,61 @@ +import warnings +from test.cli.default import run_cli + +import pytest +from rasterio.io import MemoryFile + + +def test_serve_cli_params(cleantopo_br): + """Test whether different CLI params pass.""" + # assert too few arguments error + with pytest.raises(SystemExit): + run_cli(["serve"]) + + for args in [ + ["serve", cleantopo_br.path], + ["serve", cleantopo_br.path, "--port", "5001"], + ["serve", cleantopo_br.path, "--internal-cache", "512"], + ["serve", cleantopo_br.path, "--zoom", "5"], + ["serve", cleantopo_br.path, "--bounds", "-1", "-1", "1", "1"], + ["serve", cleantopo_br.path, "--overwrite"], + ["serve", cleantopo_br.path, "--readonly"], + ["serve", cleantopo_br.path, "--memory"], + ]: + run_cli(args) + + +def test_serve(client): + """Mapchete serve with default settings.""" + tile_base_url = "/wmts_simple/1.0.0/dem_to_hillshade/default/WGS84/" + for url in ["/"]: + response = client.get(url) + assert response.status_code == 200 + for url in [ + tile_base_url + "5/30/62.png", + tile_base_url + "5/30/63.png", + tile_base_url + "5/31/62.png", + tile_base_url + "5/31/63.png", + ]: + response = client.get(url) + assert response.status_code == 200 + img = response.data + with warnings.catch_warnings(): + warnings.simplefilter("ignore") + with MemoryFile(img) as memfile: + with memfile.open() as dataset: + data = dataset.read() + # get alpha band and assert some pixels are masked + assert data[3].any() + # test outside zoom range + response = client.get(tile_base_url + "6/31/63.png") + assert response.status_code == 200 + img = response.data + with warnings.catch_warnings(): + warnings.simplefilter("ignore") + with MemoryFile(img) as memfile: + with memfile.open() as dataset: + data = dataset.read() + assert not data.all() + # test invalid url + response = client.get(tile_base_url + "invalid_url") + assert response.status_code == 404 diff --git a/test/cli/default/test_stac.py b/test/cli/default/test_stac.py new file mode 100644 index 00000000..51f34012 --- /dev/null +++ b/test/cli/default/test_stac.py @@ -0,0 +1,38 @@ +from test.cli.default import run_cli + +import pytest +import rasterio +from packaging import version + +from mapchete.io import rasterio_open + + +def test_mapchete_file(cleantopo_br): + run_cli(["execute", cleantopo_br.path]) + run_cli(["stac", "create-item", cleantopo_br.path, "-z", "5", "--force"]) + + +@pytest.mark.integration +def test_tiledir(http_tiledir, mp_tmpdir): + run_cli( + [ + "stac", + "create-item", + http_tiledir, + "-z", + "5", + "--force", + "--item-path", + f"{mp_tmpdir}/stac_example.json", + ] + ) + + +@pytest.mark.skipif( + version.parse(rasterio.__gdal_version__) < version.parse("3.3.0"), + reason="required STACTA driver is only available in GDAL>=3.3.0", +) +def test_prototype_files(cleantopo_br): + run_cli(["execute", cleantopo_br.path]) + run_cli(["stac", "create-prototype-files", cleantopo_br.path]) + rasterio_open(cleantopo_br.mp().config.output.stac_path) diff --git a/test/cli/test_main.py b/test/cli/test_main.py new file mode 100644 index 00000000..54e0c554 --- /dev/null +++ b/test/cli/test_main.py @@ -0,0 +1,19 @@ +from test.cli.default import run_cli + + +def test_main(): + # """Main CLI.""" + for command in ["execute", "serve", "cp", "index"]: + run_cli( + [command], + expected_exit_code=2, + output_contains="Error: Missing argument", + raise_exc=False, + ) + + run_cli( + ["invalid_command"], + expected_exit_code=2, + output_contains="Error: No such command", + raise_exc=False, + ) diff --git a/test/test_cli_mpath.py b/test/cli/test_mpath.py similarity index 97% rename from test/test_cli_mpath.py rename to test/cli/test_mpath.py index 28a08847..88d77d44 100644 --- a/test/test_cli_mpath.py +++ b/test/cli/test_mpath.py @@ -1,9 +1,9 @@ +from test.cli.default import run_cli + import pytest from mapchete.cli.mpath import mpath -from .test_cli import run_cli - @pytest.mark.integration def test_mpath(): diff --git a/test/cli/test_options.py b/test/cli/test_options.py new file mode 100644 index 00000000..137e7d65 --- /dev/null +++ b/test/cli/test_options.py @@ -0,0 +1,29 @@ +from mapchete.cli import options + + +def test_fs_opt_extractor(): + kwargs = options._cb_key_val( + None, + None, + [ + "str=bar", + "int=2", + "float=1.5", + "bool1=true", + "bool2=FALSE", + "bool3=yes", + "bool4=no", + "none=none", + "none2=null", + ], + ) + assert isinstance(kwargs, dict) + assert kwargs["str"] == "bar" + assert kwargs["int"] == 2 + assert kwargs["float"] == 1.5 + assert kwargs["bool1"] is True + assert kwargs["bool2"] is False + assert kwargs["bool3"] is True + assert kwargs["bool4"] is False + assert kwargs["none"] is None + assert kwargs["none2"] is None diff --git a/test/test_cli.py b/test/test_cli.py deleted file mode 100644 index cd6cbfc1..00000000 --- a/test/test_cli.py +++ /dev/null @@ -1,1519 +0,0 @@ -"""Test Mapchete main module and processing.""" -import logging -import os -import warnings - -import geobuf -import pytest -import rasterio -import yaml -from click.testing import CliRunner -from packaging import version -from rasterio.io import MemoryFile -from rio_cogeo.cogeo import cog_validate -from shapely import wkt -from shapely.geometry import shape - -import mapchete -from mapchete.cli import options -from mapchete.cli.main import main as mapchete_cli -from mapchete.io import fiona_open, rasterio_open - -logger = logging.getLogger(__name__) - -SCRIPTDIR = os.path.dirname(os.path.realpath(__file__)) -TESTDATA_DIR = os.path.join(SCRIPTDIR, "testdata") - - -def version_is_greater_equal(a, b): - a_major, a_minor, a_patch = a - b_major, b_minor, b_patch = b - if a_major > b_major: - return True - elif a_major == b_major: - if a_minor > b_minor: - return True - elif a_minor == b_minor: - return a_patch >= b_patch - else: - return False - else: - return False - - -def run_cli( - args, expected_exit_code=0, output_contains=None, raise_exc=True, cli=mapchete_cli -): - result = CliRunner(env=dict(MAPCHETE_TEST="TRUE"), mix_stderr=True).invoke( - cli, map(str, args), catch_exceptions=True, standalone_mode=True - ) - if output_contains: - assert output_contains in result.output or output_contains in str( - result.exception - ) - if raise_exc and result.exception: - logger.error(result.output or result.exception) - raise result.exception - # raise ClickException(result.output or result.exception) - assert result.exit_code == expected_exit_code - return result - - -def test_main(): - # """Main CLI.""" - for command in ["execute", "serve", "cp", "index"]: - run_cli( - [command], - expected_exit_code=2, - output_contains="Error: Missing argument", - raise_exc=False, - ) - - run_cli(["formats"], expected_exit_code=0) - - run_cli( - ["invalid_command"], - expected_exit_code=2, - output_contains="Error: No such command", - raise_exc=False, - ) - - -def test_create(mp_tmpdir, cleantopo_br_tif): - """Run mapchete create and execute.""" - temp_mapchete = mp_tmpdir / "temp.mapchete" - temp_process = mp_tmpdir / "temp.py" - out_format = "GTiff" - # create from template - run_cli( - [ - "create", - "--mapchete-file", - str(temp_mapchete), - "--process-file", - str(temp_process), - "--out-format", - out_format, - "--pyramid-type", - "geodetic", - ], - expected_exit_code=0, - ) - # edit configuration - with temp_mapchete.open("r") as config_file: - config = yaml.safe_load(config_file) - config["output"].update(bands=1, dtype="uint8", path=str(mp_tmpdir)) - with temp_mapchete.open("w") as config_file: - config_file.write(yaml.dump(config, default_flow_style=False)) - - -def test_create_existing(mp_tmpdir): - """Run mapchete create and execute.""" - temp_mapchete = mp_tmpdir / "temp.mapchete" - temp_process = mp_tmpdir / "temp.py" - out_format = "GTiff" - # create files from template - args = [ - "create", - "--mapchete-file", - temp_mapchete, - "--process-file", - temp_process, - "--out-format", - out_format, - "--pyramid-type", - "geodetic", - ] - run_cli(args) - # try to create again - with pytest.raises((IOError, OSError)): # for python 2 and 3 - run_cli(args, expected_exit_code=-1) - - -def test_execute_concurrent_processes(mp_tmpdir, cleantopo_br_metatiling_1): - # """Run mapchete execute with multiple workers.""" - run_cli( - [ - "execute", - cleantopo_br_metatiling_1.path, - "--zoom", - "5", - "--workers", - "2", - "-d", - "--concurrency", - "processes", - ] - ) - - -def test_execute_concurrent_threads(mp_tmpdir, cleantopo_br_metatiling_1): - """Run mapchete execute with multiple workers.""" - run_cli( - [ - "execute", - cleantopo_br_metatiling_1.path, - "--zoom", - "5", - "--workers", - "2", - "-d", - "--concurrency", - "threads", - ], - ) - - -def test_execute_concurrent_dask(mp_tmpdir, cleantopo_br_metatiling_1): - """Run mapchete execute with multiple workers.""" - run_cli( - [ - "execute", - cleantopo_br_metatiling_1.path, - "--zoom", - "5", - "--workers", - "2", - "-d", - "--concurrency", - "dask", - ], - ) - - -def test_execute_debug(mp_tmpdir, example_mapchete): - """Using debug output.""" - run_cli( - [ - "execute", - example_mapchete.path, - "-t", - "10", - "500", - "1040", - "--debug", - "--concurrency", - "none", - ] - ) - - -def test_execute_vrt(mp_tmpdir, cleantopo_br): - """Using debug output.""" - run_cli(["execute", cleantopo_br.path, "-z", "5", "--vrt"]) - with mapchete.open(cleantopo_br.dict) as mp: - vrt_path = mp.config.output.path / "5.vrt" - with rasterio_open(vrt_path) as src: - assert src.read().any() - - # run again, this time with custom output directory - run_cli( - [ - "execute", - cleantopo_br.path, - "-z", - "5", - "--vrt", - "--idx-out-dir", - mp_tmpdir, - "--concurrency", - "none", - ] - ) - with mapchete.open(cleantopo_br.dict) as mp: - vrt_path = os.path.join(mp_tmpdir, "5.vrt") - with rasterio_open(vrt_path) as src: - assert src.read().any() - - # run with single tile - run_cli( - [ - "execute", - cleantopo_br.path, - "-t", - "5", - "3", - "7", - "--vrt", - "--concurrency", - "none", - ] - ) - - # no new entries - run_cli( - [ - "execute", - cleantopo_br.path, - "-t", - "5", - "0", - "0", - "--vrt", - "--concurrency", - "none", - ] - ) - - -def test_execute_verbose(mp_tmpdir, example_mapchete): - """Using verbose output.""" - run_cli( - [ - "execute", - example_mapchete.path, - "-t", - "10", - "500", - "1040", - "--verbose", - "--concurrency", - "none", - ] - ) - - -def test_execute_logfile(mp_tmpdir, example_mapchete): - """Using logfile.""" - logfile = os.path.join(mp_tmpdir, "temp.log") - run_cli( - [ - "execute", - example_mapchete.path, - "-t", - "10", - "500", - "1040", - "--logfile", - logfile, - "--concurrency", - "none", - ] - ) - assert os.path.isfile(logfile) - with open(logfile) as log: - assert "DEBUG" in log.read() - - -def test_execute_wkt_area(mp_tmpdir, example_mapchete, wkt_geom): - """Using area from WKT.""" - run_cli( - ["execute", example_mapchete.path, "--area", wkt_geom, "--concurrency", "none"] - ) - - -def test_execute_point(mp_tmpdir, example_mapchete, wkt_geom): - """Using bounds from WKT.""" - g = wkt.loads(wkt_geom) - run_cli( - [ - "execute", - example_mapchete.path, - "--point", - str(g.centroid.x), - str(g.centroid.y), - "--concurrency", - "none", - ] - ) - - -def test_formats(capfd): - """Output of mapchete formats command.""" - run_cli(["formats"]) - err = capfd.readouterr()[1] - assert not err - run_cli(["formats", "-i"]) - err = capfd.readouterr()[1] - assert not err - run_cli(["formats", "-o"]) - err = capfd.readouterr()[1] - assert not err - - -def test_convert_geodetic(cleantopo_br_tif, mp_tmpdir): - """Automatic geodetic tile pyramid creation of raster files.""" - run_cli( - [ - "convert", - cleantopo_br_tif, - mp_tmpdir, - "--output-pyramid", - "geodetic", - "--concurrency", - "none", - ] - ) - for zoom, row, col in [(4, 15, 31), (3, 7, 15), (2, 3, 7), (1, 1, 3)]: - out_file = os.path.join(*[mp_tmpdir, str(zoom), str(row), str(col) + ".tif"]) - with rasterio_open(out_file, "r") as src: - assert src.meta["driver"] == "GTiff" - assert src.meta["dtype"] == "uint16" - data = src.read(masked=True) - assert data.mask.any() - - -def test_convert_mercator(cleantopo_br_tif, mp_tmpdir): - """Automatic mercator tile pyramid creation of raster files.""" - run_cli( - [ - "convert", - cleantopo_br_tif, - mp_tmpdir, - "--output-pyramid", - "mercator", - "--concurrency", - "none", - ] - ) - for zoom, row, col in [(4, 15, 15), (3, 7, 7)]: - out_file = os.path.join(*[mp_tmpdir, str(zoom), str(row), str(col) + ".tif"]) - with rasterio_open(out_file, "r") as src: - assert src.meta["driver"] == "GTiff" - assert src.meta["dtype"] == "uint16" - data = src.read(masked=True) - assert data.mask.any() - - -def test_convert_custom_grid(s2_band, mp_tmpdir, custom_grid_json): - """Automatic mercator tile pyramid creation of raster files.""" - run_cli( - [ - "convert", - s2_band, - mp_tmpdir, - "--output-pyramid", - custom_grid_json, - "--concurrency", - "none", - ] - ) - - for zoom, row, col in [(0, 5298, 631)]: - out_file = mp_tmpdir / zoom / row / col + ".tif" - with rasterio_open(out_file, "r") as src: - assert src.meta["driver"] == "GTiff" - assert src.meta["dtype"] == "uint16" - data = src.read(masked=True) - assert data.mask.any() - - -def test_convert_png(cleantopo_br_tif, mp_tmpdir): - """Automatic PNG tile pyramid creation of raster files.""" - run_cli( - [ - "convert", - cleantopo_br_tif, - mp_tmpdir, - "--output-pyramid", - "mercator", - "--output-format", - "PNG", - "--concurrency", - "none", - ] - ) - for zoom, row, col in [(4, 15, 15), (3, 7, 7)]: - out_file = mp_tmpdir / zoom / row / col + ".png" - with warnings.catch_warnings(): - warnings.simplefilter("ignore") - with rasterio_open(out_file, "r") as src: - assert src.meta["driver"] == "PNG" - assert src.meta["dtype"] == "uint8" - data = src.read(masked=True) - assert data.mask.any() - - -def test_convert_bidx(cleantopo_br_tif, mp_tmpdir): - """Automatic geodetic tile pyramid creation of raster files.""" - single_gtiff = mp_tmpdir / "single_out_bidx.tif" - run_cli( - [ - "convert", - cleantopo_br_tif, - single_gtiff, - "--output-pyramid", - "geodetic", - "-z", - "3", - "--bidx", - "1", - "--concurrency", - "none", - ] - ) - with rasterio_open(single_gtiff, "r") as src: - assert src.meta["driver"] == "GTiff" - assert src.meta["dtype"] == "uint16" - data = src.read(masked=True) - assert data.mask.any() - assert not src.overviews(1) - - -def test_convert_single_gtiff(cleantopo_br_tif, mp_tmpdir): - """Automatic geodetic tile pyramid creation of raster files.""" - single_gtiff = mp_tmpdir / "single_out.tif" - run_cli( - [ - "convert", - cleantopo_br_tif, - single_gtiff, - "--output-pyramid", - "geodetic", - "-z", - "3", - "--concurrency", - "none", - ] - ) - with rasterio_open(single_gtiff, "r") as src: - assert src.meta["driver"] == "GTiff" - assert src.meta["dtype"] == "uint16" - data = src.read(masked=True) - assert data.mask.any() - assert not src.overviews(1) - - -def test_convert_single_gtiff_cog(cleantopo_br_tif, mp_tmpdir): - """Automatic geodetic tile pyramid creation of raster files.""" - single_gtiff = mp_tmpdir / "single_out_cog.tif" - run_cli( - [ - "convert", - cleantopo_br_tif, - single_gtiff, - "--output-pyramid", - "geodetic", - "-z", - "3", - "--cog", - "--concurrency", - "none", - ] - ) - with rasterio_open(single_gtiff, "r") as src: - assert src.meta["driver"] == "GTiff" - assert src.meta["dtype"] == "uint16" - data = src.read(masked=True) - assert data.mask.any() - assert cog_validate(single_gtiff, strict=True) - - -def test_convert_single_gtiff_overviews(cleantopo_br_tif, mp_tmpdir): - """Automatic geodetic tile pyramid creation of raster files.""" - single_gtiff = mp_tmpdir / "single_out.tif" - run_cli( - [ - "convert", - cleantopo_br_tif, - single_gtiff, - "--output-pyramid", - "geodetic", - "-z", - "7", - "--overviews", - "--overviews-resampling-method", - "bilinear", - "--workers", - "1", - "--concurrency", - "none", - ] - ) - with rasterio_open(single_gtiff, "r") as src: - assert src.meta["driver"] == "GTiff" - assert src.meta["dtype"] == "uint16" - data = src.read(masked=True) - assert data.mask.any() - assert src.overviews(1) - - -@pytest.mark.integration -def test_convert_remote_single_gtiff(http_raster, mp_tmpdir): - """Automatic geodetic tile pyramid creation of raster files.""" - single_gtiff = mp_tmpdir / "single_out.tif" - run_cli( - [ - "convert", - http_raster, - single_gtiff, - "--output-pyramid", - "geodetic", - "-z", - "1", - "--concurrency", - "none", - ] - ) - with rasterio_open(single_gtiff, "r") as src: - assert src.meta["driver"] == "GTiff" - assert src.meta["dtype"] == "uint16" - data = src.read(masked=True) - assert data.any() - - -def test_convert_dtype(cleantopo_br_tif, mp_tmpdir): - """Automatic tile pyramid creation using dtype scale.""" - run_cli( - [ - "convert", - cleantopo_br_tif, - mp_tmpdir, - "--output-pyramid", - "mercator", - "--output-dtype", - "uint8", - "--concurrency", - "none", - ] - ) - for zoom, row, col in [(4, 15, 15), (3, 7, 7)]: - out_file = mp_tmpdir / zoom / row / col + ".tif" - with rasterio_open(out_file, "r") as src: - assert src.meta["driver"] == "GTiff" - assert src.meta["dtype"] == "uint8" - data = src.read(masked=True) - assert data.mask.any() - - -def test_convert_scale_ratio(cleantopo_br_tif, mp_tmpdir): - """Automatic tile pyramid creation cropping data.""" - run_cli( - [ - "convert", - cleantopo_br_tif, - mp_tmpdir, - "--output-pyramid", - "mercator", - "--output-dtype", - "uint8", - "--scale-ratio", - "0.003", - "--concurrency", - "none", - ] - ) - for zoom, row, col in [(4, 15, 15), (3, 7, 7)]: - out_file = mp_tmpdir / zoom / row / col + ".tif" - with rasterio_open(out_file, "r") as src: - assert src.meta["driver"] == "GTiff" - assert src.meta["dtype"] == "uint8" - data = src.read(masked=True) - assert data.mask.any() - assert not data.mask.all() - - -def test_convert_scale_offset(cleantopo_br_tif, mp_tmpdir): - """Automatic tile pyramid creation cropping data.""" - run_cli( - [ - "convert", - cleantopo_br_tif, - mp_tmpdir, - "--output-pyramid", - "mercator", - "--output-dtype", - "uint8", - "--scale-offset", - "1", - "--concurrency", - "none", - ] - ) - for zoom, row, col in [(4, 15, 15), (3, 7, 7)]: - out_file = mp_tmpdir / zoom / row / col + ".tif" - with rasterio_open(out_file, "r") as src: - assert src.meta["driver"] == "GTiff" - assert src.meta["dtype"] == "uint8" - data = src.read(masked=True) - assert data.mask.any() - assert not data.mask.all() - - -def test_convert_clip(cleantopo_br_tif, mp_tmpdir, landpoly): - """Automatic tile pyramid creation cropping data.""" - run_cli( - [ - "convert", - cleantopo_br_tif, - mp_tmpdir, - "--output-pyramid", - "geodetic", - "--clip-geometry", - landpoly, - "-v", - "--concurrency", - "none", - ], - output_contains="Process area is empty", - ) - - -@pytest.mark.parametrize( - "zoom, tiles", - [("3", [(4, 15, 15), (2, 3, 0)]), ("3,4", [(2, 3, 0)]), ("4,3", [(2, 3, 0)])], -) -def test_convert_zoom(cleantopo_br_tif, mp_tmpdir, zoom, tiles): - """Automatic tile pyramid creation using a specific zoom.""" - run_cli( - [ - "convert", - cleantopo_br_tif, - mp_tmpdir, - "--output-pyramid", - "mercator", - "-z", - "3", - "--concurrency", - "none", - ] - ) - for zoom, row, col in tiles: - out_file = mp_tmpdir / zoom / row / col + ".tif" - assert not out_file.exists() - - -def test_convert_mapchete(cleantopo_br, mp_tmpdir): - # prepare data - with mapchete.open(cleantopo_br.path) as mp: - list(mp.execute(zoom=[1, 4])) - run_cli( - [ - "convert", - cleantopo_br.path, - mp_tmpdir, - "--output-pyramid", - "geodetic", - "--output-metatiling", - "1", - "-d", - "--concurrency", - "none", - "--bounds", - "168.75", - "-90.0", - "180.0", - "-78.75", - ] - ) - for zoom, row, col in [(4, 15, 31), (3, 7, 15), (2, 3, 7), (1, 1, 3)]: - out_file = mp_tmpdir / zoom / row / col + ".tif" - with rasterio_open(out_file, "r") as src: - assert src.meta["driver"] == "GTiff" - assert src.meta["dtype"] == "uint16" - data = src.read(masked=True) - assert data.mask.any() - - -def test_convert_tiledir(cleantopo_br, mp_tmpdir): - # prepare data - with mapchete.open(cleantopo_br.path) as mp: - list(mp.execute(zoom=[1, 4])) - run_cli( - [ - "convert", - os.path.join( - cleantopo_br.dict["config_dir"], cleantopo_br.dict["output"]["path"] - ), - mp_tmpdir, - "--output-pyramid", - "geodetic", - "--output-metatiling", - "1", - "--zoom", - "1,4", - "-d", - "--concurrency", - "none", - "--bounds", - "168.75", - "-90.0", - "180.0", - "-78.75", - ] - ) - for zoom, row, col in [(4, 15, 31), (3, 7, 15), (2, 3, 7), (1, 1, 3)]: - out_file = mp_tmpdir / zoom / row / col + ".tif" - with rasterio_open(out_file, "r") as src: - assert src.meta["driver"] == "GTiff" - assert src.meta["dtype"] == "uint16" - data = src.read(masked=True) - assert data.mask.any() - - -def test_convert_geojson(landpoly, mp_tmpdir): - run_cli( - [ - "convert", - landpoly, - mp_tmpdir, - "--output-pyramid", - "geodetic", - "--zoom", - "4", - "--bounds", - "-101.25", - "78.75", - "-90.0", - "90.0", - "--concurrency", - "none", - ] - ) - zoom, row, col = (4, 0, 7) - control = 9 - out_file = mp_tmpdir / zoom / row / col + ".geojson" - with fiona_open(out_file, "r") as src: - assert len(src) == control - for f in src: - assert shape(f["geometry"]).is_valid - - -def test_convert_geobuf(landpoly, mp_tmpdir): - # convert to geobuf - geobuf_outdir = mp_tmpdir / "geobuf" - run_cli( - [ - "convert", - landpoly, - geobuf_outdir, - "--output-pyramid", - "geodetic", - "--zoom", - "4", - "--output-format", - "Geobuf", - "--concurrency", - "none", - "--bounds", - "-101.25", - "67.5", - "-90.0", - "90.0", - ] - ) - for (zoom, row, col), control in zip([(4, 0, 7), (4, 1, 7)], [9, 32]): - out_file = geobuf_outdir / zoom / row / col + ".pbf" - with open(out_file, "rb") as src: - features = geobuf.decode(src.read())["features"] - assert len(features) == control - for f in features: - assert f["geometry"]["type"] == "Polygon" - assert shape(f["geometry"]).area - - # convert from geobuf - geojson_outdir = mp_tmpdir / "geojson" - run_cli( - [ - "convert", - geobuf_outdir, - geojson_outdir, - "--zoom", - "4", - "--output-format", - "GeoJSON", - "--concurrency", - "none", - "--bounds", - "-101.25", - "67.5", - "-90.0", - "90.0", - ] - ) - for (zoom, row, col), control in zip([(4, 0, 7), (4, 1, 7)], [9, [31, 32]]): - out_file = geojson_outdir / zoom / row / col + ".geojson" - with fiona_open(out_file, "r") as src: - if isinstance(control, list): - assert len(src) in control - else: - assert len(src) == control - - for f in src: - assert shape(f["geometry"]).is_valid - - -def test_convert_geobuf_multipolygon(landpoly, mp_tmpdir): - run_cli( - [ - "convert", - landpoly, - mp_tmpdir, - "--output-pyramid", - "geodetic", - "--zoom", - "4", - "--output-format", - "Geobuf", - "--output-geometry-type", - "MultiPolygon", - "--concurrency", - "none", - ] - ) - for (zoom, row, col), control in zip([(4, 0, 7), (4, 1, 7)], [7, 30]): - out_file = mp_tmpdir / zoom / row / col + ".pbf" - with open(out_file, "rb") as src: - features = geobuf.decode(src.read())["features"] - assert len(features) == control - multipolygons = 0 - for f in features: - assert f["geometry"]["type"] in ["Polygon", "MultiPolygon"] - assert shape(f["geometry"]).area - if f["geometry"]["type"] == "MultiPolygon": - multipolygons += 1 - assert multipolygons - - -def test_convert_vrt(cleantopo_br_tif, mp_tmpdir): - """Automatic geodetic tile pyramid creation of raster files.""" - run_cli( - [ - "convert", - cleantopo_br_tif, - mp_tmpdir, - "--output-pyramid", - "geodetic", - "--vrt", - "--zoom", - "1,4", - "--concurrency", - "none", - ] - ) - for zoom in [4, 3, 2, 1]: - out_file = mp_tmpdir / zoom + ".vrt" - with rasterio_open(out_file, "r") as src: - assert src.meta["driver"] == "VRT" - assert src.meta["dtype"] == "uint16" - data = src.read(masked=True) - assert data.mask.any() - - -def test_convert_errors(s2_band_jp2, mp_tmpdir, s2_band, cleantopo_br, landpoly): - # output format required - run_cli( - ["convert", s2_band_jp2, mp_tmpdir, "--output-pyramid", "geodetic"], - expected_exit_code=1, - output_contains="Output format required.", - raise_exc=False, - ) - - # output pyramid reqired - run_cli( - ["convert", s2_band, mp_tmpdir], - expected_exit_code=1, - output_contains="Output pyramid required.", - raise_exc=False, - ) - - # prepare data for tiledir input - with mapchete.open(cleantopo_br.path) as mp: - mp.execute(zoom=[1, 4]) - tiledir_path = cleantopo_br.dict["config_dir"] / cleantopo_br.dict["output"]["path"] - - # zoom level required - run_cli( - [ - "convert", - tiledir_path, - mp_tmpdir, - "--output-pyramid", - "geodetic", - "--concurrency", - "none", - ], - expected_exit_code=1, - output_contains="Zoom levels required.", - raise_exc=False, - ) - - # incompatible formats - run_cli( - [ - "convert", - tiledir_path, - mp_tmpdir, - "--output-pyramid", - "geodetic", - "--zoom", - "5", - "--output-format", - "GeoJSON", - "--concurrency", - "none", - ], - expected_exit_code=1, - output_contains=("is incompatible with input format"), - raise_exc=False, - ) - - # unsupported output format extension - run_cli( - [ - "convert", - s2_band_jp2, - "output.jp2", - "--output-pyramid", - "geodetic", - "--zoom", - "5", - "--concurrency", - "none", - ], - expected_exit_code=1, - output_contains=("currently only single file GeoTIFFs are allowed"), - raise_exc=False, - ) - - # malformed band index - run_cli( - ["convert", s2_band_jp2, "output.tif", "--bidx", "invalid"], - expected_exit_code=2, - output_contains=("Invalid value for '--bidx'"), - raise_exc=False, - ) - - -def test_serve_cli_params(cleantopo_br, mp_tmpdir): - """Test whether different CLI params pass.""" - # assert too few arguments error - with pytest.raises(SystemExit): - run_cli(["serve"]) - - for args in [ - ["serve", cleantopo_br.path], - ["serve", cleantopo_br.path, "--port", "5001"], - ["serve", cleantopo_br.path, "--internal-cache", "512"], - ["serve", cleantopo_br.path, "--zoom", "5"], - ["serve", cleantopo_br.path, "--bounds", "-1", "-1", "1", "1"], - ["serve", cleantopo_br.path, "--overwrite"], - ["serve", cleantopo_br.path, "--readonly"], - ["serve", cleantopo_br.path, "--memory"], - ]: - run_cli(args) - - -def test_serve(client, mp_tmpdir): - """Mapchete serve with default settings.""" - tile_base_url = "/wmts_simple/1.0.0/dem_to_hillshade/default/WGS84/" - for url in ["/"]: - response = client.get(url) - assert response.status_code == 200 - for url in [ - tile_base_url + "5/30/62.png", - tile_base_url + "5/30/63.png", - tile_base_url + "5/31/62.png", - tile_base_url + "5/31/63.png", - ]: - response = client.get(url) - assert response.status_code == 200 - img = response.data - with warnings.catch_warnings(): - warnings.simplefilter("ignore") - with MemoryFile(img) as memfile: - with memfile.open() as dataset: - data = dataset.read() - # get alpha band and assert some pixels are masked - assert data[3].any() - # test outside zoom range - response = client.get(tile_base_url + "6/31/63.png") - assert response.status_code == 200 - img = response.data - with warnings.catch_warnings(): - warnings.simplefilter("ignore") - with MemoryFile(img) as memfile: - with memfile.open() as dataset: - data = dataset.read() - assert not data.all() - # test invalid url - response = client.get(tile_base_url + "invalid_url") - assert response.status_code == 404 - - -def test_index_geojson(mp_tmpdir, cleantopo_br): - # execute process at zoom 3 - run_cli( - ["execute", cleantopo_br.path, "-z", "3", "--debug", "--concurrency", "none"] - ) - - # generate index for zoom 3 - run_cli(["index", cleantopo_br.path, "-z", "3", "--geojson", "--debug"]) - with mapchete.open(cleantopo_br.dict) as mp: - files = mp.config.output.path.ls() - assert len(files) == 4 - with fiona_open(mp.config.output.path / "3.geojson") as src: - for f in src: - assert "location" in f["properties"] - assert len(list(src)) == 1 - - -def test_index_geojson_fieldname(mp_tmpdir, cleantopo_br): - # execute process at zoom 3 - run_cli( - ["execute", cleantopo_br.path, "-z", "3", "--debug", "--concurrency", "none"] - ) - - # index and rename "location" to "new_fieldname" - run_cli( - [ - "index", - cleantopo_br.path, - "-z", - "3", - "--geojson", - "--debug", - "--fieldname", - "new_fieldname", - ] - ) - with mapchete.open(cleantopo_br.dict) as mp: - with fiona_open(mp.config.output.path / "3.geojson") as src: - for f in src: - assert "new_fieldname" in f["properties"] - assert len(list(src)) == 1 - - -def test_index_geojson_basepath(mp_tmpdir, cleantopo_br): - # execute process at zoom 3 - run_cli( - ["execute", cleantopo_br.path, "-z", "3", "--debug", "--concurrency", "none"] - ) - - basepath = "http://localhost" - # index and rename "location" to "new_fieldname" - run_cli( - [ - "index", - cleantopo_br.path, - "-z", - "3", - "--geojson", - "--debug", - "--basepath", - basepath, - ] - ) - with mapchete.open(cleantopo_br.dict) as mp: - with fiona_open(mp.config.output.path / "3.geojson") as src: - for f in src: - assert f["properties"]["location"].startswith(basepath) - assert len(list(src)) == 1 - - -def test_index_geojson_for_gdal(mp_tmpdir, cleantopo_br): - # execute process at zoom 3 - run_cli(["execute", cleantopo_br.path, "-z", "3", "--debug"]) - - basepath = "http://localhost" - # index and rename "location" to "new_fieldname" - run_cli( - [ - "index", - cleantopo_br.path, - "-z", - "3", - "--geojson", - "--debug", - "--basepath", - basepath, - "--for-gdal", - ] - ) - with mapchete.open(cleantopo_br.dict) as mp: - with fiona_open(mp.config.output.path / "3.geojson") as src: - for f in src: - assert f["properties"]["location"].startswith("/vsicurl/" + basepath) - assert len(list(src)) == 1 - - -def test_index_geojson_tile(mp_tmpdir, cleantopo_tl): - # execute process for single tile - run_cli( - [ - "execute", - cleantopo_tl.path, - "-t", - "3", - "0", - "0", - "--debug", - "--concurrency", - "none", - ] - ) - # generate index - run_cli(["index", cleantopo_tl.path, "-t", "3", "0", "0", "--geojson", "--debug"]) - with mapchete.open(cleantopo_tl.dict) as mp: - files = os.listdir(mp.config.output.path) - assert len(files) == 4 - with fiona_open(mp.config.output.path / "3.geojson") as src: - assert len(list(src)) == 1 - - -def test_index_geojson_wkt_area(mp_tmpdir, cleantopo_br, wkt_geom): - # execute process at zoom 3 - run_cli( - [ - "execute", - cleantopo_br.path, - "--debug", - "--area", - wkt_geom, - "--concurrency", - "none", - ] - ) - - # generate index for zoom 3 - run_cli(["index", cleantopo_br.path, "--geojson", "--debug", "--area", wkt_geom]) - - with mapchete.open(cleantopo_br.dict) as mp: - files = os.listdir(mp.config.output.path) - assert len(files) == 8 # was 7 before doing the observer pattern - assert "3.geojson" in files - - -def test_index_gpkg(mp_tmpdir, cleantopo_br): - # execute process - run_cli( - ["execute", cleantopo_br.path, "-z", "5", "--debug", "--concurrency", "none"] - ) - - # generate index - run_cli(["index", cleantopo_br.path, "-z", "5", "--gpkg", "--debug"]) - with mapchete.open(cleantopo_br.dict) as mp: - files = os.listdir(mp.config.output.path) - assert "5.gpkg" in files - with fiona_open(mp.config.output.path / "5.gpkg") as src: - for f in src: - assert "location" in f["properties"] - assert len(list(src)) == 1 - - # write again and assert there is no new entry because there is already one - run_cli(["index", cleantopo_br.path, "-z", "5", "--gpkg", "--debug"]) - with mapchete.open(cleantopo_br.dict) as mp: - files = os.listdir(mp.config.output.path) - assert "5.gpkg" in files - with fiona_open(mp.config.output.path / "5.gpkg") as src: - for f in src: - assert "location" in f["properties"] - assert len(list(src)) == 1 - - -def test_index_shp(mp_tmpdir, cleantopo_br): - # execute process - run_cli( - ["execute", cleantopo_br.path, "-z", "5", "--debug", "--concurrency", "none"] - ) - - # generate index - run_cli(["index", cleantopo_br.path, "-z", "5", "--shp", "--debug"]) - with mapchete.open(cleantopo_br.dict) as mp: - files = os.listdir(mp.config.output.path) - assert "5.shp" in files - with fiona_open(mp.config.output.path / "5.shp") as src: - for f in src: - assert "location" in f["properties"] - assert len(list(src)) == 1 - - # write again and assert there is no new entry because there is already one - run_cli(["index", cleantopo_br.path, "-z", "5", "--shp", "--debug"]) - with mapchete.open(cleantopo_br.dict) as mp: - files = os.listdir(mp.config.output.path) - assert "5.shp" in files - with fiona_open(mp.config.output.path / "5.shp") as src: - for f in src: - assert "location" in f["properties"] - assert len(list(src)) == 1 - - -def test_index_fgb(cleantopo_br): - # execute process - run_cli( - ["execute", cleantopo_br.path, "-z", "5", "--debug", "--concurrency", "none"] - ) - - # generate index - run_cli(["index", cleantopo_br.path, "-z", "5", "--fgb", "--debug"]) - with mapchete.open(cleantopo_br.dict) as mp: - files = os.listdir(mp.config.output.path) - assert "5.fgb" in files - with fiona_open(mp.config.output.path / "5.fgb") as src: - for f in src: - assert "location" in f["properties"] - assert len(list(src)) == 1 - - # write again and assert there is no new entry because there is already one - run_cli(["index", cleantopo_br.path, "-z", "5", "--fgb", "--debug"]) - with mapchete.open(cleantopo_br.dict) as mp: - files = os.listdir(mp.config.output.path) - assert "5.fgb" in files - with fiona_open(mp.config.output.path / "5.fgb") as src: - for f in src: - assert "location" in f["properties"] - assert len(list(src)) == 1 - - -def test_index_text(cleantopo_br): - # execute process - run_cli( - ["execute", cleantopo_br.path, "-z", "5", "--debug", "--concurrency", "none"] - ) - - # generate index - run_cli(["index", cleantopo_br.path, "-z", "5", "--txt", "--debug"]) - with mapchete.open(cleantopo_br.dict) as mp: - files = os.listdir(mp.config.output.path) - assert "5.txt" in files - with open(os.path.join(mp.config.output.path, "5.txt")) as src: - lines = list(src) - assert len(lines) == 1 - for l in lines: - assert l.endswith("7.tif\n") - - # write again and assert there is no new entry because there is already one - run_cli(["index", cleantopo_br.path, "-z", "5", "--txt", "--debug"]) - with mapchete.open(cleantopo_br.dict) as mp: - files = os.listdir(mp.config.output.path) - assert "5.txt" in files - with open(os.path.join(mp.config.output.path, "5.txt")) as src: - lines = list(src) - assert len(lines) == 1 - for l in lines: - assert l.endswith("7.tif\n") - - -def test_index_errors(mp_tmpdir, cleantopo_br): - with pytest.raises(ValueError): - run_cli(["index", cleantopo_br.path, "-z", "5", "--debug"]) - - with pytest.raises(SystemExit): - run_cli(["index", "-z", "5", "--debug"]) - - -def test_processes(): - run_cli(["processes"]) - run_cli(["processes", "-n", "mapchete.processes.examples.example_process"]) - - -def test_callback_errors(cleantopo_tl): - run_cli( - ["execute", cleantopo_tl.path, "--zoom", "4,5,7", "--concurrency", "none"], - expected_exit_code=2, - raise_exc=False, - output_contains="zooms can be maximum two items", - ) - run_cli( - ["execute", cleantopo_tl.path, "--zoom", "invalid", "--concurrency", "none"], - expected_exit_code=2, - raise_exc=False, - output_contains="zoom levels must be integer values", - ) - - -def test_cp(mp_tmpdir, cleantopo_br, wkt_geom): - """Using debug output.""" - # generate TileDirectory - run_cli( - [ - "execute", - cleantopo_br.path, - "-z", - "5", - "-b", - "169.19251592399996", - "-90", - "180", - "-80.18582802550002", - "--concurrency", - "none", - ] - ) - out_path = os.path.join(TESTDATA_DIR, cleantopo_br.dict["output"]["path"]) - - # copy tiles and subset by point - run_cli( - [ - "cp", - out_path, - os.path.join(mp_tmpdir, "all"), - "-z", - "5", - "-p", - "170", - "-85", - "--concurrency", - "none", - ] - ) - # copy tiles and subset by bounds - run_cli( - [ - "cp", - out_path, - os.path.join(mp_tmpdir, "all"), - "-z", - "5", - "-b", - "169.19251592399996", - "-90", - "180", - "-80.18582802550002", - "--concurrency", - "none", - ] - ) - # copy all tiles - run_cli( - [ - "cp", - out_path, - os.path.join(mp_tmpdir, "all"), - "-z", - "5", - "--concurrency", - "none", - ] - ) - # copy tiles and subset by area - run_cli( - [ - "cp", - out_path, - os.path.join(mp_tmpdir, "all"), - "-z", - "5", - "--area", - wkt_geom, - "--concurrency", - "none", - ] - ) - # copy local tiles wit using threads - run_cli( - [ - "cp", - out_path, - os.path.join(mp_tmpdir, "all"), - "-z", - "5", - "--concurrency", - "threads", - ] - ) - - -@pytest.mark.integration -def test_cp_http(mp_tmpdir, http_tiledir): - # copy tiles and subset by bounds - run_cli( - [ - "cp", - http_tiledir, - mp_tmpdir / "http", - "-z", - "1", - "-b", - "3.0", - "1.0", - "4.0", - "2.0", - "--concurrency", - "none", - ] - ) - - -def test_rm(cleantopo_br): - run_cli( - [ - "execute", - cleantopo_br.path, - "-z", - "5", - "-b", - "169.19251592399996", - "-90", - "180", - "-80.18582802550002", - "--concurrency", - "none", - ] - ) - out_path = cleantopo_br.dict["output"]["path"] / 5 / 3 / "7.tif" - assert out_path.exists() - run_cli( - [ - "rm", - cleantopo_br.output_path, - "-z", - "5", - "-b", - "169.19251592399996", - "-90", - "180", - "-80.18582802550002", - "-f", - ] - ) - assert not out_path.exists() - - -def test_fs_opt_extractor(): - kwargs = options._cb_key_val( - None, - None, - [ - "str=bar", - "int=2", - "float=1.5", - "bool1=true", - "bool2=FALSE", - "bool3=yes", - "bool4=no", - "none=none", - "none2=null", - ], - ) - assert isinstance(kwargs, dict) - assert kwargs["str"] == "bar" - assert kwargs["int"] == 2 - assert kwargs["float"] == 1.5 - assert kwargs["bool1"] is True - assert kwargs["bool2"] is False - assert kwargs["bool3"] is True - assert kwargs["bool4"] is False - assert kwargs["none"] is None - assert kwargs["none2"] is None - - -def test_stac_mapchete_file(cleantopo_br): - run_cli(["execute", cleantopo_br.path]) - run_cli(["stac", "create-item", cleantopo_br.path, "-z", "5", "--force"]) - - -@pytest.mark.integration -def test_stac_tiledir(http_tiledir, mp_tmpdir): - run_cli( - [ - "stac", - "create-item", - http_tiledir, - "-z", - "5", - "--force", - "--item-path", - f"{mp_tmpdir}/stac_example.json", - ] - ) - - -@pytest.mark.skipif( - version.parse(rasterio.__gdal_version__) < version.parse("3.3.0"), - reason="required STACTA driver is only available in GDAL>=3.3.0", -) -def test_stac_prototype_files(cleantopo_br): - run_cli(["execute", cleantopo_br.path]) - run_cli(["stac", "create-prototype-files", cleantopo_br.path]) - rasterio_open(cleantopo_br.mp().config.output.stac_path) From e25a8bc03d651cf7de3fbee38e75797110aadf14 Mon Sep 17 00:00:00 2001 From: Joachim Ungar Date: Tue, 14 May 2024 17:22:34 +0200 Subject: [PATCH 26/28] mapchete.commands: collect all tests in test submodule --- mapchete/commands/_convert.py | 18 +- test/commands/__init__.py | 9 + test/commands/test_convert.py | 368 +++++++++++++ test/commands/test_cp.py | 66 +++ test/commands/test_execute.py | 212 ++++++++ test/commands/test_index.py | 246 +++++++++ test/commands/test_observer.py | 0 test/commands/test_parser.py | 0 test/commands/test_rm.py | 46 ++ test/test_commands.py | 925 --------------------------------- 10 files changed, 958 insertions(+), 932 deletions(-) create mode 100644 test/commands/__init__.py create mode 100644 test/commands/test_convert.py create mode 100644 test/commands/test_cp.py create mode 100644 test/commands/test_execute.py create mode 100644 test/commands/test_index.py create mode 100644 test/commands/test_observer.py create mode 100644 test/commands/test_parser.py create mode 100644 test/commands/test_rm.py delete mode 100644 test/test_commands.py diff --git a/mapchete/commands/_convert.py b/mapchete/commands/_convert.py index eaabc1a3..c433651e 100644 --- a/mapchete/commands/_convert.py +++ b/mapchete/commands/_convert.py @@ -43,7 +43,7 @@ def convert( dask_settings: DaskSettings = DaskSettings(), workers: Optional[int] = None, clip_geometry: Optional[str] = None, - bidx: Optional[List[int]] = None, + bidx: Optional[Union[List[int], int]] = None, output_pyramid: Optional[Union[str, dict, MPathLike]] = None, output_metatiling: Optional[int] = None, output_format: Optional[str] = None, @@ -121,9 +121,11 @@ def convert( ), ) if output_pyramid - else input_info.output_pyramid.to_dict() - if input_info.output_pyramid - else None + else ( + input_info.output_pyramid.to_dict() + if input_info.output_pyramid + else None + ) ), output=dict( { @@ -139,9 +141,11 @@ def convert( ), dtype=output_dtype or input_info.output_params.get("dtype"), **creation_options, - **dict(overviews=True, overviews_resampling=overviews_resampling_method) - if overviews - else dict(), + **( + dict(overviews=True, overviews_resampling=overviews_resampling_method) + if overviews + else dict() + ), ), config_dir=os.getcwd(), zoom_levels=zoom or input_info.zoom_levels, diff --git a/test/commands/__init__.py b/test/commands/__init__.py new file mode 100644 index 00000000..c5ebac17 --- /dev/null +++ b/test/commands/__init__.py @@ -0,0 +1,9 @@ +from mapchete.protocols import ObserverProtocol + + +class TaskCounter(ObserverProtocol): + tasks = 0 + + def update(self, *args, progress=None, **kwargs): + if progress: + self.tasks = progress.current diff --git a/test/commands/test_convert.py b/test/commands/test_convert.py new file mode 100644 index 00000000..e2922bf3 --- /dev/null +++ b/test/commands/test_convert.py @@ -0,0 +1,368 @@ +import warnings +from test.commands import TaskCounter + +import geobuf +import pytest +from rasterio.enums import Resampling +from rio_cogeo import cog_validate +from shapely.geometry import shape + +import mapchete +from mapchete.commands import convert, execute +from mapchete.enums import Concurrency +from mapchete.io.raster.open import rasterio_open +from mapchete.io.vector import fiona_open +from mapchete.tile import BufferedTilePyramid + + +def test_convert_geodetic(cleantopo_br_tif, mp_tmpdir): + """Automatic geodetic tile pyramid creation of raster files.""" + convert(cleantopo_br_tif, mp_tmpdir, output_pyramid="geodetic") + for zoom, row, col in [(4, 15, 31), (3, 7, 15), (2, 3, 7), (1, 1, 3)]: + out_file = mp_tmpdir / str(zoom) / str(row) / str(col) + ".tif" + with rasterio_open(out_file, "r") as src: + assert src.meta["driver"] == "GTiff" + assert src.meta["dtype"] == "uint16" + data = src.read(masked=True) + assert data.mask.any() + + +def test_convert_mercator(cleantopo_br_tif, mp_tmpdir): + """Automatic mercator tile pyramid creation of raster files.""" + convert(cleantopo_br_tif, mp_tmpdir, output_pyramid="mercator") + for zoom, row, col in [(4, 15, 15), (3, 7, 7)]: + out_file = mp_tmpdir / str(zoom) / str(row) / str(col) + ".tif" + with rasterio_open(out_file, "r") as src: + assert src.meta["driver"] == "GTiff" + assert src.meta["dtype"] == "uint16" + data = src.read(masked=True) + assert data.mask.any() + + +def test_convert_custom_grid(s2_band, mp_tmpdir, custom_grid_json): + """Automatic mercator tile pyramid creation of raster files.""" + convert(s2_band, mp_tmpdir, output_pyramid=custom_grid_json) + for zoom, row, col in [(0, 5298, 631)]: + out_file = mp_tmpdir / zoom / row / col + ".tif" + with rasterio_open(out_file, "r") as src: + assert src.meta["driver"] == "GTiff" + assert src.meta["dtype"] == "uint16" + data = src.read(masked=True) + assert data.mask.any() + + +def test_convert_png(cleantopo_br_tif, mp_tmpdir): + """Automatic PNG tile pyramid creation of raster files.""" + convert(cleantopo_br_tif, mp_tmpdir, output_pyramid="mercator", output_format="PNG") + + for zoom, row, col in [(4, 15, 15), (3, 7, 7)]: + out_file = mp_tmpdir / str(zoom) / str(row) / str(col) + ".png" + with warnings.catch_warnings(): + warnings.simplefilter("ignore") + with rasterio_open(out_file, "r") as src: + assert src.meta["driver"] == "PNG" + assert src.meta["dtype"] == "uint8" + data = src.read(masked=True) + assert data.mask.any() + + +def test_convert_bidx(cleantopo_br_tif, mp_tmpdir): + """Automatic geodetic tile pyramid creation of raster files.""" + single_gtiff = mp_tmpdir / "single_out_bidx.tif" + convert(cleantopo_br_tif, single_gtiff, output_pyramid="geodetic", zoom=3, bidx=1) + with rasterio_open(single_gtiff, "r") as src: + assert src.meta["driver"] == "GTiff" + assert src.meta["dtype"] == "uint16" + data = src.read(masked=True) + assert data.mask.any() + assert not src.overviews(1) + + +def test_convert_single_gtiff(cleantopo_br_tif, mp_tmpdir): + """Automatic geodetic tile pyramid creation of raster files.""" + single_gtiff = mp_tmpdir / "single_out.tif" + convert(cleantopo_br_tif, single_gtiff, output_pyramid="geodetic", zoom=3) + with rasterio_open(single_gtiff, "r") as src: + assert src.meta["driver"] == "GTiff" + assert src.meta["dtype"] == "uint16" + data = src.read(masked=True) + assert data.mask.any() + assert not src.overviews(1) + + +def test_convert_single_gtiff_cog(cleantopo_br_tif, mp_tmpdir): + """Automatic geodetic tile pyramid creation of raster files.""" + single_gtiff = mp_tmpdir / "single_out_cog.tif" + convert(cleantopo_br_tif, single_gtiff, output_pyramid="geodetic", zoom=5, cog=True) + with rasterio_open(single_gtiff, "r") as src: + assert src.meta["driver"] == "GTiff" + assert src.meta["dtype"] == "uint16" + data = src.read(masked=True) + assert not data.mask.all() + assert cog_validate(single_gtiff, strict=True)[0] + + +def test_convert_single_gtiff_cog_dask(cleantopo_br_tif, mp_tmpdir): + """Automatic geodetic tile pyramid creation of raster files.""" + single_gtiff = mp_tmpdir / "single_out_cog.tif" + convert( + cleantopo_br_tif, + single_gtiff, + output_pyramid="geodetic", + zoom=5, + cog=True, + concurrency=Concurrency.dask, + ) + with rasterio_open(single_gtiff, "r") as src: + assert src.meta["driver"] == "GTiff" + assert src.meta["dtype"] == "uint16" + data = src.read(masked=True) + assert not data.mask.all() + assert cog_validate(single_gtiff, strict=True)[0] + + +def test_convert_single_gtiff_overviews(cleantopo_br_tif, mp_tmpdir): + """Automatic geodetic tile pyramid creation of raster files.""" + single_gtiff = mp_tmpdir / "single_out.tif" + convert( + cleantopo_br_tif, + single_gtiff, + output_pyramid="geodetic", + zoom=7, + overviews=True, + overviews_resampling_method="bilinear", + concurrency=Concurrency.none, + ) + with rasterio_open(single_gtiff, "r") as src: + assert src.meta["driver"] == "GTiff" + assert src.meta["dtype"] == "uint16" + data = src.read(masked=True) + assert data.mask.any() + assert src.overviews(1) + + +@pytest.mark.integration +def test_convert_remote_single_gtiff(http_raster, mp_tmpdir): + """Automatic geodetic tile pyramid creation of raster files.""" + single_gtiff = mp_tmpdir / "single_out.tif" + convert( + http_raster, + single_gtiff, + output_pyramid="geodetic", + zoom=1, + concurrency=Concurrency.none, + ) + with rasterio_open(single_gtiff, "r") as src: + assert src.meta["driver"] == "GTiff" + assert src.meta["dtype"] == "uint16" + data = src.read(masked=True) + assert data.any() + + +def test_convert_dtype(cleantopo_br_tif, mp_tmpdir): + """Automatic tile pyramid creation using dtype scale.""" + convert( + cleantopo_br_tif, mp_tmpdir, output_pyramid="mercator", output_dtype="uint8" + ) + for zoom, row, col in [(4, 15, 15), (3, 7, 7)]: + out_file = mp_tmpdir / str(zoom) / str(row) / str(col) + ".tif" + with rasterio_open(out_file, "r") as src: + assert src.meta["driver"] == "GTiff" + assert src.meta["dtype"] == "uint8" + data = src.read(masked=True) + assert data.mask.any() + + +def test_convert_scale_ratio(cleantopo_br_tif, mp_tmpdir): + """Automatic tile pyramid creation cropping data.""" + convert( + cleantopo_br_tif, + mp_tmpdir, + output_pyramid="mercator", + output_dtype="uint8", + scale_ratio=0.003, + ) + for zoom, row, col in [(4, 15, 15), (3, 7, 7)]: + out_file = mp_tmpdir / str(zoom) / str(row) / str(col) + ".tif" + with rasterio_open(out_file, "r") as src: + assert src.meta["driver"] == "GTiff" + assert src.meta["dtype"] == "uint8" + data = src.read(masked=True) + assert data.mask.any() + assert not data.mask.all() + + +def test_convert_scale_offset(cleantopo_br_tif, mp_tmpdir): + """Automatic tile pyramid creation cropping data.""" + convert( + cleantopo_br_tif, + mp_tmpdir, + output_pyramid="mercator", + output_dtype="uint8", + scale_offset=1, + ) + for zoom, row, col in [(4, 15, 15), (3, 7, 7)]: + out_file = mp_tmpdir / str(zoom) / str(row) / str(col) + ".tif" + with rasterio_open(out_file, "r") as src: + assert src.meta["driver"] == "GTiff" + assert src.meta["dtype"] == "uint8" + data = src.read(masked=True) + assert data.mask.any() + assert not data.mask.all() + + +def test_convert_clip(cleantopo_br_tif, mp_tmpdir, landpoly): + """Automatic tile pyramid creation cropping data.""" + task_counter = TaskCounter() + convert( + cleantopo_br_tif, + mp_tmpdir, + output_pyramid="geodetic", + clip_geometry=landpoly, + observers=[task_counter], + ) + assert task_counter.tasks == 0 + + +def test_convert_zoom(cleantopo_br_tif, mp_tmpdir): + """Automatic tile pyramid creation using a specific zoom.""" + convert(cleantopo_br_tif, mp_tmpdir, output_pyramid="mercator", zoom=3) + for zoom, row, col in [(4, 15, 15), (2, 3, 0)]: + out_file = mp_tmpdir / str(zoom) / str(row) / str(col) + ".tif" + assert not out_file.exists() + + +def test_convert_zoom_minmax(cleantopo_br_tif, mp_tmpdir): + """Automatic tile pyramid creation using min max zoom.""" + convert(cleantopo_br_tif, mp_tmpdir, output_pyramid="mercator", zoom=[3, 4]) + for zoom, row, col in [(2, 3, 0)]: + out_file = mp_tmpdir / str(zoom) / str(row) / str(col) + ".tif" + assert not out_file.exists() + + +def test_convert_zoom_maxmin(cleantopo_br_tif, mp_tmpdir): + """Automatic tile pyramid creation using max min zoom.""" + convert(cleantopo_br_tif, mp_tmpdir, output_pyramid="mercator", zoom=[4, 3]) + for zoom, row, col in [(2, 3, 0)]: + out_file = mp_tmpdir / str(zoom) / str(row) / str(col) + ".tif" + assert not out_file.exists() + + +def test_convert_mapchete(cleantopo_br, mp_tmpdir): + # prepare data + execute(cleantopo_br.path, zoom=[1, 3]) + + convert( + cleantopo_br.path, + mp_tmpdir, + output_pyramid="geodetic", + output_metatiling=1, + zoom=[1, 3], + ) + for zoom, row, col in [(3, 7, 15), (2, 3, 7), (1, 1, 3)]: + out_file = mp_tmpdir / str(zoom) / str(row) / str(col) + ".tif" + with rasterio_open(out_file, "r") as src: + assert src.meta["driver"] == "GTiff" + assert src.meta["dtype"] == "uint16" + data = src.read(masked=True) + assert data.mask.any() + + +def test_convert_tiledir(cleantopo_br, mp_tmpdir): + bounds = BufferedTilePyramid("geodetic").tile(4, 15, 31).bounds + # prepare data + with mapchete.open(cleantopo_br.dict) as mp: + list(mp.execute(zoom=[1, 4])) + convert( + cleantopo_br.dict["config_dir"] / cleantopo_br.dict["output"]["path"], + mp_tmpdir, + output_pyramid="geodetic", + output_metatiling=1, + zoom=[1, 4], + bounds=bounds, + ) + for zoom, row, col in [(4, 15, 31), (3, 7, 15), (2, 3, 7), (1, 1, 3)]: + out_file = mp_tmpdir / str(zoom) / str(row) / str(col) + ".tif" + with rasterio_open(out_file, "r") as src: + assert src.meta["driver"] == "GTiff" + assert src.meta["dtype"] == "uint16" + data = src.read(masked=True) + assert data.mask.any() + + +def test_convert_gcps(gcps_tif, mp_tmpdir): + """Automatic geodetic tile pyramid creation of raster files.""" + out_file = mp_tmpdir / "gcps_out.tif" + convert(gcps_tif, out_file, output_pyramid="geodetic", zoom=8) + with rasterio_open(out_file, "r") as src: + assert src.meta["driver"] == "GTiff" + assert src.meta["dtype"] == "uint16" + data = src.read(masked=True) + assert data.mask.any() + + +def test_convert_geojson(landpoly, mp_tmpdir): + convert(landpoly, mp_tmpdir, output_pyramid="geodetic", zoom=4) + for (zoom, row, col), control in zip([(4, 0, 7), (4, 1, 7)], [9, 32]): + out_file = mp_tmpdir / str(zoom) / str(row) / str(col) + ".geojson" + with fiona_open(out_file, "r") as src: + assert len(src) == control + for f in src: + assert shape(f["geometry"]).is_valid + + +def test_convert_geobuf(landpoly, mp_tmpdir): + # convert to geobuf + geobuf_outdir = mp_tmpdir / "geobuf" + convert( + landpoly, + geobuf_outdir, + output_pyramid="geodetic", + zoom=4, + output_format="Geobuf", + ) + for (zoom, row, col), control in zip([(4, 0, 7), (4, 1, 7)], [9, 32]): + out_file = geobuf_outdir / str(zoom) / str(row) / str(col) + ".pbf" + with open(out_file, "rb") as src: + features = geobuf.decode(src.read())["features"] + assert len(features) == control + for f in features: + assert f["geometry"]["type"] == "Polygon" + assert shape(f["geometry"]).area + + +def test_convert_errors(s2_band_jp2, mp_tmpdir, s2_band, cleantopo_br, landpoly): + # output format required + with pytest.raises(ValueError): + convert(s2_band_jp2, mp_tmpdir, output_pyramid="geodetic") + + # output pyramid reqired + with pytest.raises(ValueError): + convert(s2_band, mp_tmpdir) + + # prepare data for tiledir input + with mapchete.open(cleantopo_br.dict) as mp: + list(mp.execute(zoom=[1, 4])) + tiledir_path = cleantopo_br.dict["config_dir"] / cleantopo_br.dict["output"]["path"] + + # zoom level required + with pytest.raises(ValueError): + convert(tiledir_path, mp_tmpdir, output_pyramid="geodetic") + + # incompatible formats + with pytest.raises(ValueError): + convert( + tiledir_path, + mp_tmpdir, + output_pyramid="geodetic", + zoom=5, + output_format="GeoJSON", + ) + + # unsupported output format extension + with pytest.raises(ValueError): + convert(s2_band_jp2, "output.jp2", output_pyramid="geodetic", zoom=5) + + # malformed band index + with pytest.raises(ValueError): + convert(s2_band_jp2, "output.tif", bidx="invalid") diff --git a/test/commands/test_cp.py b/test/commands/test_cp.py new file mode 100644 index 00000000..1eb1a273 --- /dev/null +++ b/test/commands/test_cp.py @@ -0,0 +1,66 @@ +from test.commands import TaskCounter + +import pytest + +import mapchete +from mapchete.commands import cp + + +def test_cp(mp_tmpdir, cleantopo_br, wkt_geom, testdata_dir): + # generate TileDirectory + with mapchete.open( + cleantopo_br.dict, bounds=[169.19251592399996, -90, 180, -80.18582802550002] + ) as mp: + list(mp.execute(zoom=5)) + out_path = testdata_dir / cleantopo_br.dict["output"]["path"] + # copy tiles and subset by bounds + task_counter = TaskCounter() + cp( + out_path, + mp_tmpdir / "bounds", + zoom=5, + bounds=[169.19251592399996, -90, 180, -80.18582802550002], + observers=[task_counter], + ) + assert task_counter.tasks + + # copy all tiles + task_counter = TaskCounter() + cp(out_path, mp_tmpdir / "all", zoom=5, observers=[task_counter]) + assert task_counter.tasks + + # copy tiles and subset by area + task_counter = TaskCounter() + cp( + out_path, + mp_tmpdir / "area", + zoom=5, + area=wkt_geom, + observers=[task_counter], + ) + assert task_counter.tasks + + # copy local tiles without using threads + task_counter = TaskCounter() + cp( + out_path, + mp_tmpdir / "nothreads", + zoom=5, + workers=1, + observers=[task_counter], + ) + assert task_counter.tasks + + +@pytest.mark.integration +def test_cp_http(mp_tmpdir, http_tiledir): + # copy tiles and subset by bounds + task_counter = TaskCounter() + cp( + http_tiledir, + mp_tmpdir / "http", + zoom=1, + bounds=[3, 1, 4, 2], + observers=[task_counter], + ) + assert task_counter.tasks diff --git a/test/commands/test_execute.py b/test/commands/test_execute.py new file mode 100644 index 00000000..0fcf7314 --- /dev/null +++ b/test/commands/test_execute.py @@ -0,0 +1,212 @@ +from test.commands import TaskCounter + +import pytest +from shapely.geometry import box +from tilematrix import TilePyramid + +import mapchete +from mapchete.commands import convert, cp, execute, index, rm +from mapchete.config import DaskSettings +from mapchete.enums import Concurrency, Status +from mapchete.errors import JobCancelledError +from mapchete.io import fiona_open, rasterio_open +from mapchete.processing.types import TaskInfo +from mapchete.protocols import ObserverProtocol + + +@pytest.mark.parametrize( + "concurrency,process_graph", + [ + ("threads", None), + ("dask", True), + ("dask", False), + ("processes", None), + (None, None), + ], +) +def test_execute( + cleantopo_br_metatiling_1, cleantopo_br_tif, concurrency, process_graph +): + execute_kwargs = dict(concurrency=concurrency) + if concurrency == "dask": + execute_kwargs.update(dask_settings=DaskSettings(process_graph=process_graph)) + + zoom = 5 + tp = TilePyramid("geodetic") + with rasterio_open(cleantopo_br_tif) as src: + tiles = list(tp.tiles_from_bounds(src.bounds, zoom)) + execute(cleantopo_br_metatiling_1.dict, zoom=zoom, **execute_kwargs) + mp = cleantopo_br_metatiling_1.mp() + for t in tiles: + with rasterio_open(mp.config.output.get_path(t)) as src: + assert not src.read(masked=True).mask.all() + + +def test_execute_retry(example_mapchete): + zoom = 10 + retries = 2 + + class ExceptionRaiser: + """Makes the job fail during progress.""" + + def update(*args, progress=None, **kwargs): + if progress and progress.current > 2: + raise RuntimeError("This job just raised an exception!") + + class RetryCounter: + """Count retry attempts.""" + + retries = 0 + + def update(self, *args, status=None, **kwargs): + if status and status == Status.retrying: + self.retries += 1 + + exception_raiser = ExceptionRaiser() + retry_counter = RetryCounter() + + # this job should fail + with pytest.raises(RuntimeError): + execute( + example_mapchete.dict, + zoom=zoom, + retries=retries, + observers=[exception_raiser, retry_counter], + concurrency=Concurrency.none, + ) + + # make sure job has been retried + assert retry_counter.retries == retries + + +def test_execute_cancel(cleantopo_br_metatiling_1): + zoom = 5 + + class CancelObserver: + """Cancels job when running.""" + + def update(*args, progress=None, **kwargs): + if progress and progress.current > 0: + raise JobCancelledError + + class StatusObserver: + """Observes job state.""" + + status = None + + def update(self, *args, status=None, **kwargs): + if status: + self.status = status + + state_observer = StatusObserver() + execute( + cleantopo_br_metatiling_1.dict, + zoom=zoom, + observers=[CancelObserver(), state_observer], + concurrency=Concurrency.none, + ) + assert state_observer.status == Status.cancelled + + +def test_execute_tile(mp_tmpdir, cleantopo_br_metatiling_1): + tile = (5, 30, 63) + + task_counter = TaskCounter() + execute(cleantopo_br_metatiling_1.dict, tile=tile, observers=[task_counter]) + + assert task_counter.tasks == 1 + + mp = cleantopo_br_metatiling_1.mp() + with rasterio_open( + mp.config.output.get_path(mp.config.output_pyramid.tile(*tile)) + ) as src: + assert not src.read(masked=True).mask.all() + + +def test_execute_point(mp_tmpdir, example_mapchete, dummy2_tif): + """Using bounds from WKT.""" + with rasterio_open(dummy2_tif) as src: + g = box(*src.bounds) + + task_counter = TaskCounter() + execute( + example_mapchete.dict, + point=[g.centroid.x, g.centroid.y], + zoom=10, + observers=[task_counter], + ) + assert task_counter.tasks == 1 + + +@pytest.mark.parametrize( + "concurrency,process_graph", + [ + ("threads", None), + ("dask", True), + ("dask", False), + ("processes", None), + (None, None), + ], +) +def test_execute_preprocessing_tasks( + concurrency, preprocess_cache_raster_vector, process_graph +): + execute_kwargs = dict(concurrency=concurrency) + if concurrency == "dask": + execute_kwargs.update(dask_settings=DaskSettings(process_graph=process_graph)) + + task_counter = TaskCounter() + execute( + preprocess_cache_raster_vector.path, observers=[task_counter], **execute_kwargs + ) + assert task_counter.tasks + + +@pytest.mark.parametrize( + "concurrency,process_graph", + [ + # ("threads", False), # profiling does not work with threads + ("dask", False), + ("dask", True), + ("processes", False), + (None, False), + ], +) +def test_execute_profiling(cleantopo_br_metatiling_1, concurrency, process_graph): + execute_kwargs = dict(concurrency=concurrency) + if concurrency == "dask": + execute_kwargs.update(dask_settings=DaskSettings(process_graph=process_graph)) + + zoom = 5 + + class TaskResultObserver(ObserverProtocol): + def update(self, *args, task_result=None, **kwargs): + if task_result: + assert isinstance(task_result, TaskInfo) + assert task_result.profiling + for profiler in ["time", "memory"]: + assert profiler in task_result.profiling + + assert task_result.profiling["time"].elapsed > 0 + + assert task_result.profiling["memory"].max_allocated > 0 + assert task_result.profiling["memory"].total_allocated > 0 + assert task_result.profiling["memory"].allocations > 0 + + execute( + cleantopo_br_metatiling_1.dict, + zoom=zoom, + profiling=True, + observers=[TaskResultObserver()], + **execute_kwargs + ) + + +def test_convert_empty_gpkg(empty_gpkg, mp_tmpdir): + convert( + empty_gpkg, + mp_tmpdir, + output_pyramid="geodetic", + zoom=5, + output_format="GeoJSON", + ) diff --git a/test/commands/test_index.py b/test/commands/test_index.py new file mode 100644 index 00000000..ce73ffdc --- /dev/null +++ b/test/commands/test_index.py @@ -0,0 +1,246 @@ +import pytest + +import mapchete +from mapchete.commands import execute, index +from mapchete.io.vector import fiona_open + + +def test_index_geojson(cleantopo_br): + # execute process at zoom 3 + execute(cleantopo_br.dict, zoom=3) + + # generate index for zoom 3 + index(cleantopo_br.dict, zoom=3, geojson=True) + + with mapchete.open(cleantopo_br.dict) as mp: + files = mp.config.output.path.ls(absolute_paths=False) + assert len(files) == 4 + assert "3.geojson" in files + with fiona_open(mp.config.output.path / "3.geojson") as src: + for f in src: + assert "location" in f["properties"] + assert len(list(src)) == 1 + + +def test_index_geojson_fieldname(cleantopo_br): + # execute process at zoom 3 + execute(cleantopo_br.dict, zoom=3) + + # index and rename "location" to "new_fieldname" + index( + cleantopo_br.dict, + zoom=3, + geojson=True, + fieldname="new_fieldname", + ) + with mapchete.open(cleantopo_br.dict) as mp: + files = mp.config.output.path.ls(absolute_paths=False) + assert "3.geojson" in files + with fiona_open(mp.config.output.path / "3.geojson") as src: + for f in src: + assert "new_fieldname" in f["properties"] + assert len(list(src)) == 1 + + +def test_index_geojson_basepath(cleantopo_br): + # execute process at zoom 3 + execute(cleantopo_br.dict, zoom=3) + + basepath = "http://localhost" + # index and rename "location" to "new_fieldname" + index(cleantopo_br.dict, zoom=3, geojson=True, basepath=basepath) + + with mapchete.open(cleantopo_br.dict) as mp: + files = mp.config.output.path.ls(absolute_paths=False) + assert "3.geojson" in files + with fiona_open(mp.config.output.path / "3.geojson") as src: + for f in src: + assert f["properties"]["location"].startswith(basepath) + assert len(list(src)) == 1 + + +def test_index_geojson_for_gdal(cleantopo_br): + # execute process at zoom 3 + execute(cleantopo_br.dict, zoom=3) + + basepath = "http://localhost" + # index and rename "location" to "new_fieldname" + index(cleantopo_br.dict, zoom=3, geojson=True, basepath=basepath, for_gdal=True) + + with mapchete.open(cleantopo_br.dict) as mp: + files = mp.config.output.path.ls(absolute_paths=False) + assert "3.geojson" in files + with fiona_open(mp.config.output.path / "3.geojson") as src: + for f in src: + assert f["properties"]["location"].startswith("/vsicurl/" + basepath) + assert len(list(src)) == 1 + + +def test_index_geojson_tile(cleantopo_tl): + # execute process at zoom 3 + execute(cleantopo_tl.dict, zoom=3) + + # generate index + index(cleantopo_tl.dict, tile=(3, 0, 0), geojson=True) + + with mapchete.open(cleantopo_tl.dict) as mp: + files = mp.config.output.path.ls(absolute_paths=False) + assert len(files) == 4 + assert "3.geojson" in files + with fiona_open(mp.config.output.path / "3.geojson") as src: + assert len(list(src)) == 1 + + +def test_index_geojson_wkt_area(cleantopo_tl, wkt_geom_tl): + # execute process at zoom 3 + execute(cleantopo_tl.dict, area=wkt_geom_tl) + + # generate index for zoom 3 + index(cleantopo_tl.dict, geojson=True, area=wkt_geom_tl) + + with mapchete.open(cleantopo_tl.dict) as mp: + files = mp.config.output.path.ls(absolute_paths=False) + assert len(files) == 14 + assert "3.geojson" in files + + +def test_index_gpkg(cleantopo_br): + # execute process + execute(cleantopo_br.dict, zoom=5) + + # generate index + index(cleantopo_br.dict, zoom=5, gpkg=True) + + with mapchete.open(cleantopo_br.dict) as mp: + files = mp.config.output.path.ls(absolute_paths=False) + assert "5.gpkg" in files + with fiona_open(mp.config.output.path / "5.gpkg") as src: + for f in src: + assert "location" in f["properties"] + assert len(list(src)) == 1 + + # write again and assert there is no new entry because there is already one + index(cleantopo_br.dict, zoom=5, gpkg=True) + + with mapchete.open(cleantopo_br.dict) as mp: + files = mp.config.output.path.ls(absolute_paths=False) + assert "5.gpkg" in files + with fiona_open(mp.config.output.path / "5.gpkg") as src: + for f in src: + assert "location" in f["properties"] + assert len(list(src)) == 1 + + +def test_index_shp(cleantopo_br): + # execute process + execute(cleantopo_br.dict, zoom=5) + + # generate index + index(cleantopo_br.dict, zoom=5, shp=True) + + with mapchete.open(cleantopo_br.dict) as mp: + files = mp.config.output.path.ls(absolute_paths=False) + assert "5.shp" in files + with fiona_open(mp.config.output.path / "5.shp") as src: + for f in src: + assert "location" in f["properties"] + assert len(list(src)) == 1 + + # write again and assert there is no new entry because there is already one + index(cleantopo_br.dict, zoom=5, shp=True) + + with mapchete.open(cleantopo_br.dict) as mp: + files = mp.config.output.path.ls(absolute_paths=False) + assert "5.shp" in files + with fiona_open(mp.config.output.path / "5.shp") as src: + for f in src: + assert "location" in f["properties"] + assert len(list(src)) == 1 + + +def test_index_fgb(cleantopo_br): + # execute process + execute(cleantopo_br.dict, zoom=5) + + # generate index + index(cleantopo_br.dict, zoom=5, fgb=True) + + with mapchete.open(cleantopo_br.dict) as mp: + files = mp.config.output.path.ls(absolute_paths=False) + assert "5.fgb" in files + with fiona_open(mp.config.output.path / "5.fgb") as src: + for f in src: + assert "location" in f["properties"] + assert len(list(src)) == 1 + + # write again and assert there is no new entry because there is already one + index(cleantopo_br.dict, zoom=5, fgb=True) + + with mapchete.open(cleantopo_br.dict) as mp: + files = mp.config.output.path.ls(absolute_paths=False) + assert "5.fgb" in files + with fiona_open(mp.config.output.path / "5.fgb") as src: + for f in src: + assert "location" in f["properties"] + assert len(list(src)) == 1 + + +def test_index_text(cleantopo_br): + # execute process + execute(cleantopo_br.dict, zoom=5) + + # generate index + index(cleantopo_br.dict, zoom=5, txt=True) + + with mapchete.open(cleantopo_br.dict) as mp: + files = mp.config.output.path.ls(absolute_paths=False) + assert "5.txt" in files + with open(mp.config.output.path / "5.txt") as src: + lines = list(src) + assert len(lines) == 1 + for l in lines: + assert l.endswith("7.tif\n") + + # write again and assert there is no new entry because there is already one + index(cleantopo_br.dict, zoom=5, txt=True) + + with mapchete.open(cleantopo_br.dict) as mp: + files = mp.config.output.path.ls(absolute_paths=False) + assert "5.txt" in files + with open(mp.config.output.path / "5.txt") as src: + lines = list(src) + assert len(lines) == 1 + for l in lines: + assert l.endswith("7.tif\n") + + +def test_index_tiledir(cleantopo_br): + # execute process + execute(cleantopo_br.dict, zoom=5) + + # generate index + index(cleantopo_br.output_path, zoom=5, gpkg=True) + + with mapchete.open(cleantopo_br.dict) as mp: + files = mp.config.output.path.ls(absolute_paths=False) + assert "5.gpkg" in files + with fiona_open(mp.config.output.path / "5.gpkg") as src: + for f in src: + assert "location" in f["properties"] + assert len(list(src)) == 1 + + # write again and assert there is no new entry because there is already one + index(cleantopo_br.dict, zoom=5, txt=True) + + with mapchete.open(cleantopo_br.dict) as mp: + files = mp.config.output.path.ls(absolute_paths=False) + assert "5.txt" in files + with fiona_open(mp.config.output.path / "5.gpkg") as src: + for f in src: + assert "location" in f["properties"] + assert len(list(src)) == 1 + + +def test_index_errors(cleantopo_br): + with pytest.raises(ValueError): + index(cleantopo_br.dict, zoom=5) diff --git a/test/commands/test_observer.py b/test/commands/test_observer.py new file mode 100644 index 00000000..e69de29b diff --git a/test/commands/test_parser.py b/test/commands/test_parser.py new file mode 100644 index 00000000..e69de29b diff --git a/test/commands/test_rm.py b/test/commands/test_rm.py new file mode 100644 index 00000000..493f6578 --- /dev/null +++ b/test/commands/test_rm.py @@ -0,0 +1,46 @@ +from test.commands import TaskCounter + +import pytest + +import mapchete +from mapchete.commands import rm + + +def test_rm(cleantopo_br, testdata_dir): + # generate TileDirectory + with mapchete.open( + cleantopo_br.dict, bounds=[169.19251592399996, -90, 180, -80.18582802550002] + ) as mp: + list(mp.execute(zoom=5)) + out_path = testdata_dir / cleantopo_br.dict["output"]["path"] + + # remove tiles + task_counter = TaskCounter() + rm(out_path, zoom=5, observers=[task_counter]) + assert task_counter.tasks + + # remove tiles but this time they should already have been removed + task_counter = TaskCounter() + rm(out_path, zoom=5, observers=[task_counter]) + assert task_counter.tasks == 0 + + +def test_rm_path_list(mp_tmpdir): + out_path = mp_tmpdir / "some_file.txt" + with out_path.open("w") as dst: + dst.write("foo") + + assert out_path.exists() + rm(paths=[out_path]) + assert not out_path.exists() + + +@pytest.mark.integration +def test_rm_path_list_s3(s3_testdata_dir): + out_path = s3_testdata_dir / "some_file.txt" + with out_path.open("w") as dst: + dst.write("foo") + + assert out_path.exists() + rm(paths=[out_path]) + assert not out_path.exists() diff --git a/test/test_commands.py b/test/test_commands.py deleted file mode 100644 index d38a6244..00000000 --- a/test/test_commands.py +++ /dev/null @@ -1,925 +0,0 @@ -import os -import warnings - -import geobuf -import pytest -from rio_cogeo.cogeo import cog_validate -from shapely.geometry import box, shape -from tilematrix import TilePyramid - -import mapchete -from mapchete.commands import convert, cp, execute, index, rm -from mapchete.config import DaskSettings -from mapchete.enums import Status -from mapchete.errors import JobCancelledError -from mapchete.io import fiona_open, rasterio_open -from mapchete.processing.types import TaskInfo -from mapchete.protocols import ObserverProtocol -from mapchete.tile import BufferedTilePyramid - -SCRIPTDIR = os.path.dirname(os.path.realpath(__file__)) -TESTDATA_DIR = os.path.join(SCRIPTDIR, "testdata") - - -class TaskCounter(ObserverProtocol): - tasks = 0 - - def update(self, *args, progress=None, **kwargs): - if progress: - self.tasks = progress.current - - -def test_cp(mp_tmpdir, cleantopo_br, wkt_geom): - # generate TileDirectory - with mapchete.open( - cleantopo_br.dict, bounds=[169.19251592399996, -90, 180, -80.18582802550002] - ) as mp: - list(mp.execute(zoom=5)) - out_path = os.path.join(TESTDATA_DIR, cleantopo_br.dict["output"]["path"]) - # copy tiles and subset by bounds - task_counter = TaskCounter() - cp( - out_path, - os.path.join(mp_tmpdir, "bounds"), - zoom=5, - bounds=[169.19251592399996, -90, 180, -80.18582802550002], - observers=[task_counter], - ) - assert task_counter.tasks - - # copy all tiles - task_counter = TaskCounter() - cp(out_path, os.path.join(mp_tmpdir, "all"), zoom=5, observers=[task_counter]) - assert task_counter.tasks - - # copy tiles and subset by area - task_counter = TaskCounter() - cp( - out_path, - os.path.join(mp_tmpdir, "area"), - zoom=5, - area=wkt_geom, - observers=[task_counter], - ) - assert task_counter.tasks - - # copy local tiles without using threads - task_counter = TaskCounter() - cp( - out_path, - os.path.join(mp_tmpdir, "nothreads"), - zoom=5, - workers=1, - observers=[task_counter], - ) - assert task_counter.tasks - - -@pytest.mark.integration -def test_cp_http(mp_tmpdir, http_tiledir): - # copy tiles and subset by bounds - task_counter = TaskCounter() - cp( - http_tiledir, - os.path.join(mp_tmpdir, "http"), - zoom=1, - bounds=[3, 1, 4, 2], - observers=[task_counter], - ) - assert task_counter.tasks - - -def test_rm(cleantopo_br): - # generate TileDirectory - with mapchete.open( - cleantopo_br.dict, bounds=[169.19251592399996, -90, 180, -80.18582802550002] - ) as mp: - list(mp.execute(zoom=5)) - out_path = os.path.join(TESTDATA_DIR, cleantopo_br.dict["output"]["path"]) - - # remove tiles - task_counter = TaskCounter() - rm(out_path, zoom=5, observers=[task_counter]) - assert task_counter.tasks - - # remove tiles but this time they should already have been removed - task_counter = TaskCounter() - rm(out_path, zoom=5, observers=[task_counter]) - assert task_counter.tasks == 0 - - -def test_rm_path_list(mp_tmpdir): - out_path = mp_tmpdir / "some_file.txt" - with out_path.open("w") as dst: - dst.write("foo") - - assert out_path.exists() - rm(paths=[out_path]) - assert not out_path.exists() - - -@pytest.mark.integration -def test_rm_path_list_s3(s3_testdata_dir): - out_path = s3_testdata_dir / "some_file.txt" - with out_path.open("w") as dst: - dst.write("foo") - - assert out_path.exists() - rm(paths=[out_path]) - assert not out_path.exists() - - -@pytest.mark.parametrize( - "concurrency,process_graph", - [ - ("threads", None), - ("dask", True), - ("dask", False), - ("processes", None), - (None, None), - ], -) -def test_execute( - cleantopo_br_metatiling_1, cleantopo_br_tif, concurrency, process_graph -): - execute_kwargs = dict(concurrency=concurrency) - if concurrency == "dask": - execute_kwargs.update(dask_settings=DaskSettings(process_graph=process_graph)) - - zoom = 5 - tp = TilePyramid("geodetic") - with rasterio_open(cleantopo_br_tif) as src: - tiles = list(tp.tiles_from_bounds(src.bounds, zoom)) - execute(cleantopo_br_metatiling_1.dict, zoom=zoom, **execute_kwargs) - mp = cleantopo_br_metatiling_1.mp() - for t in tiles: - with rasterio_open(mp.config.output.get_path(t)) as src: - assert not src.read(masked=True).mask.all() - - -def test_execute_retry(example_mapchete): - zoom = 10 - retries = 2 - - class ExceptionRaiser: - """Makes the job fail during progress.""" - - def update(*args, progress=None, **kwargs): - if progress and progress.current > 2: - raise RuntimeError("This job just raised an exception!") - - class RetryCounter: - """Count retry attempts.""" - - retries = 0 - - def update(self, *args, status=None, **kwargs): - if status and status == Status.retrying: - self.retries += 1 - - exception_raiser = ExceptionRaiser() - retry_counter = RetryCounter() - - # this job should fail - with pytest.raises(RuntimeError): - execute( - example_mapchete.dict, - zoom=zoom, - retries=retries, - observers=[exception_raiser, retry_counter], - concurrency=None, - ) - - # make sure job has been retried - assert retry_counter.retries == retries - - -def test_execute_cancel(cleantopo_br_metatiling_1): - zoom = 5 - - class CancelObserver: - """Cancels job when running.""" - - def update(*args, progress=None, **kwargs): - if progress and progress.current > 0: - raise JobCancelledError - - class StatusObserver: - """Observes job state.""" - - status = None - - def update(self, *args, status=None, **kwargs): - if status: - self.status = status - - state_observer = StatusObserver() - execute( - cleantopo_br_metatiling_1.dict, - zoom=zoom, - observers=[CancelObserver(), state_observer], - concurrency=None, - ) - assert state_observer.status == Status.cancelled - - -def test_execute_tile(mp_tmpdir, cleantopo_br_metatiling_1): - tile = (5, 30, 63) - - task_counter = TaskCounter() - execute(cleantopo_br_metatiling_1.dict, tile=tile, observers=[task_counter]) - - assert task_counter.tasks == 1 - - mp = cleantopo_br_metatiling_1.mp() - with rasterio_open( - mp.config.output.get_path(mp.config.output_pyramid.tile(*tile)) - ) as src: - assert not src.read(masked=True).mask.all() - - -def test_execute_point(mp_tmpdir, example_mapchete, dummy2_tif): - """Using bounds from WKT.""" - with rasterio_open(dummy2_tif) as src: - g = box(*src.bounds) - - task_counter = TaskCounter() - execute( - example_mapchete.dict, - point=[g.centroid.x, g.centroid.y], - zoom=10, - observers=[task_counter], - ) - assert task_counter.tasks == 1 - - -@pytest.mark.parametrize( - "concurrency,process_graph", - [ - ("threads", None), - ("dask", True), - ("dask", False), - ("processes", None), - (None, None), - ], -) -def test_execute_preprocessing_tasks( - concurrency, preprocess_cache_raster_vector, process_graph -): - execute_kwargs = dict(concurrency=concurrency) - if concurrency == "dask": - execute_kwargs.update(dask_settings=DaskSettings(process_graph=process_graph)) - - task_counter = TaskCounter() - execute( - preprocess_cache_raster_vector.path, observers=[task_counter], **execute_kwargs - ) - assert task_counter.tasks - - -@pytest.mark.parametrize( - "concurrency,process_graph", - [ - # ("threads", False), # profiling does not work with threads - ("dask", False), - ("dask", True), - ("processes", False), - (None, False), - ], -) -def test_execute_profiling(cleantopo_br_metatiling_1, concurrency, process_graph): - execute_kwargs = dict(concurrency=concurrency) - if concurrency == "dask": - execute_kwargs.update(dask_settings=DaskSettings(process_graph=process_graph)) - - zoom = 5 - - class TaskResultObserver(ObserverProtocol): - def update(self, *args, task_result=None, **kwargs): - if task_result: - assert isinstance(task_result, TaskInfo) - assert task_result.profiling - for profiler in ["time", "memory"]: - assert profiler in task_result.profiling - - assert task_result.profiling["time"].elapsed > 0 - - assert task_result.profiling["memory"].max_allocated > 0 - assert task_result.profiling["memory"].total_allocated > 0 - assert task_result.profiling["memory"].allocations > 0 - - execute( - cleantopo_br_metatiling_1.dict, - zoom=zoom, - profiling=True, - observers=[TaskResultObserver()], - **execute_kwargs - ) - - -def test_convert_geodetic(cleantopo_br_tif, mp_tmpdir): - """Automatic geodetic tile pyramid creation of raster files.""" - convert(cleantopo_br_tif, mp_tmpdir, output_pyramid="geodetic") - for zoom, row, col in [(4, 15, 31), (3, 7, 15), (2, 3, 7), (1, 1, 3)]: - out_file = os.path.join(*[mp_tmpdir, str(zoom), str(row), str(col) + ".tif"]) - with rasterio_open(out_file, "r") as src: - assert src.meta["driver"] == "GTiff" - assert src.meta["dtype"] == "uint16" - data = src.read(masked=True) - assert data.mask.any() - - -def test_convert_mercator(cleantopo_br_tif, mp_tmpdir): - """Automatic mercator tile pyramid creation of raster files.""" - convert(cleantopo_br_tif, mp_tmpdir, output_pyramid="mercator") - for zoom, row, col in [(4, 15, 15), (3, 7, 7)]: - out_file = os.path.join(*[mp_tmpdir, str(zoom), str(row), str(col) + ".tif"]) - with rasterio_open(out_file, "r") as src: - assert src.meta["driver"] == "GTiff" - assert src.meta["dtype"] == "uint16" - data = src.read(masked=True) - assert data.mask.any() - - -def test_convert_custom_grid(s2_band, mp_tmpdir, custom_grid_json): - """Automatic mercator tile pyramid creation of raster files.""" - convert(s2_band, mp_tmpdir, output_pyramid=custom_grid_json) - for zoom, row, col in [(0, 5298, 631)]: - out_file = mp_tmpdir / zoom / row / col + ".tif" - with rasterio_open(out_file, "r") as src: - assert src.meta["driver"] == "GTiff" - assert src.meta["dtype"] == "uint16" - data = src.read(masked=True) - assert data.mask.any() - - -def test_convert_png(cleantopo_br_tif, mp_tmpdir): - """Automatic PNG tile pyramid creation of raster files.""" - convert(cleantopo_br_tif, mp_tmpdir, output_pyramid="mercator", output_format="PNG") - - for zoom, row, col in [(4, 15, 15), (3, 7, 7)]: - out_file = os.path.join(*[mp_tmpdir, str(zoom), str(row), str(col) + ".png"]) - with warnings.catch_warnings(): - warnings.simplefilter("ignore") - with rasterio_open(out_file, "r") as src: - assert src.meta["driver"] == "PNG" - assert src.meta["dtype"] == "uint8" - data = src.read(masked=True) - assert data.mask.any() - - -def test_convert_bidx(cleantopo_br_tif, mp_tmpdir): - """Automatic geodetic tile pyramid creation of raster files.""" - single_gtiff = os.path.join(mp_tmpdir, "single_out_bidx.tif") - convert(cleantopo_br_tif, single_gtiff, output_pyramid="geodetic", zoom=3, bidx=1) - with rasterio_open(single_gtiff, "r") as src: - assert src.meta["driver"] == "GTiff" - assert src.meta["dtype"] == "uint16" - data = src.read(masked=True) - assert data.mask.any() - assert not src.overviews(1) - - -def test_convert_single_gtiff(cleantopo_br_tif, mp_tmpdir): - """Automatic geodetic tile pyramid creation of raster files.""" - single_gtiff = os.path.join(mp_tmpdir, "single_out.tif") - convert(cleantopo_br_tif, single_gtiff, output_pyramid="geodetic", zoom=3) - with rasterio_open(single_gtiff, "r") as src: - assert src.meta["driver"] == "GTiff" - assert src.meta["dtype"] == "uint16" - data = src.read(masked=True) - assert data.mask.any() - assert not src.overviews(1) - - -def test_convert_single_gtiff_cog(cleantopo_br_tif, mp_tmpdir): - """Automatic geodetic tile pyramid creation of raster files.""" - single_gtiff = os.path.join(mp_tmpdir, "single_out_cog.tif") - convert(cleantopo_br_tif, single_gtiff, output_pyramid="geodetic", zoom=5, cog=True) - with rasterio_open(single_gtiff, "r") as src: - assert src.meta["driver"] == "GTiff" - assert src.meta["dtype"] == "uint16" - data = src.read(masked=True) - assert not data.mask.all() - assert cog_validate(single_gtiff, strict=True) - - -def test_convert_single_gtiff_cog_dask(cleantopo_br_tif, mp_tmpdir): - """Automatic geodetic tile pyramid creation of raster files.""" - single_gtiff = os.path.join(mp_tmpdir, "single_out_cog.tif") - convert( - cleantopo_br_tif, - single_gtiff, - output_pyramid="geodetic", - zoom=5, - cog=True, - concurrency="dask", - ) - with rasterio_open(single_gtiff, "r") as src: - assert src.meta["driver"] == "GTiff" - assert src.meta["dtype"] == "uint16" - data = src.read(masked=True) - assert not data.mask.all() - assert cog_validate(single_gtiff, strict=True) - - -def test_convert_single_gtiff_overviews(cleantopo_br_tif, mp_tmpdir): - """Automatic geodetic tile pyramid creation of raster files.""" - single_gtiff = os.path.join(mp_tmpdir, "single_out.tif") - convert( - cleantopo_br_tif, - single_gtiff, - output_pyramid="geodetic", - zoom=7, - overviews=True, - overviews_resampling_method="bilinear", - concurrency=None, - ) - with rasterio_open(single_gtiff, "r") as src: - assert src.meta["driver"] == "GTiff" - assert src.meta["dtype"] == "uint16" - data = src.read(masked=True) - assert data.mask.any() - assert src.overviews(1) - - -@pytest.mark.integration -def test_convert_remote_single_gtiff(http_raster, mp_tmpdir): - """Automatic geodetic tile pyramid creation of raster files.""" - single_gtiff = os.path.join(mp_tmpdir, "single_out.tif") - convert( - http_raster, single_gtiff, output_pyramid="geodetic", zoom=1, concurrency=None - ) - with rasterio_open(single_gtiff, "r") as src: - assert src.meta["driver"] == "GTiff" - assert src.meta["dtype"] == "uint16" - data = src.read(masked=True) - assert data.any() - - -def test_convert_dtype(cleantopo_br_tif, mp_tmpdir): - """Automatic tile pyramid creation using dtype scale.""" - convert( - cleantopo_br_tif, mp_tmpdir, output_pyramid="mercator", output_dtype="uint8" - ) - for zoom, row, col in [(4, 15, 15), (3, 7, 7)]: - out_file = os.path.join(*[mp_tmpdir, str(zoom), str(row), str(col) + ".tif"]) - with rasterio_open(out_file, "r") as src: - assert src.meta["driver"] == "GTiff" - assert src.meta["dtype"] == "uint8" - data = src.read(masked=True) - assert data.mask.any() - - -def test_convert_scale_ratio(cleantopo_br_tif, mp_tmpdir): - """Automatic tile pyramid creation cropping data.""" - convert( - cleantopo_br_tif, - mp_tmpdir, - output_pyramid="mercator", - output_dtype="uint8", - scale_ratio=0.003, - ) - for zoom, row, col in [(4, 15, 15), (3, 7, 7)]: - out_file = os.path.join(*[mp_tmpdir, str(zoom), str(row), str(col) + ".tif"]) - with rasterio_open(out_file, "r") as src: - assert src.meta["driver"] == "GTiff" - assert src.meta["dtype"] == "uint8" - data = src.read(masked=True) - assert data.mask.any() - assert not data.mask.all() - - -def test_convert_scale_offset(cleantopo_br_tif, mp_tmpdir): - """Automatic tile pyramid creation cropping data.""" - convert( - cleantopo_br_tif, - mp_tmpdir, - output_pyramid="mercator", - output_dtype="uint8", - scale_offset=1, - ) - for zoom, row, col in [(4, 15, 15), (3, 7, 7)]: - out_file = os.path.join(*[mp_tmpdir, str(zoom), str(row), str(col) + ".tif"]) - with rasterio_open(out_file, "r") as src: - assert src.meta["driver"] == "GTiff" - assert src.meta["dtype"] == "uint8" - data = src.read(masked=True) - assert data.mask.any() - assert not data.mask.all() - - -def test_convert_clip(cleantopo_br_tif, mp_tmpdir, landpoly): - """Automatic tile pyramid creation cropping data.""" - task_counter = TaskCounter() - convert( - cleantopo_br_tif, - mp_tmpdir, - output_pyramid="geodetic", - clip_geometry=landpoly, - observers=[task_counter], - ) - assert task_counter.tasks == 0 - - -def test_convert_zoom(cleantopo_br_tif, mp_tmpdir): - """Automatic tile pyramid creation using a specific zoom.""" - convert(cleantopo_br_tif, mp_tmpdir, output_pyramid="mercator", zoom=3) - for zoom, row, col in [(4, 15, 15), (2, 3, 0)]: - out_file = os.path.join(*[mp_tmpdir, str(zoom), str(row), str(col) + ".tif"]) - assert not os.path.isfile(out_file) - - -def test_convert_zoom_minmax(cleantopo_br_tif, mp_tmpdir): - """Automatic tile pyramid creation using min max zoom.""" - convert(cleantopo_br_tif, mp_tmpdir, output_pyramid="mercator", zoom=[3, 4]) - for zoom, row, col in [(2, 3, 0)]: - out_file = os.path.join(*[mp_tmpdir, str(zoom), str(row), str(col) + ".tif"]) - assert not os.path.isfile(out_file) - - -def test_convert_zoom_maxmin(cleantopo_br_tif, mp_tmpdir): - """Automatic tile pyramid creation using max min zoom.""" - convert(cleantopo_br_tif, mp_tmpdir, output_pyramid="mercator", zoom=[4, 3]) - for zoom, row, col in [(2, 3, 0)]: - out_file = os.path.join(*[mp_tmpdir, str(zoom), str(row), str(col) + ".tif"]) - assert not os.path.isfile(out_file) - - -def test_convert_mapchete(cleantopo_br, mp_tmpdir): - # prepare data - execute(cleantopo_br.path, zoom=[1, 3]) - - convert( - cleantopo_br.path, - mp_tmpdir, - output_pyramid="geodetic", - output_metatiling=1, - zoom=[1, 3], - ) - for zoom, row, col in [(3, 7, 15), (2, 3, 7), (1, 1, 3)]: - out_file = os.path.join(*[mp_tmpdir, str(zoom), str(row), str(col) + ".tif"]) - with rasterio_open(out_file, "r") as src: - assert src.meta["driver"] == "GTiff" - assert src.meta["dtype"] == "uint16" - data = src.read(masked=True) - assert data.mask.any() - - -def test_convert_tiledir(cleantopo_br, mp_tmpdir): - bounds = BufferedTilePyramid("geodetic").tile(4, 15, 31).bounds - # prepare data - with mapchete.open(cleantopo_br.dict) as mp: - list(mp.execute(zoom=[1, 4])) - convert( - os.path.join( - cleantopo_br.dict["config_dir"], cleantopo_br.dict["output"]["path"] - ), - mp_tmpdir, - output_pyramid="geodetic", - output_metatiling=1, - zoom=[1, 4], - bounds=bounds, - ) - for zoom, row, col in [(4, 15, 31), (3, 7, 15), (2, 3, 7), (1, 1, 3)]: - out_file = os.path.join(*[mp_tmpdir, str(zoom), str(row), str(col) + ".tif"]) - with rasterio_open(out_file, "r") as src: - assert src.meta["driver"] == "GTiff" - assert src.meta["dtype"] == "uint16" - data = src.read(masked=True) - assert data.mask.any() - - -def test_convert_gcps(gcps_tif, mp_tmpdir): - """Automatic geodetic tile pyramid creation of raster files.""" - out_file = os.path.join(mp_tmpdir, "gcps_out.tif") - convert(gcps_tif, out_file, output_pyramid="geodetic", zoom=8) - with rasterio_open(out_file, "r") as src: - assert src.meta["driver"] == "GTiff" - assert src.meta["dtype"] == "uint16" - data = src.read(masked=True) - assert data.mask.any() - - -def test_convert_geojson(landpoly, mp_tmpdir): - convert(landpoly, mp_tmpdir, output_pyramid="geodetic", zoom=4) - for (zoom, row, col), control in zip([(4, 0, 7), (4, 1, 7)], [9, 32]): - out_file = os.path.join( - *[mp_tmpdir, str(zoom), str(row), str(col) + ".geojson"] - ) - with fiona_open(out_file, "r") as src: - assert len(src) == control - for f in src: - assert shape(f["geometry"]).is_valid - - -def test_convert_geobuf(landpoly, mp_tmpdir): - # convert to geobuf - geobuf_outdir = os.path.join(mp_tmpdir, "geobuf") - convert( - landpoly, - geobuf_outdir, - output_pyramid="geodetic", - zoom=4, - output_format="Geobuf", - ) - for (zoom, row, col), control in zip([(4, 0, 7), (4, 1, 7)], [9, 32]): - out_file = os.path.join( - *[geobuf_outdir, str(zoom), str(row), str(col) + ".pbf"] - ) - with open(out_file, "rb") as src: - features = geobuf.decode(src.read())["features"] - assert len(features) == control - for f in features: - assert f["geometry"]["type"] == "Polygon" - assert shape(f["geometry"]).area - - -def test_convert_errors(s2_band_jp2, mp_tmpdir, s2_band, cleantopo_br, landpoly): - # output format required - with pytest.raises(ValueError): - convert(s2_band_jp2, mp_tmpdir, output_pyramid="geodetic") - - # output pyramid reqired - with pytest.raises(ValueError): - convert(s2_band, mp_tmpdir) - - # prepare data for tiledir input - with mapchete.open(cleantopo_br.dict) as mp: - list(mp.execute(zoom=[1, 4])) - tiledir_path = os.path.join( - cleantopo_br.dict["config_dir"], cleantopo_br.dict["output"]["path"] - ) - - # zoom level required - with pytest.raises(ValueError): - convert(tiledir_path, mp_tmpdir, output_pyramid="geodetic") - - # incompatible formats - with pytest.raises(ValueError): - convert( - tiledir_path, - mp_tmpdir, - output_pyramid="geodetic", - zoom=5, - output_format="GeoJSON", - ) - - # unsupported output format extension - with pytest.raises(ValueError): - convert(s2_band_jp2, "output.jp2", output_pyramid="geodetic", zoom=5) - - # malformed band index - with pytest.raises(ValueError): - convert(s2_band_jp2, "output.tif", bidx="invalid") - - -def test_index_geojson(mp_tmpdir, cleantopo_br): - # execute process at zoom 3 - execute(cleantopo_br.dict, zoom=3) - - # generate index for zoom 3 - index(cleantopo_br.dict, zoom=3, geojson=True) - - with mapchete.open(cleantopo_br.dict) as mp: - files = os.listdir(mp.config.output.path) - assert len(files) == 4 - assert "3.geojson" in files - with fiona_open(mp.config.output.path / "3.geojson") as src: - for f in src: - assert "location" in f["properties"] - assert len(list(src)) == 1 - - -def test_index_geojson_fieldname(mp_tmpdir, cleantopo_br): - # execute process at zoom 3 - execute(cleantopo_br.dict, zoom=3) - - # index and rename "location" to "new_fieldname" - index( - cleantopo_br.dict, - zoom=3, - geojson=True, - fieldname="new_fieldname", - ) - with mapchete.open(cleantopo_br.dict) as mp: - files = os.listdir(mp.config.output.path) - assert "3.geojson" in files - with fiona_open(mp.config.output.path / "3.geojson") as src: - for f in src: - assert "new_fieldname" in f["properties"] - assert len(list(src)) == 1 - - -def test_index_geojson_basepath(mp_tmpdir, cleantopo_br): - # execute process at zoom 3 - execute(cleantopo_br.dict, zoom=3) - - basepath = "http://localhost" - # index and rename "location" to "new_fieldname" - index(cleantopo_br.dict, zoom=3, geojson=True, basepath=basepath) - - with mapchete.open(cleantopo_br.dict) as mp: - files = os.listdir(mp.config.output.path) - assert "3.geojson" in files - with fiona_open(mp.config.output.path / "3.geojson") as src: - for f in src: - assert f["properties"]["location"].startswith(basepath) - assert len(list(src)) == 1 - - -def test_index_geojson_for_gdal(mp_tmpdir, cleantopo_br): - # execute process at zoom 3 - execute(cleantopo_br.dict, zoom=3) - - basepath = "http://localhost" - # index and rename "location" to "new_fieldname" - index(cleantopo_br.dict, zoom=3, geojson=True, basepath=basepath, for_gdal=True) - - with mapchete.open(cleantopo_br.dict) as mp: - files = os.listdir(mp.config.output.path) - assert "3.geojson" in files - with fiona_open(mp.config.output.path / "3.geojson") as src: - for f in src: - assert f["properties"]["location"].startswith("/vsicurl/" + basepath) - assert len(list(src)) == 1 - - -def test_index_geojson_tile(mp_tmpdir, cleantopo_tl): - # execute process at zoom 3 - execute(cleantopo_tl.dict, zoom=3) - - # generate index - index(cleantopo_tl.dict, tile=(3, 0, 0), geojson=True) - - with mapchete.open(cleantopo_tl.dict) as mp: - files = os.listdir(mp.config.output.path) - assert len(files) == 4 - assert "3.geojson" in files - with fiona_open(mp.config.output.path / "3.geojson") as src: - assert len(list(src)) == 1 - - -def test_index_geojson_wkt_area(mp_tmpdir, cleantopo_tl, wkt_geom_tl): - # execute process at zoom 3 - execute(cleantopo_tl.dict, area=wkt_geom_tl) - - # generate index for zoom 3 - index(cleantopo_tl.dict, geojson=True, area=wkt_geom_tl) - - with mapchete.open(cleantopo_tl.dict) as mp: - files = os.listdir(mp.config.output.path) - assert len(files) == 14 - assert "3.geojson" in files - - -def test_index_gpkg(mp_tmpdir, cleantopo_br): - # execute process - execute(cleantopo_br.dict, zoom=5) - - # generate index - index(cleantopo_br.dict, zoom=5, gpkg=True) - - with mapchete.open(cleantopo_br.dict) as mp: - files = os.listdir(mp.config.output.path) - assert "5.gpkg" in files - with fiona_open(mp.config.output.path / "5.gpkg") as src: - for f in src: - assert "location" in f["properties"] - assert len(list(src)) == 1 - - # write again and assert there is no new entry because there is already one - index(cleantopo_br.dict, zoom=5, gpkg=True) - - with mapchete.open(cleantopo_br.dict) as mp: - files = os.listdir(mp.config.output.path) - assert "5.gpkg" in files - with fiona_open(mp.config.output.path / "5.gpkg") as src: - for f in src: - assert "location" in f["properties"] - assert len(list(src)) == 1 - - -def test_index_shp(mp_tmpdir, cleantopo_br): - # execute process - execute(cleantopo_br.dict, zoom=5) - - # generate index - index(cleantopo_br.dict, zoom=5, shp=True) - - with mapchete.open(cleantopo_br.dict) as mp: - files = os.listdir(mp.config.output.path) - assert "5.shp" in files - with fiona_open(mp.config.output.path / "5.shp") as src: - for f in src: - assert "location" in f["properties"] - assert len(list(src)) == 1 - - # write again and assert there is no new entry because there is already one - index(cleantopo_br.dict, zoom=5, shp=True) - - with mapchete.open(cleantopo_br.dict) as mp: - files = os.listdir(mp.config.output.path) - assert "5.shp" in files - with fiona_open(mp.config.output.path / "5.shp") as src: - for f in src: - assert "location" in f["properties"] - assert len(list(src)) == 1 - - -def test_index_fgb(mp_tmpdir, cleantopo_br): - # execute process - execute(cleantopo_br.dict, zoom=5) - - # generate index - index(cleantopo_br.dict, zoom=5, fgb=True) - - with mapchete.open(cleantopo_br.dict) as mp: - files = os.listdir(mp.config.output.path) - assert "5.fgb" in files - with fiona_open(mp.config.output.path / "5.fgb") as src: - for f in src: - assert "location" in f["properties"] - assert len(list(src)) == 1 - - # write again and assert there is no new entry because there is already one - index(cleantopo_br.dict, zoom=5, fgb=True) - - with mapchete.open(cleantopo_br.dict) as mp: - files = os.listdir(mp.config.output.path) - assert "5.fgb" in files - with fiona_open(mp.config.output.path / "5.fgb") as src: - for f in src: - assert "location" in f["properties"] - assert len(list(src)) == 1 - - -def test_index_text(cleantopo_br): - # execute process - execute(cleantopo_br.dict, zoom=5) - - # generate index - index(cleantopo_br.dict, zoom=5, txt=True) - - with mapchete.open(cleantopo_br.dict) as mp: - files = os.listdir(mp.config.output.path) - assert "5.txt" in files - with open(os.path.join(mp.config.output.path, "5.txt")) as src: - lines = list(src) - assert len(lines) == 1 - for l in lines: - assert l.endswith("7.tif\n") - - # write again and assert there is no new entry because there is already one - index(cleantopo_br.dict, zoom=5, txt=True) - - with mapchete.open(cleantopo_br.dict) as mp: - files = os.listdir(mp.config.output.path) - assert "5.txt" in files - with open(os.path.join(mp.config.output.path, "5.txt")) as src: - lines = list(src) - assert len(lines) == 1 - for l in lines: - assert l.endswith("7.tif\n") - - -def test_index_tiledir(cleantopo_br): - # execute process - execute(cleantopo_br.dict, zoom=5) - - # generate index - index(cleantopo_br.output_path, zoom=5, gpkg=True) - - with mapchete.open(cleantopo_br.dict) as mp: - files = os.listdir(mp.config.output.path) - assert "5.gpkg" in files - with fiona_open(mp.config.output.path / "5.gpkg") as src: - for f in src: - assert "location" in f["properties"] - assert len(list(src)) == 1 - - # write again and assert there is no new entry because there is already one - index(cleantopo_br.dict, zoom=5, txt=True) - - with mapchete.open(cleantopo_br.dict) as mp: - files = os.listdir(mp.config.output.path) - assert "5.txt" in files - with fiona_open(mp.config.output.path / "5.gpkg") as src: - for f in src: - assert "location" in f["properties"] - assert len(list(src)) == 1 - - -def test_index_errors(mp_tmpdir, cleantopo_br): - with pytest.raises(ValueError): - index(cleantopo_br.dict, zoom=5) - - -def test_convert_empty_gpkg(empty_gpkg, mp_tmpdir): - convert( - empty_gpkg, - mp_tmpdir, - output_pyramid="geodetic", - zoom=5, - output_format="GeoJSON", - ) From e8b86728fb2fb6ed7011c952171ff09c8ca3af78 Mon Sep 17 00:00:00 2001 From: Joachim Ungar Date: Wed, 15 May 2024 09:43:52 +0200 Subject: [PATCH 27/28] rename modules --- mapchete/commands/__init__.py | 11 ++++++----- mapchete/commands/{_convert.py => convert.py} | 2 +- mapchete/commands/{_cp.py => cp.py} | 0 mapchete/commands/{_execute.py => execute.py} | 0 mapchete/commands/{_index.py => index.py} | 0 mapchete/commands/{_rm.py => rm.py} | 0 6 files changed, 7 insertions(+), 6 deletions(-) rename mapchete/commands/{_convert.py => convert.py} (99%) rename mapchete/commands/{_cp.py => cp.py} (100%) rename mapchete/commands/{_execute.py => execute.py} (100%) rename mapchete/commands/{_index.py => index.py} (100%) rename mapchete/commands/{_rm.py => rm.py} (100%) diff --git a/mapchete/commands/__init__.py b/mapchete/commands/__init__.py index ef0af6ef..bdc46cdd 100644 --- a/mapchete/commands/__init__.py +++ b/mapchete/commands/__init__.py @@ -2,10 +2,11 @@ This package contains easy to access functions which otherwise would have to be called via the CLI. This should make the use from within other scripts, notebooks, etc. easier. """ -from mapchete.commands._convert import convert -from mapchete.commands._cp import cp -from mapchete.commands._execute import execute -from mapchete.commands._index import index -from mapchete.commands._rm import rm + +from mapchete.commands.convert import convert +from mapchete.commands.cp import cp +from mapchete.commands.execute import execute +from mapchete.commands.index import index +from mapchete.commands.rm import rm __all__ = ["convert", "cp", "execute", "index", "rm"] diff --git a/mapchete/commands/_convert.py b/mapchete/commands/convert.py similarity index 99% rename from mapchete/commands/_convert.py rename to mapchete/commands/convert.py index c433651e..32d328dd 100644 --- a/mapchete/commands/_convert.py +++ b/mapchete/commands/convert.py @@ -11,7 +11,7 @@ from shapely.geometry import box from shapely.geometry.base import BaseGeometry -from mapchete.commands._execute import execute +from mapchete.commands.execute import execute from mapchete.commands.observer import ObserverProtocol, Observers from mapchete.commands.parser import InputInfo, OutputInfo from mapchete.config import DaskSettings diff --git a/mapchete/commands/_cp.py b/mapchete/commands/cp.py similarity index 100% rename from mapchete/commands/_cp.py rename to mapchete/commands/cp.py diff --git a/mapchete/commands/_execute.py b/mapchete/commands/execute.py similarity index 100% rename from mapchete/commands/_execute.py rename to mapchete/commands/execute.py diff --git a/mapchete/commands/_index.py b/mapchete/commands/index.py similarity index 100% rename from mapchete/commands/_index.py rename to mapchete/commands/index.py diff --git a/mapchete/commands/_rm.py b/mapchete/commands/rm.py similarity index 100% rename from mapchete/commands/_rm.py rename to mapchete/commands/rm.py From 6646e3d613304bce9dd20aa8d0da894705e0cf5f Mon Sep 17 00:00:00 2001 From: Joachim Ungar Date: Wed, 15 May 2024 09:59:31 +0200 Subject: [PATCH 28/28] fix module import --- mapchete/cli/default/rm.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mapchete/cli/default/rm.py b/mapchete/cli/default/rm.py index bdef8c9f..b55f270e 100644 --- a/mapchete/cli/default/rm.py +++ b/mapchete/cli/default/rm.py @@ -4,7 +4,7 @@ from mapchete import commands from mapchete.cli import options from mapchete.cli.progress_bar import PBar -from mapchete.commands._rm import existing_paths +from mapchete.commands.rm import existing_paths @click.command(help="Remove tiles from TileDirectory.")