Skip to content

Commit

Permalink
Merge pull request #128 from crim-ca/retry-reuse
Browse files Browse the repository at this point in the history
retry reuse + mime-type handling
  • Loading branch information
fmigneault authored May 7, 2020
2 parents d2992ab + 0175756 commit f555d3e
Show file tree
Hide file tree
Showing 14 changed files with 360 additions and 99 deletions.
17 changes: 17 additions & 0 deletions CHANGES.rst
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,21 @@ Changes
`Unreleased <https://github.com/crim-ca/weaver/tree/master>`_ (latest)
========================================================================

Changes:
--------

- Reuse ``weaver.utils.request_retry`` function across a few locations that where essentially reimplementing
the core functionality.
- Add even more failure-permissive request attempts when validating a MIME-type against IANA website.
- Add auto-resolution of common extensions known under `PyWPS` as well as employing their specific encoding.
- Add ``geotiff`` format type support via `PyWPS` (`#100 <https://github.com/crim-ca/weaver/issues/100>`_).
- Make WPS status check more resilient to failing WPS outputs location not found in case the directory path can be
resolved to a valid local file representing the XML status (i.e.: don't depend as much on the HTTP WPS output route).
- Ensure backward support of generic/default ``text/plain`` I/O when extracted from a referenced WPS-1/2 XML remote
process which provides insufficient format details. For CWL output generated from it, replace the glob pattern to
match anything (``<id>.*``) instead of ``<id>.txt`` extracted from ``text/plain`` to simulate MIME-type as ``*/*``.
Issue log warning message for future use cases.

Fixes:
------

Expand All @@ -14,6 +29,8 @@ Fixes:
not allowed by this implementation.
- Fix parsing of explicitly-typed optional array CWL I/O notation that was not considered
(i.e.: using ``type`` as list with additional ``"null"`` instead of ``type: "<type>?"`` shorthand).
- Fix parsing of MIME-type from ``format`` field to exclude additional parameters (e.g.: ``; charset=UTF-8`` for
remote IANA validation.

`1.5.1 <https://github.com/crim-ca/weaver/tree/1.5.1>`_ (2020-03-26)
========================================================================
Expand Down
2 changes: 1 addition & 1 deletion requirements-dev.txt
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@ isort
mock
pluggy>=0.7
pytest
pylint
pylint>=2.5
pylint_quotes
responses
WebTest
Expand Down
5 changes: 3 additions & 2 deletions tests/functional/test_builtin.py
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,7 @@ def setUp(self):
"weaver.wps": True,
"weaver.wps_output": True,
"weaver.wps_output_path": "/wpsoutputs",
"weaver.wps_output_dir": "/tmp", # nosec: B108 # don't care hardcoded for test
"weaver.wps_path": "/ows/wps",
"weaver.wps_restapi_path": "/",
}
Expand Down Expand Up @@ -99,8 +100,8 @@ def test_jsonarray2netcdf_execute(self):
assert resp.content_type in CONTENT_TYPE_APP_JSON
job_url = resp.json["location"]
nc_path = None
for _ in range(5):
sleep(1)
for delay in range(5):
sleep(delay)
resp = self.app.get(job_url, headers=self.json_headers)
if resp.status_code == 200:
if resp.json["status"] in JOB_STATUS_CATEGORIES[STATUS_CATEGORY_RUNNING]:
Expand Down
8 changes: 4 additions & 4 deletions tests/processes/test_wps_package.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@
from weaver.formats import CONTENT_TYPE_APP_JSON, CONTENT_TYPE_APP_NETCDF, CONTENT_TYPE_APP_XML, CONTENT_TYPE_TEXT_PLAIN
from weaver.processes.wps_package import (
WPS_LITERAL,
DefaultFormat,
DEFAULT_FORMAT,
_are_different_and_set,
_is_cwl_array_type,
_is_cwl_enum_type,
Expand Down Expand Up @@ -356,16 +356,16 @@ def assert_formats_equal_any_order(format_result, format_expect):

def test_merge_io_formats_no_wps():
wps_fmt = []
cwl_fmt = [DefaultFormat]
cwl_fmt = [DEFAULT_FORMAT]
res_fmt = _merge_io_formats(wps_fmt, cwl_fmt)
assert isinstance(res_fmt, list)
assert len(res_fmt) == 1
assert res_fmt[0] is DefaultFormat
assert res_fmt[0] is DEFAULT_FORMAT


def test_merge_io_formats_with_wps_and_default_cwl():
wps_fmt = [Format(CONTENT_TYPE_APP_NETCDF)]
cwl_fmt = [DefaultFormat]
cwl_fmt = [DEFAULT_FORMAT]
res_fmt = _merge_io_formats(wps_fmt, cwl_fmt)
assert isinstance(res_fmt, list)
assert_formats_equal_any_order(res_fmt, [Format(CONTENT_TYPE_APP_NETCDF)])
Expand Down
98 changes: 97 additions & 1 deletion tests/test_formats.py
Original file line number Diff line number Diff line change
@@ -1,21 +1,52 @@
import os

import mock
import six
from pyramid.httpexceptions import HTTPOk, HTTPRequestTimeout
from pyramid.response import Response
from pywps.inout.formats import Format
from requests.exceptions import ConnectionError

from weaver.formats import (
CONTENT_TYPE_ANY,
CONTENT_TYPE_APP_JSON,
CONTENT_TYPE_APP_GEOJSON,
CONTENT_TYPE_APP_NETCDF,
CONTENT_TYPE_APP_XML,
CONTENT_TYPE_IMAGE_GEOTIFF,
EDAM_MAPPING,
EDAM_NAMESPACE,
EDAM_NAMESPACE_DEFINITION,
FORMAT_NAMESPACES,
IANA_NAMESPACE,
IANA_NAMESPACE_DEFINITION,
clean_mime_type_format,
get_cwl_file_format
get_cwl_file_format,
get_extension,
get_format,
)


def test_get_extension():
assert get_extension(CONTENT_TYPE_APP_JSON) == ".json" # basic
assert get_extension(CONTENT_TYPE_APP_JSON + "; charset=UTF-8") == ".json" # ignore extra parameters
assert get_extension(CONTENT_TYPE_APP_GEOJSON) == ".geojson" # pywps definition
assert get_extension(CONTENT_TYPE_IMAGE_GEOTIFF) == ".tiff" # pywps definition
assert get_extension("application/x-custom") == ".custom"
assert get_extension("application/unknown") == ".unknown"


def test_get_extension_glob_any():
assert get_extension(CONTENT_TYPE_ANY) == ".*"


def test_get_format():
assert get_format(CONTENT_TYPE_APP_JSON) == Format(CONTENT_TYPE_APP_JSON) # basic
assert get_format(CONTENT_TYPE_APP_JSON + "; charset=UTF-8") == Format(CONTENT_TYPE_APP_JSON)
assert get_format(CONTENT_TYPE_APP_GEOJSON) == Format(CONTENT_TYPE_APP_GEOJSON) # pywps vendor MIME-type
assert get_format(CONTENT_TYPE_APP_NETCDF).encoding == "base64" # extra encoding data available


def test_get_cwl_file_format_tuple():
tested = set(FORMAT_NAMESPACES)
for mime_type in [CONTENT_TYPE_APP_JSON, CONTENT_TYPE_APP_NETCDF]:
Expand Down Expand Up @@ -63,6 +94,32 @@ def test_get_cwl_file_format_default():
assert res == iana_url + fmt


def test_get_cwl_file_format_retry_attempts():
"""Verifies that failing request will not immediately fail the MIME-type validation."""
codes = {"codes": [HTTPOk.code, HTTPRequestTimeout.code]} # note: used in reverse order

def mock_request_retry(*args, **kwargs): # noqa: E811
m_resp = Response()
m_resp.status_code = codes["codes"].pop()
return m_resp

with mock.patch("requests.request", side_effect=mock_request_retry) as mocked_request:
_, fmt = get_cwl_file_format(CONTENT_TYPE_APP_JSON)
assert fmt == "{}:{}".format(IANA_NAMESPACE, CONTENT_TYPE_APP_JSON)
assert mocked_request.call_count == 2


def test_get_cwl_file_format_retry_fallback_urlopen():
"""Verifies that failing request because of critical error still validate the MIME-type using the fallback."""
def mock_connect_error(*args, **kwargs): # noqa: E811
raise ConnectionError()

with mock.patch("requests.request", side_effect=mock_connect_error) as mocked_request:
_, fmt = get_cwl_file_format(CONTENT_TYPE_APP_JSON)
assert fmt == "{}:{}".format(IANA_NAMESPACE, CONTENT_TYPE_APP_JSON)
assert mocked_request.call_count == 1


def test_clean_mime_type_format_iana():
iana_fmt = "{}:{}".format(IANA_NAMESPACE, CONTENT_TYPE_APP_JSON) # "iana:mime_type"
res_type = clean_mime_type_format(iana_fmt)
Expand All @@ -80,3 +137,42 @@ def test_clean_mime_type_format_edam():
edam_fmt = os.path.join(list(EDAM_NAMESPACE_DEFINITION.values())[0], fmt) # "edam-url/format_####"
res_type = clean_mime_type_format(edam_fmt)
assert res_type == mime_type # application/x-type


def test_clean_mime_type_format_io_remove_extra_parameters():
test_input_formats = [
(CONTENT_TYPE_APP_JSON, CONTENT_TYPE_APP_JSON),
(CONTENT_TYPE_APP_JSON, CONTENT_TYPE_APP_JSON + "; charset=UTF-8"),
(CONTENT_TYPE_APP_XML, CONTENT_TYPE_APP_XML + "; charset=UTF-8; version=1.0"),
("application/vnd.api+json", "application/vnd.api+json; charset=UTF-8"),
("application/vnd.api+json", "application/vnd.api+json"),
]
for expect_fmt, test_fmt in test_input_formats:
res_type = clean_mime_type_format(test_fmt, strip_parameters=True)
assert res_type == expect_fmt


def test_clean_mime_type_format_io_strip_base_type():
test_input_formats = [
(CONTENT_TYPE_APP_JSON, CONTENT_TYPE_APP_JSON),
(CONTENT_TYPE_APP_JSON + "; charset=UTF-8", CONTENT_TYPE_APP_JSON + "; charset=UTF-8"),
(CONTENT_TYPE_APP_XML + "; charset=UTF-8; version=1.0", CONTENT_TYPE_APP_XML + "; charset=UTF-8; version=1.0"),
(CONTENT_TYPE_APP_JSON + "; charset=UTF-8", "application/vnd.api+json; charset=UTF-8"),
(CONTENT_TYPE_APP_JSON, "application/vnd.api+json"),
]
for expect_fmt, test_fmt in test_input_formats:
res_type = clean_mime_type_format(test_fmt, base_subtype=True)
assert res_type == expect_fmt


def test_clean_mime_type_format_io_strip_base_and_remove_parameters():
test_input_formats = [
(CONTENT_TYPE_APP_JSON, CONTENT_TYPE_APP_JSON),
(CONTENT_TYPE_APP_JSON, CONTENT_TYPE_APP_JSON + "; charset=UTF-8"),
(CONTENT_TYPE_APP_XML, CONTENT_TYPE_APP_XML + "; charset=UTF-8; version=1.0"),
(CONTENT_TYPE_APP_JSON, "application/vnd.api+json; charset=UTF-8"),
(CONTENT_TYPE_APP_JSON, "application/vnd.api+json"),
]
for expect_fmt, test_fmt in test_input_formats:
res_type = clean_mime_type_format(test_fmt, base_subtype=True, strip_parameters=True)
assert res_type == expect_fmt
53 changes: 49 additions & 4 deletions tests/test_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,9 +10,16 @@
import mock
import pytest
from lxml import etree
from pyramid.httpexceptions import HTTPConflict
from pyramid.httpexceptions import HTTPError as PyramidHTTPError
from pyramid.httpexceptions import HTTPInternalServerError, HTTPNotFound, HTTPRequestTimeout
from pyramid.httpexceptions import (
HTTPOk,
HTTPCreated,
HTTPNotFound,
HTTPConflict,
HTTPError as PyramidHTTPError,
HTTPInternalServerError,
HTTPRequestTimeout,
HTTPGatewayTimeout,
)
from pywps.response.status import WPS_STATUS
from requests.exceptions import HTTPError as RequestsHTTPError
from requests import Response
Expand All @@ -21,7 +28,7 @@
from tests.compat import contextlib
from tests.utils import mocked_file_response
from weaver import status, utils
from weaver.utils import _NullType, null, fetch_file, make_dirs # noqa: W0212
from weaver.utils import _NullType, null, fetch_file, make_dirs, request_retry # noqa: W0212


def test_null_operators():
Expand Down Expand Up @@ -356,6 +363,44 @@ def test_fetch_file_local_with_protocol():
shutil.rmtree(res_dir, ignore_errors=True)


def test_request_retry_allowed_codes():
"""Verifies that ``allowed_codes`` only are considered as valid status instead of any non-error HTTP code."""
mocked_codes = {"codes": [HTTPCreated.code, HTTPOk.code, HTTPCreated.code]} # note: used in reverse order

def mocked_request(*args, **kwargs): # noqa: E811
mocked_resp = Response()
mocked_resp.status_code = mocked_codes["codes"].pop()
return mocked_resp

with mock.patch("requests.request", side_effect=mocked_request) as mocked:
resp = request_retry("get", "http://whatever", retries=3, allowed_codes=[HTTPOk.code])
assert resp.status_code == HTTPOk.code
assert mocked.call_count == 2


def test_request_retry_intervals():
"""Verifies that ``intervals`` are used for calling the retry operations instead of ``backoff``/``retries``."""

def mock_request(*args, **kwargs): # noqa: E811
m_resp = Response()
m_resp.status_code = HTTPNotFound.code
return m_resp

def mock_sleep(delay): # noqa: E811
return

with mock.patch("requests.request", side_effect=mock_request) as mocked_request:
with mock.patch("weaver.utils.time.sleep", side_effect=mock_sleep) as mocked_sleep:
intervals = [1e6, 3e6, 5e6] # random values that shouldn't normally be used with sleep() (too big)
# values will not match if backoff/retries are not automatically corrected by internals parameter
resp = request_retry("get", "http://whatever", intervals=intervals, backoff=1000, retries=10)
assert resp.status_code == HTTPGatewayTimeout.code
assert mocked_request.call_count == 3
# NOTE: below could fail if using debugger/breakpoints that uses more calls to sleep()
assert mocked_sleep.call_count == 3
mocked_sleep.assert_has_calls([mock.call(i) for i in intervals])


def test_fetch_file_remote_with_request():
"""
Test function :func:`weaver.utils.fetch_file` when the reference is an URL.
Expand Down
15 changes: 10 additions & 5 deletions tests/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,9 @@
from weaver.wps_restapi.processes.processes import execute_process

if TYPE_CHECKING:
from weaver.typedefs import Any, AnyStr, Callable, List, Optional, SettingsType, Type, Union # noqa: F401
from weaver.typedefs import ( # noqa: F401
Any, AnyResponseType, AnyStr, Callable, List, Optional, SettingsType, Type, Union
)


def ignore_warning_regex(func, warning_message_regex, warning_categories=DeprecationWarning):
Expand Down Expand Up @@ -260,13 +262,15 @@ def read(self, chuck_size=None): # noqa: E811


def mocked_sub_requests(app, function, *args, **kwargs):
# type: (TestApp, AnyStr, *Any, **Any) -> AnyResponseType
"""
Executes ``app.function(*args, **kwargs)`` with a mock of every underlying :func:`requests.request` call
to relay their execution to the :class:`webTest.TestApp`.
Generates a `fake` response from a file if the URL scheme is ``mock://``.
"""

def mocked_request(method, url=None, headers=None, verify=None, cert=None, **req_kwargs): # noqa: E811
def mocked_app_request(method, url=None, headers=None, verify=None, cert=None, **req_kwargs): # noqa: E811
"""
Request corresponding to :func:`requests.request` that instead gets executed by :class:`webTest.TestApp`.
"""
Expand All @@ -278,17 +282,18 @@ def mocked_request(method, url=None, headers=None, verify=None, cert=None, **req
if query:
url = url + "?" + query
if not url.startswith("mock://"):
resp = req(url, params=req_kwargs.get("data"), headers=headers)
resp = req(url, params=req_kwargs.get("data"), headers=headers, expect_errors=True)
setattr(resp, "content", resp.body)
else:
path = get_url_without_query(url.replace("mock://", ""))
resp = mocked_file_response(path, url)
return resp

with contextlib.ExitStack() as stack:
stack.enter_context(mock.patch("requests.request", side_effect=mocked_request))
stack.enter_context(mock.patch("requests.sessions.Session.request", side_effect=mocked_request))
stack.enter_context(mock.patch("requests.request", side_effect=mocked_app_request))
stack.enter_context(mock.patch("requests.sessions.Session.request", side_effect=mocked_app_request))
request_func = getattr(app, function)
kwargs.setdefault("expect_errors", True)
return request_func(*args, **kwargs)


Expand Down
Loading

0 comments on commit f555d3e

Please sign in to comment.