Merge pull request #128 from crim-ca/retry-reuse

retry reuse + mime-type handling
crim-ca · May 7, 2020 · f555d3e · f555d3e
2 parents d2992ab + 0175756
commit f555d3e
Show file tree

Hide file tree

Showing 14 changed files with 360 additions and 99 deletions.
diff --git a/CHANGES.rst b/CHANGES.rst
@@ -4,6 +4,21 @@ Changes
 `Unreleased <https://github.com/crim-ca/weaver/tree/master>`_ (latest)
 ========================================================================
 
+Changes:
+--------
+
+- Reuse ``weaver.utils.request_retry`` function across a few locations that where essentially reimplementing
+  the core functionality.
+- Add even more failure-permissive request attempts when validating a MIME-type against IANA website.
+- Add auto-resolution of common extensions known under `PyWPS` as well as employing their specific encoding.
+- Add ``geotiff`` format type support via `PyWPS` (`#100 <https://github.com/crim-ca/weaver/issues/100>`_).
+- Make WPS status check more resilient to failing WPS outputs location not found in case the directory path can be
+  resolved to a valid local file representing the XML status (i.e.: don't depend as much on the HTTP WPS output route).
+- Ensure backward support of generic/default ``text/plain`` I/O when extracted from a referenced WPS-1/2 XML remote
+  process which provides insufficient format details. For CWL output generated from it, replace the glob pattern to
+  match anything (``<id>.*``) instead of ``<id>.txt`` extracted from ``text/plain`` to simulate MIME-type as ``*/*``.
+  Issue log warning message for future use cases.
+
 Fixes:
 ------
 
@@ -14,6 +29,8 @@ Fixes:
   not allowed by this implementation.
 - Fix parsing of explicitly-typed optional array CWL I/O notation that was not considered
   (i.e.: using ``type`` as list with additional ``"null"`` instead of ``type: "<type>?"`` shorthand).
+- Fix parsing of MIME-type from ``format`` field to exclude additional parameters (e.g.: ``; charset=UTF-8`` for
+  remote IANA validation.
 
 `1.5.1 <https://github.com/crim-ca/weaver/tree/1.5.1>`_ (2020-03-26)
 ========================================================================

diff --git a/requirements-dev.txt b/requirements-dev.txt
@@ -10,7 +10,7 @@ isort
 mock
 pluggy>=0.7
 pytest
-pylint
+pylint>=2.5
 pylint_quotes
 responses
 WebTest

diff --git a/tests/functional/test_builtin.py b/tests/functional/test_builtin.py
@@ -37,6 +37,7 @@ def setUp(self):
             "weaver.wps": True,
             "weaver.wps_output": True,
             "weaver.wps_output_path": "/wpsoutputs",
+            "weaver.wps_output_dir": "/tmp",  # nosec: B108 # don't care hardcoded for test
             "weaver.wps_path": "/ows/wps",
             "weaver.wps_restapi_path": "/",
         }
@@ -99,8 +100,8 @@ def test_jsonarray2netcdf_execute(self):
             assert resp.content_type in CONTENT_TYPE_APP_JSON
             job_url = resp.json["location"]
             nc_path = None
-            for _ in range(5):
-                sleep(1)
+            for delay in range(5):
+                sleep(delay)
                 resp = self.app.get(job_url, headers=self.json_headers)
                 if resp.status_code == 200:
                     if resp.json["status"] in JOB_STATUS_CATEGORIES[STATUS_CATEGORY_RUNNING]:

diff --git a/tests/processes/test_wps_package.py b/tests/processes/test_wps_package.py
@@ -9,7 +9,7 @@
 from weaver.formats import CONTENT_TYPE_APP_JSON, CONTENT_TYPE_APP_NETCDF, CONTENT_TYPE_APP_XML, CONTENT_TYPE_TEXT_PLAIN
 from weaver.processes.wps_package import (
     WPS_LITERAL,
-    DefaultFormat,
+    DEFAULT_FORMAT,
     _are_different_and_set,
     _is_cwl_array_type,
     _is_cwl_enum_type,
@@ -356,16 +356,16 @@ def assert_formats_equal_any_order(format_result, format_expect):
 
 def test_merge_io_formats_no_wps():
     wps_fmt = []
-    cwl_fmt = [DefaultFormat]
+    cwl_fmt = [DEFAULT_FORMAT]
     res_fmt = _merge_io_formats(wps_fmt, cwl_fmt)
     assert isinstance(res_fmt, list)
     assert len(res_fmt) == 1
-    assert res_fmt[0] is DefaultFormat
+    assert res_fmt[0] is DEFAULT_FORMAT
 
 
 def test_merge_io_formats_with_wps_and_default_cwl():
     wps_fmt = [Format(CONTENT_TYPE_APP_NETCDF)]
-    cwl_fmt = [DefaultFormat]
+    cwl_fmt = [DEFAULT_FORMAT]
     res_fmt = _merge_io_formats(wps_fmt, cwl_fmt)
     assert isinstance(res_fmt, list)
     assert_formats_equal_any_order(res_fmt, [Format(CONTENT_TYPE_APP_NETCDF)])

diff --git a/tests/test_formats.py b/tests/test_formats.py
@@ -1,21 +1,52 @@
 import os
 
+import mock
 import six
+from pyramid.httpexceptions import HTTPOk, HTTPRequestTimeout
+from pyramid.response import Response
+from pywps.inout.formats import Format
+from requests.exceptions import ConnectionError
 
 from weaver.formats import (
+    CONTENT_TYPE_ANY,
     CONTENT_TYPE_APP_JSON,
+    CONTENT_TYPE_APP_GEOJSON,
     CONTENT_TYPE_APP_NETCDF,
+    CONTENT_TYPE_APP_XML,
+    CONTENT_TYPE_IMAGE_GEOTIFF,
     EDAM_MAPPING,
     EDAM_NAMESPACE,
     EDAM_NAMESPACE_DEFINITION,
     FORMAT_NAMESPACES,
     IANA_NAMESPACE,
     IANA_NAMESPACE_DEFINITION,
     clean_mime_type_format,
-    get_cwl_file_format
+    get_cwl_file_format,
+    get_extension,
+    get_format,
 )
 
 
+def test_get_extension():
+    assert get_extension(CONTENT_TYPE_APP_JSON) == ".json"  # basic
+    assert get_extension(CONTENT_TYPE_APP_JSON + "; charset=UTF-8") == ".json"  # ignore extra parameters
+    assert get_extension(CONTENT_TYPE_APP_GEOJSON) == ".geojson"  # pywps definition
+    assert get_extension(CONTENT_TYPE_IMAGE_GEOTIFF) == ".tiff"  # pywps definition
+    assert get_extension("application/x-custom") == ".custom"
+    assert get_extension("application/unknown") == ".unknown"
+
+
+def test_get_extension_glob_any():
+    assert get_extension(CONTENT_TYPE_ANY) == ".*"
+
+
+def test_get_format():
+    assert get_format(CONTENT_TYPE_APP_JSON) == Format(CONTENT_TYPE_APP_JSON)  # basic
+    assert get_format(CONTENT_TYPE_APP_JSON + "; charset=UTF-8") == Format(CONTENT_TYPE_APP_JSON)
+    assert get_format(CONTENT_TYPE_APP_GEOJSON) == Format(CONTENT_TYPE_APP_GEOJSON)  # pywps vendor MIME-type
+    assert get_format(CONTENT_TYPE_APP_NETCDF).encoding == "base64"  # extra encoding data available
+
+
 def test_get_cwl_file_format_tuple():
     tested = set(FORMAT_NAMESPACES)
     for mime_type in [CONTENT_TYPE_APP_JSON, CONTENT_TYPE_APP_NETCDF]:
@@ -63,6 +94,32 @@ def test_get_cwl_file_format_default():
     assert res == iana_url + fmt
 
 
+def test_get_cwl_file_format_retry_attempts():
+    """Verifies that failing request will not immediately fail the MIME-type validation."""
+    codes = {"codes": [HTTPOk.code, HTTPRequestTimeout.code]}  # note: used in reverse order
+
+    def mock_request_retry(*args, **kwargs):  # noqa: E811
+        m_resp = Response()
+        m_resp.status_code = codes["codes"].pop()
+        return m_resp
+
+    with mock.patch("requests.request", side_effect=mock_request_retry) as mocked_request:
+        _, fmt = get_cwl_file_format(CONTENT_TYPE_APP_JSON)
+        assert fmt == "{}:{}".format(IANA_NAMESPACE, CONTENT_TYPE_APP_JSON)
+        assert mocked_request.call_count == 2
+
+
+def test_get_cwl_file_format_retry_fallback_urlopen():
+    """Verifies that failing request because of critical error still validate the MIME-type using the fallback."""
+    def mock_connect_error(*args, **kwargs):  # noqa: E811
+        raise ConnectionError()
+
+    with mock.patch("requests.request", side_effect=mock_connect_error) as mocked_request:
+        _, fmt = get_cwl_file_format(CONTENT_TYPE_APP_JSON)
+        assert fmt == "{}:{}".format(IANA_NAMESPACE, CONTENT_TYPE_APP_JSON)
+        assert mocked_request.call_count == 1
+
+
 def test_clean_mime_type_format_iana():
     iana_fmt = "{}:{}".format(IANA_NAMESPACE, CONTENT_TYPE_APP_JSON)  # "iana:mime_type"
     res_type = clean_mime_type_format(iana_fmt)
@@ -80,3 +137,42 @@ def test_clean_mime_type_format_edam():
     edam_fmt = os.path.join(list(EDAM_NAMESPACE_DEFINITION.values())[0], fmt)  # "edam-url/format_####"
     res_type = clean_mime_type_format(edam_fmt)
     assert res_type == mime_type  # application/x-type
+
+
+def test_clean_mime_type_format_io_remove_extra_parameters():
+    test_input_formats = [
+        (CONTENT_TYPE_APP_JSON, CONTENT_TYPE_APP_JSON),
+        (CONTENT_TYPE_APP_JSON, CONTENT_TYPE_APP_JSON + "; charset=UTF-8"),
+        (CONTENT_TYPE_APP_XML, CONTENT_TYPE_APP_XML + "; charset=UTF-8; version=1.0"),
+        ("application/vnd.api+json", "application/vnd.api+json; charset=UTF-8"),
+        ("application/vnd.api+json", "application/vnd.api+json"),
+    ]
+    for expect_fmt, test_fmt in test_input_formats:
+        res_type = clean_mime_type_format(test_fmt, strip_parameters=True)
+        assert res_type == expect_fmt
+
+
+def test_clean_mime_type_format_io_strip_base_type():
+    test_input_formats = [
+        (CONTENT_TYPE_APP_JSON, CONTENT_TYPE_APP_JSON),
+        (CONTENT_TYPE_APP_JSON + "; charset=UTF-8", CONTENT_TYPE_APP_JSON + "; charset=UTF-8"),
+        (CONTENT_TYPE_APP_XML + "; charset=UTF-8; version=1.0", CONTENT_TYPE_APP_XML + "; charset=UTF-8; version=1.0"),
+        (CONTENT_TYPE_APP_JSON + "; charset=UTF-8", "application/vnd.api+json; charset=UTF-8"),
+        (CONTENT_TYPE_APP_JSON, "application/vnd.api+json"),
+    ]
+    for expect_fmt, test_fmt in test_input_formats:
+        res_type = clean_mime_type_format(test_fmt, base_subtype=True)
+        assert res_type == expect_fmt
+
+
+def test_clean_mime_type_format_io_strip_base_and_remove_parameters():
+    test_input_formats = [
+        (CONTENT_TYPE_APP_JSON, CONTENT_TYPE_APP_JSON),
+        (CONTENT_TYPE_APP_JSON, CONTENT_TYPE_APP_JSON + "; charset=UTF-8"),
+        (CONTENT_TYPE_APP_XML, CONTENT_TYPE_APP_XML + "; charset=UTF-8; version=1.0"),
+        (CONTENT_TYPE_APP_JSON, "application/vnd.api+json; charset=UTF-8"),
+        (CONTENT_TYPE_APP_JSON, "application/vnd.api+json"),
+    ]
+    for expect_fmt, test_fmt in test_input_formats:
+        res_type = clean_mime_type_format(test_fmt, base_subtype=True, strip_parameters=True)
+        assert res_type == expect_fmt
diff --git a/tests/test_utils.py b/tests/test_utils.py
@@ -10,9 +10,16 @@
 import mock
 import pytest
 from lxml import etree
-from pyramid.httpexceptions import HTTPConflict
-from pyramid.httpexceptions import HTTPError as PyramidHTTPError
-from pyramid.httpexceptions import HTTPInternalServerError, HTTPNotFound, HTTPRequestTimeout
+from pyramid.httpexceptions import (
+    HTTPOk,
+    HTTPCreated,
+    HTTPNotFound,
+    HTTPConflict,
+    HTTPError as PyramidHTTPError,
+    HTTPInternalServerError,
+    HTTPRequestTimeout,
+    HTTPGatewayTimeout,
+)
 from pywps.response.status import WPS_STATUS
 from requests.exceptions import HTTPError as RequestsHTTPError
 from requests import Response
@@ -21,7 +28,7 @@
 from tests.compat import contextlib
 from tests.utils import mocked_file_response
 from weaver import status, utils
-from weaver.utils import _NullType, null, fetch_file, make_dirs  # noqa: W0212
+from weaver.utils import _NullType, null, fetch_file, make_dirs, request_retry  # noqa: W0212
 
 
 def test_null_operators():
@@ -356,6 +363,44 @@ def test_fetch_file_local_with_protocol():
             shutil.rmtree(res_dir, ignore_errors=True)
 
 
+def test_request_retry_allowed_codes():
+    """Verifies that ``allowed_codes`` only are considered as valid status instead of any non-error HTTP code."""
+    mocked_codes = {"codes": [HTTPCreated.code, HTTPOk.code, HTTPCreated.code]}  # note: used in reverse order
+
+    def mocked_request(*args, **kwargs):  # noqa: E811
+        mocked_resp = Response()
+        mocked_resp.status_code = mocked_codes["codes"].pop()
+        return mocked_resp
+
+    with mock.patch("requests.request", side_effect=mocked_request) as mocked:
+        resp = request_retry("get", "http://whatever", retries=3, allowed_codes=[HTTPOk.code])
+        assert resp.status_code == HTTPOk.code
+        assert mocked.call_count == 2
+
+
+def test_request_retry_intervals():
+    """Verifies that ``intervals`` are used for calling the retry operations instead of ``backoff``/``retries``."""
+
+    def mock_request(*args, **kwargs):  # noqa: E811
+        m_resp = Response()
+        m_resp.status_code = HTTPNotFound.code
+        return m_resp
+
+    def mock_sleep(delay):  # noqa: E811
+        return
+
+    with mock.patch("requests.request", side_effect=mock_request) as mocked_request:
+        with mock.patch("weaver.utils.time.sleep", side_effect=mock_sleep) as mocked_sleep:
+            intervals = [1e6, 3e6, 5e6]  # random values that shouldn't normally be used with sleep() (too big)
+            # values will not match if backoff/retries are not automatically corrected by internals parameter
+            resp = request_retry("get", "http://whatever", intervals=intervals, backoff=1000, retries=10)
+            assert resp.status_code == HTTPGatewayTimeout.code
+            assert mocked_request.call_count == 3
+            # NOTE: below could fail if using debugger/breakpoints that uses more calls to sleep()
+            assert mocked_sleep.call_count == 3
+            mocked_sleep.assert_has_calls([mock.call(i) for i in intervals])
+
+
 def test_fetch_file_remote_with_request():
     """
     Test function :func:`weaver.utils.fetch_file` when the reference is an URL.

diff --git a/tests/utils.py b/tests/utils.py
@@ -30,7 +30,9 @@
 from weaver.wps_restapi.processes.processes import execute_process
 
 if TYPE_CHECKING:
-    from weaver.typedefs import Any, AnyStr, Callable, List, Optional, SettingsType, Type, Union  # noqa: F401
+    from weaver.typedefs import (  # noqa: F401
+        Any, AnyResponseType, AnyStr, Callable, List, Optional, SettingsType, Type, Union
+    )
 
 
 def ignore_warning_regex(func, warning_message_regex, warning_categories=DeprecationWarning):
@@ -260,13 +262,15 @@ def read(self, chuck_size=None):  # noqa: E811
 
 
 def mocked_sub_requests(app, function, *args, **kwargs):
+    # type: (TestApp, AnyStr, *Any, **Any) -> AnyResponseType
     """
     Executes ``app.function(*args, **kwargs)`` with a mock of every underlying :func:`requests.request` call
     to relay their execution to the :class:`webTest.TestApp`.
+
     Generates a `fake` response from a file if the URL scheme is ``mock://``.
     """
 
-    def mocked_request(method, url=None, headers=None, verify=None, cert=None, **req_kwargs):  # noqa: E811
+    def mocked_app_request(method, url=None, headers=None, verify=None, cert=None, **req_kwargs):  # noqa: E811
         """
         Request corresponding to :func:`requests.request` that instead gets executed by :class:`webTest.TestApp`.
         """
@@ -278,17 +282,18 @@ def mocked_request(method, url=None, headers=None, verify=None, cert=None, **req
         if query:
             url = url + "?" + query
         if not url.startswith("mock://"):
-            resp = req(url, params=req_kwargs.get("data"), headers=headers)
+            resp = req(url, params=req_kwargs.get("data"), headers=headers, expect_errors=True)
             setattr(resp, "content", resp.body)
         else:
             path = get_url_without_query(url.replace("mock://", ""))
             resp = mocked_file_response(path, url)
         return resp
 
     with contextlib.ExitStack() as stack:
-        stack.enter_context(mock.patch("requests.request", side_effect=mocked_request))
-        stack.enter_context(mock.patch("requests.sessions.Session.request", side_effect=mocked_request))
+        stack.enter_context(mock.patch("requests.request", side_effect=mocked_app_request))
+        stack.enter_context(mock.patch("requests.sessions.Session.request", side_effect=mocked_app_request))
         request_func = getattr(app, function)
+        kwargs.setdefault("expect_errors", True)
         return request_func(*args, **kwargs)