diff --git a/src/reader/_parser/__init__.py b/src/reader/_parser/__init__.py index c734d193..e4c58a7a 100644 --- a/src/reader/_parser/__init__.py +++ b/src/reader/_parser/__init__.py @@ -414,16 +414,21 @@ def process_entry_pairs( @contextmanager -def wrap_exceptions( - url: str, when: str, cls: type[ParseError] = ParseError, **kwargs: Any -) -> Iterator[None]: +def wrap_exceptions(url: str | ParseError, message: str = '') -> Iterator[None]: try: yield + except ParseError: # reader exceptions are pass-through raise - except OSError as e: - # requests.RequestException is also a subclass of OSError - raise cls(url, message=f"error {when}", **kwargs) from e + except Exception as e: - raise cls(url, message=f"unexpected error {when}", **kwargs) from e + exc = ParseError(url, message=message) if isinstance(url, str) else url + + if isinstance(e, OSError): + # expected exception raised for various I/O errors; + # requests.RequestException is a subclass of OSError + raise exc from e + + exc._message = f"unexpected error {exc._message}".rstrip() + raise exc from e diff --git a/src/reader/_parser/http.py b/src/reader/_parser/http.py index 7060c8a1..ca880bd6 100644 --- a/src/reader/_parser/http.py +++ b/src/reader/_parser/http.py @@ -54,57 +54,46 @@ def __call__( if http_accept: request_headers['Accept'] = http_accept - with self.get_session() as session: - with wrap_exceptions(url, "while getting feed"): - response, http_etag, http_last_modified = session.caching_get( - url, - http_etag, - http_last_modified, - headers=request_headers, - stream=True, - ) + error = RetrieveError(url) + + with self.get_session() as session, wrap_exceptions(error): + error._message = "while getting feed" + response, http_etag, http_last_modified = session.caching_get( + url, + http_etag, + http_last_modified, + headers=request_headers, + stream=True, + ) + + with response: + http_info = HTTPInfo(response.status_code, response.headers) + error.http_info = http_info + + if response.status_code == 304: + raise NotModified(url, http_info=http_info) + + error._message = "bad HTTP status code" + response.raise_for_status() - response_headers = response.headers.copy() - http_info = HTTPInfo(response.status_code, response_headers) + response.headers.setdefault('content-location', response.url) - try: - response.raise_for_status() - except Exception as e: - response.close() - raise RetrieveError( - url, - message="bad HTTP status code", - http_info=http_info, - ) from e - - if response.status_code == 304: - response.close() - raise NotModified(url, http_info=http_info) - - response_headers.setdefault('content-location', response.url) - - # https://datatracker.ietf.org/doc/html/rfc9110#name-content-encoding - # Content-Encoding is the counterpart of Accept-Encoding; - # it is about binary transformations (mainly compression), - # not text encoding (Content-Type charset does that). - # We let Requests/urllib3 take care of it and remove the header, - # so parsers (like feedparser) don't do it a second time. - response_headers.pop('content-encoding', None) - response.raw.decode_content = True - - content_type = response_headers.get('content-type') - mime_type: str | None - if content_type: - mime_type, _ = parse_options_header(content_type) - else: - mime_type = None - - with ( - wrap_exceptions( - url, "while reading feed", RetrieveError, http_info=http_info - ), - response, - ): + # https://datatracker.ietf.org/doc/html/rfc9110#name-content-encoding + # Content-Encoding is the counterpart of Accept-Encoding; + # it is about binary transformations (mainly compression), + # not text encoding (Content-Type charset does that). + # We let Requests/urllib3 take care of it and remove the header, + # so parsers (like feedparser) don't do it a second time. + response.headers.pop('content-encoding', None) + response.raw.decode_content = True + + content_type = response.headers.get('content-type') + if content_type: + mime_type, _ = parse_options_header(content_type) + else: + mime_type = None + + error._message = "while reading feed" yield RetrievedFeed( response.raw, mime_type, diff --git a/src/reader/exceptions.py b/src/reader/exceptions.py index f2a594a7..7cacc3c4 100644 --- a/src/reader/exceptions.py +++ b/src/reader/exceptions.py @@ -29,7 +29,7 @@ class _FancyExceptionBase(Exception): _default_message: str = '' def __init__(self, message: str = ''): - self._message = message + self._message = message or self._default_message @property def _str(self) -> str: @@ -44,7 +44,8 @@ def message(self) -> str: Became read-only. """ - return self._message or self._default_message + # read-only for compatibility with ExceptionGroup + return self._message @cached_property def _cause_name(self) -> str: diff --git a/tests/test_parser.py b/tests/test_parser.py index 45dc21bd..9051944d 100644 --- a/tests/test_parser.py +++ b/tests/test_parser.py @@ -557,7 +557,7 @@ def feedparser_parse(*args, **kwargs): parse(feed_url) assert excinfo.value.__cause__ is exc assert excinfo.value.url == feed_url - assert 'error during parser' in excinfo.value.message + assert 'during parser' in excinfo.value.message assert feedparser_parse.kwargs['resolve_relative_uris'] == True assert feedparser_parse.kwargs['sanitize_html'] == True