From f13daf215d976d49b79e5f12e056d93dda84fe79 Mon Sep 17 00:00:00 2001 From: lemon24 Date: Fri, 16 Aug 2024 11:21:55 +0300 Subject: [PATCH] Rework parser API to return HTTP info for errors too. #307 --- src/reader/_parser/__init__.py | 154 +++++++++++++++------- src/reader/_parser/_lazy.py | 225 ++++++++++++++++++++------------- src/reader/_parser/file.py | 7 +- src/reader/_parser/http.py | 27 ++-- src/reader/_types.py | 14 +- src/reader/_update.py | 24 ++-- tests/fakeparser.py | 13 +- tests/test_parser.py | 10 +- tests/test_reader_private.py | 4 +- 9 files changed, 304 insertions(+), 174 deletions(-) diff --git a/src/reader/_parser/__init__.py b/src/reader/_parser/__init__.py index 853373ae..bc07972b 100644 --- a/src/reader/_parser/__init__.py +++ b/src/reader/_parser/__init__.py @@ -34,12 +34,8 @@ if TYPE_CHECKING: # pragma: no cover from ._lazy import Parser as Parser -__getattr__ = lazy_import(__name__, ['Parser']) - -T = TypeVar('T') -T_co = TypeVar('T_co', covariant=True) -T_cv = TypeVar('T_cv', contravariant=True) +__getattr__ = lazy_import(__name__, ['Parser']) def default_parser( @@ -158,19 +154,94 @@ def mount_parser_by_url(self, url: str, parser: ParserType[Any]) -> None: self._lazy_call('mount_parser_by_url', url, parser) +class FeedArgument(Protocol): # pragma: no cover + """Any :class:`~reader._types.FeedForUpdate`-like object.""" + + @property + def url(self) -> str: + """The feed URL.""" + + @property + def http_etag(self) -> str | None: + """The HTTP ``ETag`` header from the last update.""" + + @property + def http_last_modified(self) -> str | None: + """The the HTTP ``Last-Modified`` header from the last update.""" + + +T = TypeVar('T') +T_co = TypeVar('T_co', covariant=True) +T_cv = TypeVar('T_cv', contravariant=True) +F = TypeVar('F', bound=FeedArgument) +E = TypeVar('E', bound=Exception) + + @dataclass(frozen=True) -class RetrieveResult(_namedtuple_compat, Generic[T_co]): - """The result of retrieving a feed, plus metadata.""" +class HTTPInfo(_namedtuple_compat): + """Details about an HTTP response.""" + + #: The HTTP status code. + status: int + + #: The HTTP response headers. + headers: Headers + + +class RetrieveError(ParseError): + """An error occurred while retrieving the feed. + + Can be used by retrievers to pass additional information to the parser. + + """ + + def __init__( + self, + url: str, + /, + message: str = '', + http_info: HTTPInfo | None = None, + ) -> None: + super().__init__(url, message=message) + + #: Details about the HTTP response. + self.http_info = http_info + + +class NotModified(RetrieveError): + """Raised by retrievers to tell the parser that the feed was not modified.""" + + _default_message = "not modified" + + +@dataclass(frozen=True) +class RetrieveResult(_namedtuple_compat, Generic[F, T, E]): + """The result of retrieving a feed, regardless of the outcome.""" # should be a NamedTuple, but the typing one became generic only in 3.11, # and we don't want to depend on typing_extensions at runtime + #: The feed (a :class:`FeedArgument`, usually a :class:`FeedForUpdate`). + feed: F + + #: One of: + #: + #: * a context manager with the :class:`RetrievedFeed` as target + #: * an exception + #: + value: ContextManager[RetrievedFeed[T]] | E + + +@dataclass(frozen=True) +class RetrievedFeed(_namedtuple_compat, Generic[T]): + """A (successfully) retrieved feed, plus metadata.""" + # TODO: coalesce http_etag and http_last_modified into a single thing? - #: The result of retrieving a feed. + #: The retrieved resource. 
#: Usually, a readable binary file. #: Passed to the parser. - resource: T_co + resource: T #: The MIME type of the resource. #: Used to select an appropriate parser. mime_type: str | None = None @@ -180,9 +251,8 @@ class RetrieveResult(_namedtuple_compat, Generic[T_co]): #: The HTTP ``Last-Modified`` header associated with the resource. #: Passed back to the retriever on the next update. http_last_modified: str | None = None - #: The HTTP response headers associated with the resource. - #: Passed to the parser. - headers: Headers | None = None + #: Details about the HTTP response. + http_info: HTTPInfo | None = None class RetrieverType(Protocol[T_co]): # pragma: no cover @@ -200,7 +270,7 @@ def __call__( http_etag: str | None, http_last_modified: str | None, http_accept: str | None, - ) -> ContextManager[RetrieveResult[T_co] | None]: + ) -> ContextManager[RetrievedFeed[T_co] | T_co]: """Retrieve a feed. Args: @@ -213,12 +283,15 @@ def __call__( Content types to be retrieved, as an HTTP ``Accept`` header. Returns: - contextmanager(RetrieveResult or None): - A context manager that has as target either the result - or :const:`None`, if the feed didn't change. + contextmanager(RetrievedFeed or None): + A context manager that has as target either + a :class:`RetrievedFeed` wrapping the retrieved resource, + or the bare resource. Raises: ParseError + RetrieveError: To pass additional information to the parser. + NotModified: To tell the parser that the feed was not modified. """ @@ -249,6 +322,27 @@ def process_feed_for_update(self, feed: FeedForUpdate) -> FeedForUpdate: """ +@dataclass(frozen=True) +class ParseResult(_namedtuple_compat, Generic[F, E]): + """The result of retrieving and parsing a feed, regardless of the outcome.""" + + # FIXME: either move to _types with ParsedFeed, or move ParsedFeed here + + #: The feed (a :class:`FeedArgument`, usually a :class:`FeedForUpdate`). + feed: F + + #: One of: + #: + #: * the parsed feed + #: * :const:`None`, if the feed didn't change + #: * an exception + #: + value: ParsedFeed | None | E + + #: Details about the HTTP response. 
+ http_info: HTTPInfo | None = None + + FeedAndEntries = tuple[FeedData, Collection[EntryData]] EntryPair = tuple[EntryData, Optional[EntryForUpdate]] @@ -308,28 +402,6 @@ def process_entry_pairs( """ -class FeedArgument(Protocol): # pragma: no cover - """Any :class:`~reader._types.FeedForUpdate`-like object.""" - - @property - def url(self) -> str: - """The feed URL.""" - - @property - def http_etag(self) -> str | None: - """The HTTP ``ETag`` header from the last update.""" - - @property - def http_last_modified(self) -> str | None: - """The the HTTP ``Last-Modified`` header from the last update.""" - - -class FeedArgumentTuple(NamedTuple): - url: str - http_etag: str | None = None - http_last_modified: str | None = None - - @contextmanager def wrap_exceptions(url: str, when: str) -> Iterator[None]: try: @@ -342,9 +414,3 @@ def wrap_exceptions(url: str, when: str) -> Iterator[None]: raise ParseError(url, message=f"error {when}") from e except Exception as e: raise ParseError(url, message=f"unexpected error {when}") from e - - -@contextmanager -def wrap_cm_exceptions(cm: ContextManager[T], url: str, when: str) -> Iterator[T]: - with wrap_exceptions(url, when), cm as target: - yield target diff --git a/src/reader/_parser/_lazy.py b/src/reader/_parser/_lazy.py index 2e2b1f17..e5c92039 100644 --- a/src/reader/_parser/_lazy.py +++ b/src/reader/_parser/_lazy.py @@ -1,5 +1,6 @@ from __future__ import annotations +import builtins import logging import mimetypes import shutil @@ -7,8 +8,9 @@ from collections.abc import Iterable from collections.abc import Iterator from contextlib import contextmanager -from contextlib import nullcontext +from functools import partial from typing import Any +from typing import cast from typing import ContextManager from .._types import FeedForUpdate @@ -18,14 +20,16 @@ from ..exceptions import ParseError from . import EntryPair from . import EntryPairsParserType -from . import FeedArgument -from . import FeedArgumentTuple +from . import F from . import FeedForUpdateRetrieverType from . import HTTPAcceptParserType +from . import NotModified +from . import ParseResult from . import ParserType +from . import RetrievedFeed +from . import RetrieveError from . import RetrieveResult from . import RetrieverType -from . import wrap_cm_exceptions from . import wrap_exceptions from ._http_utils import parse_accept_header from ._http_utils import unparse_accept_header @@ -81,10 +85,10 @@ def __init__(self) -> None: def parallel( self, - feeds: Iterable[FeedArgument], + feeds: Iterable[F], map: MapFunction[Any, Any] = map, is_parallel: bool = True, - ) -> Iterable[tuple[FeedArgument, ParsedFeed | None | ParseError]]: + ) -> Iterable[ParseResult[F, ParseError]]: """Retrieve and parse many feeds, possibly in parallel. Yields the parsed feeds, as soon as they are ready. @@ -97,31 +101,13 @@ def parallel( is_parallel (bool): Whether ``map`` runs the tasks in parallel. Yields: - tuple(:class:`FeedArgument`, :class:`~reader._types.ParsedFeed` or :const:`None` or :class:`~reader.ParseError`): - - A (feed, result) pair, where result is either: - - * the parsed feed - * :const:`None`, if the feed didn't change - * an exception instance + ParseResult: + The result of retrieving and parsing a feed; + the :attr:`~ParseResult.feed` is the object passed in ``feeds``. 
""" - - def retrieve( - feed: FeedArgument, - ) -> tuple[ - FeedArgument, ContextManager[RetrieveResult[Any] | None] | Exception - ]: - try: - context = self.retrieve( - feed.url, feed.http_etag, feed.http_last_modified, is_parallel - ) - return feed, context - except Exception as e: - # pass around *all* exception types, - # unhandled exceptions get swallowed by the thread otherwise - log.debug("retrieve() exception, traceback follows", exc_info=True) - return feed, e + # FIXME: just assume is_parallel is always true? + retrieve = partial(self.retrieve_fn, is_parallel=is_parallel) with self.session_factory.persistent(): # if stuff hangs weirdly during debugging, change this to builtins.map @@ -131,26 +117,16 @@ def retrieve( # however, most of the time is spent in pure-Python code, # which doesn't benefit from the threads on CPython: # https://github.com/lemon24/reader/issues/261#issuecomment-956412131 + parse_results = builtins.map(self.parse_fn, retrieve_results) - for feed, context in retrieve_results: - if isinstance(context, ParseError): - yield feed, context - continue - - if isinstance(context, Exception): # pragma: no cover - raise context - - try: - with context as result: - if not result or isinstance(result, ParseError): - yield feed, result - continue - - yield feed, self.parse(feed.url, result) - - except ParseError as e: - log.debug("parse() exception, traceback follows", exc_info=True) - yield feed, e + # interestingly, if we "yield from ..." instead of + # "for x in ...: yield x", mypy 1.11 does not complain + # about yielding ParseResult[Exception] + for result in parse_results: + if isinstance(result.value, Exception): + if not isinstance(result.value, ParseError): + raise result.value + yield cast(ParseResult[F, ParseError], result) def __call__( self, @@ -177,14 +153,38 @@ def __call__( ParseError """ - feed = FeedArgumentTuple(url, http_etag, http_last_modified) + feed = FeedForUpdate( + url, http_etag=http_etag, http_last_modified=http_last_modified + ) # is_parallel=True ensures the parser tests cover more code - ((_, result),) = self.parallel([feed], is_parallel=True) + (result,) = self.parallel([feed], is_parallel=True) + value = result.value + + if isinstance(value, Exception): + raise value + return value + + def retrieve_fn( + self, feed: F, is_parallel: bool + ) -> RetrieveResult[F, Any, Exception]: + """:meth:`retrieve` wrapper used by :meth:`parallel`. - if isinstance(result, Exception): - raise result - return result + Takes one argument and does not raise exceptions. + + """ + try: + return RetrieveResult( + feed, + self.retrieve( + feed.url, feed.http_etag, feed.http_last_modified, is_parallel + ), + ) + except Exception as e: + # pass around *all* exception types, + # unhandled exceptions get swallowed by the thread otherwise + log.debug("retrieve() exception, traceback follows", exc_info=True) + return RetrieveResult(feed, e) def retrieve( self, @@ -192,7 +192,7 @@ def retrieve( http_etag: str | None = None, http_last_modified: str | None = None, is_parallel: bool = False, - ) -> ContextManager[RetrieveResult[Any] | None]: + ) -> ContextManager[RetrievedFeed[Any]]: """Retrieve a feed. Args: @@ -207,8 +207,7 @@ def retrieve( Returns: contextmanager(RetrieveResult or None): - A context manager that has as target either the result - or :const:`None`, if the feed didn't change. + A context manager with the retrieved feed as target. 
Raises: ParseError @@ -230,48 +229,99 @@ def retrieve( retriever = self.get_retriever(url) + return self._retrieve( + retriever, url, http_etag, http_last_modified, http_accept, is_parallel + ) + + @contextmanager + def _retrieve( + self, + retriever: RetrieverType[Any], + url: str, + http_etag: str | None, + http_last_modified: str | None, + http_accept: str | None, + is_parallel: bool, + ) -> Iterator[RetrievedFeed[Any]]: with wrap_exceptions(url, 'during retriever'): context = retriever(url, http_etag, http_last_modified, http_accept) - context = wrap_cm_exceptions(context, url, 'during retriever') + with context as feed: + if not isinstance(feed, RetrievedFeed): + feed = RetrievedFeed(feed) + + # FIXME: move slow_to_read on RetrievedFeed + + if not (is_parallel and retriever.slow_to_read): + yield feed + return + + # Ensure we read everything *before* yielding the response, + # i.e. __enter__() does most of the work. + # + # Gives a ~20% speed improvement over yielding response.raw + # when updating many feeds in parallel, + # with a 2-8% increase in memory usage: + # https://github.com/lemon24/reader/issues/261#issuecomment-956303210 + # + # SpooledTemporaryFile() is just as fast as TemporaryFile(): + # https://github.com/lemon24/reader/issues/261#issuecomment-957469041 + + with tempfile.TemporaryFile() as temp: + shutil.copyfileobj(feed.resource, temp) + temp.seek(0) + yield feed._replace(resource=temp) + + def parse_fn( + self, result: RetrieveResult[F, Any, Exception] + ) -> ParseResult[F, Exception]: + """:meth:`parse` wrapper used by :meth:`parallel`. + + Takes one argument and does not raise exceptions. - if not (is_parallel and retriever.slow_to_read): - return context + """ + feed = result.feed + context = result.value - # Ensure we read everything *before* yielding the response, - # i.e. __enter__() does most of the work. 
- # - # Gives a ~20% speed improvement over yielding response.raw - # when updating many feeds in parallel, - # with a 2-8% increase in memory usage: - # https://github.com/lemon24/reader/issues/261#issuecomment-956303210 - # - # SpooledTemporaryFile() is just as fast as TemporaryFile(): - # https://github.com/lemon24/reader/issues/261#issuecomment-957469041 + http_info = None + + value: ParsedFeed | None | Exception + try: + if isinstance(context, Exception): + raise context + + with context as retrieved: + http_info = retrieved.http_info + value = self.parse(feed.url, retrieved) - with context as result: - if not result: - return nullcontext() + except ParseError as e: + if isinstance(e, NotModified): + value = None + else: + log.debug("parse() exception, traceback follows", exc_info=True) + value = e - temp = tempfile.TemporaryFile() - shutil.copyfileobj(result.resource, temp) - temp.seek(0) + if isinstance(e, RetrieveError): + if not http_info: + http_info = e.http_info - result = result._replace(resource=temp) + except Exception as e: + # pass around *all* exception types, + # unhandled exceptions get swallowed by the thread otherwise + # (not needed now, but for symmetry with retrieve_fn()) + log.debug("parse() exception, traceback follows", exc_info=True) + value = e - @contextmanager - def make_context() -> Iterator[RetrieveResult[Any]]: - assert result is not None, result # for mypy - with wrap_exceptions(url, "while reading feed"), temp: - yield result + return ParseResult(feed, value, http_info) - return make_context() + # FIXME: tests for this error handling + # FIXME: tests for http_info getting set - def parse(self, url: str, result: RetrieveResult[Any]) -> ParsedFeed: + def parse(self, url: str, retrieved: RetrievedFeed[Any]) -> ParsedFeed: """Parse a retrieved feed. Args: url (str): The feed URL. - result (RetrieveResult): A retrieve result. + retrieved (RetrievedFeed): The retrieved feed. Returns: ParsedFeed: The feed and entry data. @@ -280,12 +330,13 @@ def parse(self, url: str, result: RetrieveResult[Any]) -> ParsedFeed: ParseError """ - parser, mime_type = self.get_parser(url, result.mime_type) + parser, mime_type = self.get_parser(url, retrieved.mime_type) + headers = retrieved.http_info.headers if retrieved.http_info else None with wrap_exceptions(url, 'during parser'): - feed, entries = parser(url, result.resource, result.headers) + feed, entries = parser(url, retrieved.resource, headers) entries = list(entries) return ParsedFeed( - feed, entries, result.http_etag, result.http_last_modified, mime_type + feed, entries, retrieved.http_etag, retrieved.http_last_modified, mime_type ) def get_parser( diff --git a/src/reader/_parser/file.py b/src/reader/_parser/file.py index 28574177..569e4871 100644 --- a/src/reader/_parser/file.py +++ b/src/reader/_parser/file.py @@ -8,7 +8,6 @@ from typing import IO from ..exceptions import ParseError -from . import RetrieveResult from . 
import wrap_exceptions from ._url_utils import extract_path from ._url_utils import resolve_root @@ -31,9 +30,7 @@ def __post_init__(self) -> None: self._normalize_url('known-good-feed-url') @contextmanager - def __call__( - self, url: str, *args: Any, **kwargs: Any - ) -> Iterator[RetrieveResult[IO[bytes]]]: + def __call__(self, url: str, *args: Any, **kwargs: Any) -> Iterator[IO[bytes]]: try: normalized_url = self._normalize_url(url) except ValueError as e: @@ -41,7 +38,7 @@ def __call__( with wrap_exceptions(url, "while reading feed"): with open(normalized_url, 'rb') as file: - yield RetrieveResult(file) + yield file def validate_url(self, url: str) -> None: self._normalize_url(url) diff --git a/src/reader/_parser/http.py b/src/reader/_parser/http.py index 91b4ea0b..c9e268c7 100644 --- a/src/reader/_parser/http.py +++ b/src/reader/_parser/http.py @@ -9,8 +9,10 @@ import requests -from ..exceptions import ParseError -from . import RetrieveResult +from . import HTTPInfo +from . import NotModified +from . import RetrievedFeed +from . import RetrieveError from . import wrap_exceptions from ._http_utils import parse_options_header from .requests import SessionWrapper @@ -42,7 +44,7 @@ def __call__( http_etag: str | None = None, http_last_modified: str | None = None, http_accept: str | None = None, - ) -> Iterator[RetrieveResult[IO[bytes]] | None]: + ) -> Iterator[RetrievedFeed[IO[bytes]]]: request_headers = { # https://tools.ietf.org/html/rfc3229#section-10.5.3 # "Accept-Instance-Manipulation" @@ -63,17 +65,23 @@ def __call__( stream=True, ) + response_headers = response.headers.copy() + http_info = HTTPInfo(response.status_code, response_headers) + try: response.raise_for_status() except Exception as e: - raise ParseError(url, message="bad HTTP status code") from e + response.close() + raise RetrieveError( + url, + message="bad HTTP status code", + http_info=http_info, + ) from e if response.status_code == 304: response.close() - yield None - return + raise NotModified(url, http_info=http_info) - response_headers = response.headers.copy() response_headers.setdefault('content-location', response.url) # https://datatracker.ietf.org/doc/html/rfc9110#name-content-encoding @@ -93,12 +101,13 @@ def __call__( mime_type = None with wrap_exceptions(url, "while reading feed"), response: - yield RetrieveResult( + # FIXME: should be wrapped in RetrieveError + yield RetrievedFeed( response.raw, mime_type, http_etag, http_last_modified, - response_headers, + http_info, ) def validate_url(self, url: str) -> None: diff --git a/src/reader/_types.py b/src/reader/_types.py index 3dc8bfc1..de84c08d 100644 --- a/src/reader/_types.py +++ b/src/reader/_types.py @@ -267,26 +267,26 @@ class FeedForUpdate(NamedTuple): url: str #: The date the feed was last updated, according to the feed. - updated: datetime | None + updated: datetime | None = None #: The HTTP ``ETag`` header from the last update. - http_etag: str | None + http_etag: str | None = None #: The HTTP ``Last-Modified`` header from the last update. - http_last_modified: str | None + http_last_modified: str | None = None #: Whether the next update should update *all* entries, #: regardless of their :attr:`hash` or :attr:`updated`. - stale: bool + stale: bool = False #: The date the feed was last updated, according to reader; none if never. - last_updated: datetime | None + last_updated: datetime | None = None #: Whether the feed had an exception at the last update. 
- last_exception: bool + last_exception: bool = False #: The :attr:`~FeedData.hash` of the corresponding FeedData. - hash: bytes | None + hash: bytes | None = None class EntryForUpdate(NamedTuple): diff --git a/src/reader/_update.py b/src/reader/_update.py index f49f286e..40656787 100644 --- a/src/reader/_update.py +++ b/src/reader/_update.py @@ -8,13 +8,13 @@ from datetime import timezone from functools import partial from itertools import chain -from itertools import starmap from itertools import tee from typing import Any from typing import NamedTuple from typing import Optional from typing import TYPE_CHECKING +from ._parser import ParseResult from ._types import EntryData from ._types import EntryForUpdate from ._types import EntryUpdateIntent @@ -398,7 +398,7 @@ def parser_process_feeds_for_update( try: yield self.reader._parser.process_feed_for_update(feed) except ParseError as e: - parser_process_feeds_for_update_errors.append((feed, e)) + parser_process_feeds_for_update_errors.append(ParseResult(feed, e)) # assemble pipeline feeds_for_update = self.reader._storage.get_feeds_for_update(filter) @@ -409,7 +409,7 @@ def parser_process_feeds_for_update( feeds_for_update, self.map, is_parallel ) parse_results = chain(parse_results, parser_process_feeds_for_update_errors) - update_results = starmap(process_parse_result, parse_results) + update_results = map(process_parse_result, parse_results) for url, value in update_results: if isinstance(value, FeedNotFoundError): @@ -425,9 +425,11 @@ def parser_process_feeds_for_update( def process_parse_result( self, config: UpdateConfig, - feed: FeedForUpdate, - result: ParsedFeed | None | ParseError, + result: ParseResult[FeedForUpdate, ParseError], ) -> tuple[str, UpdatedFeed | None | Exception]: + feed = result.feed + value = result.value + # TODO: don't duplicate code from update() # TODO: the feed tag value should come from get_feeds_for_update() config_key = self.reader.make_reader_reserved_name(CONFIG_KEY) @@ -439,15 +441,15 @@ def process_parse_result( self.reader._now(), self.global_now, config, - result, + value, ) try: # assemble pipeline - if result and not isinstance(result, Exception): - entry_pairs = self.get_entry_pairs(result) + if value and not isinstance(value, Exception): + entry_pairs = self.get_entry_pairs(value) entry_pairs = self.reader._parser.process_entry_pairs( - feed.url, result.mime_type, entry_pairs + feed.url, value.mime_type, entry_pairs ) entry_pairs, get_total_count = count_consumed(entry_pairs) else: @@ -461,8 +463,8 @@ def process_parse_result( except Exception as e: return feed.url, e - if not result or isinstance(result, Exception): - return feed.url, result + if not value or isinstance(value, Exception): + return feed.url, value return feed.url, UpdatedFeed(feed.url, *counts, total - sum(counts)) diff --git a/tests/fakeparser.py b/tests/fakeparser.py index a767e0de..25fd15ce 100644 --- a/tests/fakeparser.py +++ b/tests/fakeparser.py @@ -8,7 +8,8 @@ import reader._parser from reader import ParseError -from reader._parser import RetrieveResult +from reader._parser import NotModified +from reader._parser import RetrievedFeed from reader._types import EntryData from reader._types import FeedData from reader._types import ParsedFeed @@ -93,6 +94,8 @@ def __call__(self, url, http_etag, http_last_modified): raise NotImplementedError parallel = reader._parser.Parser.parallel + retrieve_fn = reader._parser.Parser.retrieve_fn + parse_fn = reader._parser.Parser.parse_fn class session_factory: persistent = 
staticmethod(nullcontext) @@ -105,11 +108,11 @@ def retrieve(self, url, http_etag, http_last_modified, is_parallel): except Exception as e: raise ParseError(url) from e if self.is_not_modified: - return nullcontext(None) - return nullcontext(RetrieveResult(BytesIO(b'opaque'))) + raise NotModified(url) + return nullcontext(RetrievedFeed(BytesIO(b'opaque'))) - def parse(self, url, result): - assert result.resource.read() == b'opaque', result + def parse(self, url, retrieved): + assert retrieved.resource.read() == b'opaque', retrieved for feed_number, feed in self.feeds.items(): if feed.url == url: diff --git a/tests/test_parser.py b/tests/test_parser.py index b554a632..58dc1e03 100644 --- a/tests/test_parser.py +++ b/tests/test_parser.py @@ -9,9 +9,10 @@ from reader import Feed from reader._parser import default_parser -from reader._parser import FeedArgumentTuple +from reader._parser import FeedForUpdate +from reader._parser import HTTPInfo from reader._parser import Parser -from reader._parser import RetrieveResult +from reader._parser import RetrievedFeed from reader._parser.feedparser import FeedparserParser from reader._parser.file import FileRetriever from reader._parser.jsonfeed import JSONFeedParser @@ -466,7 +467,7 @@ def req_plugin(session, request, **kwargs): parse.session_factory.request_hooks.append(req_plugin) feeds = [ - FeedArgumentTuple(make_http_url(data_dir.joinpath(name))) + FeedForUpdate(make_http_url(data_dir.joinpath(name))) for name in ('empty.atom', 'empty.rss') ] list(parse.parallel(feeds)) @@ -921,7 +922,8 @@ def make_dummy_retriever(name, mime_type='type/subtype', headers=None): @contextmanager def retriever(url, http_etag, http_last_modified, http_accept): retriever.last_http_accept = http_accept - yield RetrieveResult(name, mime_type, http_etag, http_last_modified, headers) + http_info = HTTPInfo(200, headers) + yield RetrievedFeed(name, mime_type, http_etag, http_last_modified, http_info) retriever.slow_to_read = False return retriever diff --git a/tests/test_reader_private.py b/tests/test_reader_private.py index a2115a5e..6d5df10d 100644 --- a/tests/test_reader_private.py +++ b/tests/test_reader_private.py @@ -12,7 +12,7 @@ from reader import FeedNotFoundError from reader import make_reader from reader import ParseError -from reader._parser import RetrieveResult +from reader._parser import RetrievedFeed from reader._types import EntryData from reader._types import FeedData from reader._types import FeedFilter @@ -160,7 +160,7 @@ def __call__(self, url, http_etag, *_): @contextmanager def _make_cm(self, url, http_etag): self.after_enter(url) - yield RetrieveResult( + yield RetrievedFeed( io.BytesIO(b'file'), 'x.test', http_etag=http_etag.upper() if http_etag else http_etag,
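
The sketch below is not part of the patch; it shows how the reworked API is intended to be consumed, based on the ParseResult, HTTPInfo, and Parser.parallel() definitions added above. The feed URL is a placeholder and the variable names are illustrative only.

    from reader._parser import default_parser
    from reader._types import FeedForUpdate

    parser = default_parser()
    # FeedForUpdate now defaults everything except url (see the _types.py hunk above),
    # so a bare URL is enough for a first fetch. The URL here is a placeholder.
    feeds = [FeedForUpdate("https://example.com/feed.xml")]

    for result in parser.parallel(feeds):
        # result is a ParseResult: .feed is the object passed in via `feeds`,
        # .value is a ParsedFeed, None (feed not modified), or a ParseError,
        # and .http_info is populated even when .value is an error.
        if isinstance(result.value, Exception):
            status = result.http_info.status if result.http_info else None
            print(f"{result.feed.url}: error (HTTP {status}): {result.value}")
        elif result.value is None:
            print(f"{result.feed.url}: not modified")
        else:
            print(f"{result.feed.url}: {len(result.value.entries)} entries")

Compared with the (feed, value) tuples parallel() used to yield, carrying http_info on ParseResult (and on RetrieveError / NotModified) is what lets callers report the HTTP status even when retrieval or parsing fails, which is the point of the patch subject ("return HTTP info for errors too", #307).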