From d780b2ea2a75e84aa1ab726d35284e303b6b3dd1 Mon Sep 17 00:00:00 2001 From: lemon24 Date: Tue, 17 Sep 2024 21:43:25 +0300 Subject: [PATCH] parser: s/http_accept/accept/. #307 --- CHANGES.rst | 13 +++++------ docs/internal.rst | 2 +- src/reader/_parser/__init__.py | 24 +++++++++----------- src/reader/_parser/_lazy.py | 38 ++++++++++++++++---------------- src/reader/_parser/feedparser.py | 2 +- src/reader/_parser/http.py | 6 ++--- src/reader/_parser/jsonfeed.py | 2 +- tests/test_parser.py | 18 +++++++-------- tests/test_reader_private.py | 2 +- 9 files changed, 51 insertions(+), 56 deletions(-) diff --git a/CHANGES.rst b/CHANGES.rst index bf5ef6f7..d9db682a 100644 --- a/CHANGES.rst +++ b/CHANGES.rst @@ -11,14 +11,13 @@ Version 3.15 Unreleased -* Update the parser API to expose HTTP information to the updater. (:issue:`307`) +* Update the (unstable) parser API to expose HTTP information to the updater, + plus additional clean-ups. (:issue:`307`) - .. note:: - - The (unstable) :class:`.RetrieverType` protocol used by retrievers changed. - - * Allow retrievers to store arbitrary caching data via - :attr:`.RetrievedFeed.caching_info`. + * The :class:`.RetrieverType` protocol used by retrievers changed + (new return type, allow storing arbitrary caching data via + :attr:`~.RetrievedFeed.caching_info`). + * The ``HTTPAcceptParserType`` was renamed to :class:`.AcceptParserType`. Version 3.14 diff --git a/docs/internal.rst b/docs/internal.rst index 405cf4f1..e2b375c9 100644 --- a/docs/internal.rst +++ b/docs/internal.rst @@ -70,7 +70,7 @@ Protocols :members: :special-members: __call__ -.. autoclass:: HTTPAcceptParserType +.. autoclass:: AcceptParserType :members: :show-inheritance: diff --git a/src/reader/_parser/__init__.py b/src/reader/_parser/__init__.py index c35df4ff..719d9c61 100644 --- a/src/reader/_parser/__init__.py +++ b/src/reader/_parser/__init__.py @@ -146,13 +146,13 @@ def mount_retriever(self, prefix: str, retriever: RetrieverType[Any]) -> None: self._lazy_call('mount_retriever', prefix, retriever) def mount_parser_by_mime_type( - self, parser: ParserType[Any], http_accept: str | None = None + self, parser: ParserType[Any], accept: str | None = None ) -> None: # duplicate Parser check (fail early) - if not http_accept: # pragma: no cover - if not isinstance(parser, HTTPAcceptParserType): - raise TypeError("unaware parser type with no http_accept given") - self._lazy_call('mount_parser_by_mime_type', parser, http_accept) + if not accept: # pragma: no cover + if not isinstance(parser, AcceptParserType): + raise TypeError("unaware parser type with no accept given") + self._lazy_call('mount_parser_by_mime_type', parser, accept) def mount_parser_by_url(self, url: str, parser: ParserType[Any]) -> None: self._lazy_call('mount_parser_by_url', url, parser) @@ -263,11 +263,7 @@ class RetrieverType(Protocol[T_co]): # pragma: no cover """A callable that knows how to retrieve a feed.""" def __call__( - self, - url: str, - caching_info: JSONType | None, - # FIXME also s/http_accept/accept/ - http_accept: str | None, + self, url: str, caching_info: JSONType | None, accept: str | None ) -> ContextManager[RetrievedFeed[T_co] | T_co]: """Retrieve a feed. @@ -275,7 +271,7 @@ def __call__( feed (str): The feed URL. caching_info (JSONType or None): :attr:`~RetrievedFeed.caching_info` from the last update. - http_accept (str or None): + accept (str or None): Content types to be retrieved, as an HTTP ``Accept`` header. Returns: @@ -380,11 +376,11 @@ def __call__( @runtime_checkable -class HTTPAcceptParserType(ParserType[T_cv], Protocol): # pragma: no cover - """A :class:`ParserType` that knows what content it can handle.""" +class AcceptParserType(ParserType[T_cv], Protocol): # pragma: no cover + """A :class:`ParserType` that knows what content types it can handle.""" @property - def http_accept(self) -> str: + def accept(self) -> str: """The content types this parser supports, as an ``Accept`` HTTP header value. diff --git a/src/reader/_parser/_lazy.py b/src/reader/_parser/_lazy.py index 1cc648ce..3c3c02a9 100644 --- a/src/reader/_parser/_lazy.py +++ b/src/reader/_parser/_lazy.py @@ -17,11 +17,11 @@ from ..exceptions import InvalidFeedURLError from ..exceptions import ParseError from ..types import JSONType +from . import AcceptParserType from . import EntryPair from . import EntryPairsParserType from . import F from . import FeedForUpdateRetrieverType -from . import HTTPAcceptParserType from . import NotModified from . import ParsedFeed from . import ParseResult @@ -201,21 +201,21 @@ def retrieve( """ parser = self.get_parser_by_url(url) - http_accept: str | None + accept: str | None if not parser: - http_accept = unparse_accept_header( + accept = unparse_accept_header( (mime_type, quality) for mime_type, parsers in self.parsers_by_mime_type.items() for quality, _ in parsers ) else: # URL parsers get the default session / requests Accept (*/*); - # later, we may use parser.http_accept, if it exists, but YAGNI - http_accept = None + # later, we may use parser.accept, if it exists, but YAGNI + accept = None retriever = self.get_retriever(url) - return self._retrieve(retriever, url, caching_info, http_accept) + return self._retrieve(retriever, url, caching_info, accept) @contextmanager def _retrieve( @@ -223,10 +223,10 @@ def _retrieve( retriever: RetrieverType[Any], url: str, caching_info: JSONType | None, - http_accept: str | None, + accept: str | None, ) -> Iterator[RetrievedFeed[Any]]: with wrap_exceptions(url, 'during retriever'): - context = retriever(url, caching_info, http_accept) + context = retriever(url, caching_info, accept) with context as feed: if not isinstance(feed, RetrievedFeed): feed = RetrievedFeed(feed) @@ -413,31 +413,31 @@ def get_retriever(self, url: str) -> RetrieverType[Any]: raise ParseError(url, message="no retriever for URL") def mount_parser_by_mime_type( - self, parser: ParserType[Any], http_accept: str | None = None + self, parser: ParserType[Any], accept: str | None = None ) -> None: """Register a parser to one or more MIME types. Args: parser (ParserType): The parser. - http_accept (str or None): + accept (str or None): The content types the parser supports, - as an ``Accept`` HTTP header value. + as an HTTP ``Accept`` header. If not given, use the parser's - :attr:`~HTTPAcceptParserType.http_accept` attribute, + :attr:`~AcceptParserType.accept` attribute, if it has one. Raises: TypeError: The parser does not have an - :attr:`~HTTPAcceptParserType.http_accept` attribute, - and no ``http_accept`` was given. + :attr:`~AcceptParserType.accept` attribute, + and no ``accept`` was given. """ - if not http_accept: - if not isinstance(parser, HTTPAcceptParserType): - raise TypeError("unaware parser type with no http_accept given") - http_accept = parser.http_accept + if not accept: + if not isinstance(parser, AcceptParserType): + raise TypeError("unaware parser type with no accept given") + accept = parser.accept - for mime_type, quality in parse_accept_header(http_accept): + for mime_type, quality in parse_accept_header(accept): if not quality: continue diff --git a/src/reader/_parser/feedparser.py b/src/reader/_parser/feedparser.py index fbb61e06..c6c9c23f 100644 --- a/src/reader/_parser/feedparser.py +++ b/src/reader/_parser/feedparser.py @@ -30,7 +30,7 @@ class FeedparserParser: # The wildcard gets added back explicitly later on. - http_accept = unparse_accept_header( + accept = unparse_accept_header( (v, q) for v, q in parse_accept_header(feedparser.http.ACCEPT_HEADER) if v != '*/*' diff --git a/src/reader/_parser/http.py b/src/reader/_parser/http.py index eb094f71..836ad149 100644 --- a/src/reader/_parser/http.py +++ b/src/reader/_parser/http.py @@ -43,7 +43,7 @@ def __call__( self, url: str, caching_info: Any = None, - http_accept: str | None = None, + accept: str | None = None, ) -> Iterator[RetrievedFeed[IO[bytes]]]: request_headers = { # https://tools.ietf.org/html/rfc3229#section-10.5.3 @@ -52,8 +52,8 @@ def __call__( # https://www.ctrl.blog/entry/feed-caching.html 'A-IM': 'feed', } - if http_accept: - request_headers['Accept'] = http_accept + if accept: + request_headers['Accept'] = accept error = RetrieveError(url) diff --git a/src/reader/_parser/jsonfeed.py b/src/reader/_parser/jsonfeed.py index fb763c83..35f593c4 100644 --- a/src/reader/_parser/jsonfeed.py +++ b/src/reader/_parser/jsonfeed.py @@ -27,7 +27,7 @@ class JSONFeedParser: """https://jsonfeed.org/version/1.1""" - http_accept = 'application/feed+json,application/json;q=0.9' + accept = 'application/feed+json,application/json;q=0.9' def __call__( self, diff --git a/tests/test_parser.py b/tests/test_parser.py index b05b5940..41dc6289 100644 --- a/tests/test_parser.py +++ b/tests/test_parser.py @@ -986,8 +986,8 @@ def test_parser_mount_order(): def make_dummy_retriever(name, mime_type='type/subtype', headers=None): @contextmanager - def retriever(url, caching_info, http_accept): - retriever.last_http_accept = http_accept + def retriever(url, caching_info, accept): + retriever.last_accept = accept http_info = HTTPInfo(200, headers) yield RetrievedFeed(name, mime_type, caching_info, http_info) @@ -995,13 +995,13 @@ def retriever(url, caching_info, http_accept): return retriever -def make_dummy_parser(prefix='', http_accept=None): +def make_dummy_parser(prefix='', accept=None): def parser(url, file, headers): parser.last_headers = headers return prefix + file, [url] - if http_accept: - parser.http_accept = http_accept + if accept: + parser.accept = accept return parser @@ -1025,7 +1025,7 @@ def test_parser_selection(): 'type/http', 'caching', ) - assert http_retriever.last_http_accept == 'type/http' + assert http_retriever.last_accept == 'type/http' assert http_parser.last_headers == 'headers' # this should not get in the way of anything else; @@ -1047,7 +1047,7 @@ def test_parser_selection(): 'type/file', 'caching', ) - assert file_retriever.last_http_accept == 'type/http,type/file,text/plain;q=0.8' + assert file_retriever.last_accept == 'type/http,type/file,text/plain;q=0.8' assert file_parser.last_headers is None with pytest.raises(ParseError) as excinfo: @@ -1080,10 +1080,10 @@ def test_parser_selection(): None, ) assert parse('unkn:one') == ('fallbackp-unkn', ['unkn:one'], 'type/unknown', None) - assert nomt_retriever.last_http_accept == 'type/http,type/file,text/plain;q=0.8,*/*' + assert nomt_retriever.last_accept == 'type/http,type/file,text/plain;q=0.8,*/*' assert parse('file:o') == ('urlp-file', ['file:o'], 'type/file', None) - assert file_retriever.last_http_accept is None + assert file_retriever.last_accept is None # this assert is commented because the selected retriever # depends on urlunparse() behavior, which in turn depends diff --git a/tests/test_reader_private.py b/tests/test_reader_private.py index dff59856..99c118a5 100644 --- a/tests/test_reader_private.py +++ b/tests/test_reader_private.py @@ -179,7 +179,7 @@ def process_feed_for_update(self, feed): class CustomParser: - http_accept = 'x.test' + accept = 'x.test' def __call__(self, url, file, headers): self.in_call(url)