Skip to content

Commit

Permalink
parser: s/http_accept/accept/. #307
Browse files Browse the repository at this point in the history
  • Loading branch information
lemon24 committed Sep 17, 2024
1 parent 5ac548c commit d780b2e
Show file tree
Hide file tree
Showing 9 changed files with 51 additions and 56 deletions.
13 changes: 6 additions & 7 deletions CHANGES.rst
Original file line number Diff line number Diff line change
Expand Up @@ -11,14 +11,13 @@ Version 3.15

Unreleased

* Update the parser API to expose HTTP information to the updater. (:issue:`307`)
* Update the (unstable) parser API to expose HTTP information to the updater,
plus additional clean-ups. (:issue:`307`)

.. note::

The (unstable) :class:`.RetrieverType` protocol used by retrievers changed.

* Allow retrievers to store arbitrary caching data via
:attr:`.RetrievedFeed.caching_info`.
* The :class:`.RetrieverType` protocol used by retrievers changed
(new return type, allow storing arbitrary caching data via
:attr:`~.RetrievedFeed.caching_info`).
* The ``HTTPAcceptParserType`` was renamed to :class:`.AcceptParserType`.


Version 3.14
Expand Down
2 changes: 1 addition & 1 deletion docs/internal.rst
Original file line number Diff line number Diff line change
Expand Up @@ -70,7 +70,7 @@ Protocols
:members:
:special-members: __call__

.. autoclass:: HTTPAcceptParserType
.. autoclass:: AcceptParserType
:members:
:show-inheritance:

Expand Down
24 changes: 10 additions & 14 deletions src/reader/_parser/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -146,13 +146,13 @@ def mount_retriever(self, prefix: str, retriever: RetrieverType[Any]) -> None:
self._lazy_call('mount_retriever', prefix, retriever)

def mount_parser_by_mime_type(
self, parser: ParserType[Any], http_accept: str | None = None
self, parser: ParserType[Any], accept: str | None = None
) -> None:
# duplicate Parser check (fail early)
if not http_accept: # pragma: no cover
if not isinstance(parser, HTTPAcceptParserType):
raise TypeError("unaware parser type with no http_accept given")
self._lazy_call('mount_parser_by_mime_type', parser, http_accept)
if not accept: # pragma: no cover
if not isinstance(parser, AcceptParserType):
raise TypeError("unaware parser type with no accept given")
self._lazy_call('mount_parser_by_mime_type', parser, accept)

def mount_parser_by_url(self, url: str, parser: ParserType[Any]) -> None:
self._lazy_call('mount_parser_by_url', url, parser)
Expand Down Expand Up @@ -263,19 +263,15 @@ class RetrieverType(Protocol[T_co]): # pragma: no cover
"""A callable that knows how to retrieve a feed."""

def __call__(
self,
url: str,
caching_info: JSONType | None,
# FIXME also s/http_accept/accept/
http_accept: str | None,
self, url: str, caching_info: JSONType | None, accept: str | None
) -> ContextManager[RetrievedFeed[T_co] | T_co]:
"""Retrieve a feed.
Args:
url (str): The feed URL.
caching_info (JSONType or None):
:attr:`~RetrievedFeed.caching_info` from the last update.
http_accept (str or None):
accept (str or None):
Content types to be retrieved, as an HTTP ``Accept`` header.
Returns:
Expand Down Expand Up @@ -380,11 +376,11 @@ def __call__(


@runtime_checkable
class HTTPAcceptParserType(ParserType[T_cv], Protocol): # pragma: no cover
"""A :class:`ParserType` that knows what content it can handle."""
class AcceptParserType(ParserType[T_cv], Protocol): # pragma: no cover
"""A :class:`ParserType` that knows what content types it can handle."""

@property
def http_accept(self) -> str:
def accept(self) -> str:
"""The content types this parser supports,
as an ``Accept`` HTTP header value.
Expand Down
38 changes: 19 additions & 19 deletions src/reader/_parser/_lazy.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,11 +17,11 @@
from ..exceptions import InvalidFeedURLError
from ..exceptions import ParseError
from ..types import JSONType
from . import AcceptParserType
from . import EntryPair
from . import EntryPairsParserType
from . import F
from . import FeedForUpdateRetrieverType
from . import HTTPAcceptParserType
from . import NotModified
from . import ParsedFeed
from . import ParseResult
Expand Down Expand Up @@ -201,32 +201,32 @@ def retrieve(
"""
parser = self.get_parser_by_url(url)

http_accept: str | None
accept: str | None
if not parser:
http_accept = unparse_accept_header(
accept = unparse_accept_header(
(mime_type, quality)
for mime_type, parsers in self.parsers_by_mime_type.items()
for quality, _ in parsers
)
else:
# URL parsers get the default session / requests Accept (*/*);
# later, we may use parser.http_accept, if it exists, but YAGNI
http_accept = None
# later, we may use parser.accept, if it exists, but YAGNI
accept = None

retriever = self.get_retriever(url)

return self._retrieve(retriever, url, caching_info, http_accept)
return self._retrieve(retriever, url, caching_info, accept)

@contextmanager
def _retrieve(
self,
retriever: RetrieverType[Any],
url: str,
caching_info: JSONType | None,
http_accept: str | None,
accept: str | None,
) -> Iterator[RetrievedFeed[Any]]:
with wrap_exceptions(url, 'during retriever'):
context = retriever(url, caching_info, http_accept)
context = retriever(url, caching_info, accept)
with context as feed:
if not isinstance(feed, RetrievedFeed):
feed = RetrievedFeed(feed)
Expand Down Expand Up @@ -413,31 +413,31 @@ def get_retriever(self, url: str) -> RetrieverType[Any]:
raise ParseError(url, message="no retriever for URL")

def mount_parser_by_mime_type(
self, parser: ParserType[Any], http_accept: str | None = None
self, parser: ParserType[Any], accept: str | None = None
) -> None:
"""Register a parser to one or more MIME types.
Args:
parser (ParserType): The parser.
http_accept (str or None):
accept (str or None):
The content types the parser supports,
as an ``Accept`` HTTP header value.
as an HTTP ``Accept`` header.
If not given, use the parser's
:attr:`~HTTPAcceptParserType.http_accept` attribute,
:attr:`~AcceptParserType.accept` attribute,
if it has one.
Raises:
TypeError: The parser does not have an
:attr:`~HTTPAcceptParserType.http_accept` attribute,
and no ``http_accept`` was given.
:attr:`~AcceptParserType.accept` attribute,
and no ``accept`` was given.
"""
if not http_accept:
if not isinstance(parser, HTTPAcceptParserType):
raise TypeError("unaware parser type with no http_accept given")
http_accept = parser.http_accept
if not accept:
if not isinstance(parser, AcceptParserType):
raise TypeError("unaware parser type with no accept given")
accept = parser.accept

for mime_type, quality in parse_accept_header(http_accept):
for mime_type, quality in parse_accept_header(accept):
if not quality:
continue

Expand Down
2 changes: 1 addition & 1 deletion src/reader/_parser/feedparser.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,7 @@

class FeedparserParser:
# The wildcard gets added back explicitly later on.
http_accept = unparse_accept_header(
accept = unparse_accept_header(
(v, q)
for v, q in parse_accept_header(feedparser.http.ACCEPT_HEADER)
if v != '*/*'
Expand Down
6 changes: 3 additions & 3 deletions src/reader/_parser/http.py
Original file line number Diff line number Diff line change
Expand Up @@ -43,7 +43,7 @@ def __call__(
self,
url: str,
caching_info: Any = None,
http_accept: str | None = None,
accept: str | None = None,
) -> Iterator[RetrievedFeed[IO[bytes]]]:
request_headers = {
# https://tools.ietf.org/html/rfc3229#section-10.5.3
Expand All @@ -52,8 +52,8 @@ def __call__(
# https://www.ctrl.blog/entry/feed-caching.html
'A-IM': 'feed',
}
if http_accept:
request_headers['Accept'] = http_accept
if accept:
request_headers['Accept'] = accept

error = RetrieveError(url)

Expand Down
2 changes: 1 addition & 1 deletion src/reader/_parser/jsonfeed.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,7 @@
class JSONFeedParser:
"""https://jsonfeed.org/version/1.1"""

http_accept = 'application/feed+json,application/json;q=0.9'
accept = 'application/feed+json,application/json;q=0.9'

def __call__(
self,
Expand Down
18 changes: 9 additions & 9 deletions tests/test_parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -986,22 +986,22 @@ def test_parser_mount_order():

def make_dummy_retriever(name, mime_type='type/subtype', headers=None):
@contextmanager
def retriever(url, caching_info, http_accept):
retriever.last_http_accept = http_accept
def retriever(url, caching_info, accept):
retriever.last_accept = accept
http_info = HTTPInfo(200, headers)
yield RetrievedFeed(name, mime_type, caching_info, http_info)

retriever.slow_to_read = False
return retriever


def make_dummy_parser(prefix='', http_accept=None):
def make_dummy_parser(prefix='', accept=None):
def parser(url, file, headers):
parser.last_headers = headers
return prefix + file, [url]

if http_accept:
parser.http_accept = http_accept
if accept:
parser.accept = accept

return parser

Expand All @@ -1025,7 +1025,7 @@ def test_parser_selection():
'type/http',
'caching',
)
assert http_retriever.last_http_accept == 'type/http'
assert http_retriever.last_accept == 'type/http'
assert http_parser.last_headers == 'headers'

# this should not get in the way of anything else;
Expand All @@ -1047,7 +1047,7 @@ def test_parser_selection():
'type/file',
'caching',
)
assert file_retriever.last_http_accept == 'type/http,type/file,text/plain;q=0.8'
assert file_retriever.last_accept == 'type/http,type/file,text/plain;q=0.8'
assert file_parser.last_headers is None

with pytest.raises(ParseError) as excinfo:
Expand Down Expand Up @@ -1080,10 +1080,10 @@ def test_parser_selection():
None,
)
assert parse('unkn:one') == ('fallbackp-unkn', ['unkn:one'], 'type/unknown', None)
assert nomt_retriever.last_http_accept == 'type/http,type/file,text/plain;q=0.8,*/*'
assert nomt_retriever.last_accept == 'type/http,type/file,text/plain;q=0.8,*/*'

assert parse('file:o') == ('urlp-file', ['file:o'], 'type/file', None)
assert file_retriever.last_http_accept is None
assert file_retriever.last_accept is None

# this assert is commented because the selected retriever
# depends on urlunparse() behavior, which in turn depends
Expand Down
2 changes: 1 addition & 1 deletion tests/test_reader_private.py
Original file line number Diff line number Diff line change
Expand Up @@ -179,7 +179,7 @@ def process_feed_for_update(self, feed):


class CustomParser:
http_accept = 'x.test'
accept = 'x.test'

def __call__(self, url, file, headers):
self.in_call(url)
Expand Down

0 comments on commit d780b2e

Please sign in to comment.