From 2d13425bbd6d321afbf6047e2ec86761bbd8ada1 Mon Sep 17 00:00:00 2001
From: barrust
Date: Wed, 3 Jan 2024 23:52:15 -0500
Subject: [PATCH 01/17] black formatting and isort

---
 mediawiki/__init__.py         | 14 +++++++-------
 mediawiki/mediawikipage.py    | 10 ++++++----
 mediawiki/utilities.py        |  2 +-
 scripts/generate_test_data.py | 17 +++++++++++------
 4 files changed, 25 insertions(+), 18 deletions(-)

diff --git a/mediawiki/__init__.py b/mediawiki/__init__.py
index d3b83f4..0219034 100644
--- a/mediawiki/__init__.py
+++ b/mediawiki/__init__.py
@@ -1,19 +1,19 @@
 """ mediawiki module initialization """
-from .mediawiki import MediaWiki, URL, VERSION
-from .mediawikipage import MediaWikiPage
 from .exceptions import (
-    MediaWikiException,
-    PageError,
-    MediaWikiGeoCoordError,
-    RedirectError,
     DisambiguationError,
-    MediaWikiAPIURLError,
     HTTPTimeoutError,
+    MediaWikiAPIURLError,
     MediaWikiCategoryTreeError,
+    MediaWikiException,
+    MediaWikiGeoCoordError,
     MediaWikiLoginError,
+    PageError,
+    RedirectError,
 )
+from .mediawiki import URL, VERSION, MediaWiki
+from .mediawikipage import MediaWikiPage
 
 __author__ = "Tyler Barrus"
 __maintainer__ = "Tyler Barrus"
diff --git a/mediawiki/mediawikipage.py b/mediawiki/mediawikipage.py
index 081440e..dc22208 100644
--- a/mediawiki/mediawikipage.py
+++ b/mediawiki/mediawikipage.py
@@ -4,19 +4,21 @@
 # MIT License
 # Author: Tyler Barrus (barrust@gmail.com)
 
-from decimal import Decimal
 import re
 from collections import OrderedDict
+from decimal import Decimal
+
 from bs4 import BeautifulSoup, Tag
-from .utilities import str_or_unicode, is_relative_url
+
 from .exceptions import (
+    ODD_ERROR_MESSAGE,
+    DisambiguationError,
     MediaWikiBaseException,
     MediaWikiException,
     PageError,
     RedirectError,
-    DisambiguationError,
-    ODD_ERROR_MESSAGE,
 )
+from .utilities import is_relative_url, str_or_unicode
 
 
 class MediaWikiPage(object):
diff --git a/mediawiki/utilities.py b/mediawiki/utilities.py
index f468f75..be94467 100644
--- a/mediawiki/utilities.py
+++ b/mediawiki/utilities.py
@@ -1,9 +1,9 @@
 """ Utility functions """
-import sys
 import functools
 import inspect
+import sys
 import time
diff --git a/scripts/generate_test_data.py b/scripts/generate_test_data.py
index d724dc5..dc8ed2b 100644
--- a/scripts/generate_test_data.py
+++ b/scripts/generate_test_data.py
@@ -1,16 +1,21 @@
 '''
 Generate data for tests
 '''
-import sys
+import json
 import os
+import sys
 from datetime import timedelta
-import json
 from decimal import Decimal
 
-sys.path.insert(0, '../mediawiki')
-from mediawiki import (MediaWiki, PageError, RedirectError,
-                       DisambiguationError, MediaWikiAPIURLError,
-                       MediaWikiGeoCoordError)
+sys.path.insert(0, '../mediawiki')
+from mediawiki import (
+    DisambiguationError,
+    MediaWiki,
+    MediaWikiAPIURLError,
+    MediaWikiGeoCoordError,
+    PageError,
+    RedirectError,
+)
 
 # set up the json objects
 REQUESTS_FILE = './tests/mock_requests.json'
From 1a385cd1a72b20d2619dfd5b398a670b931a8ac5 Mon Sep 17 00:00:00 2001
From: barrust
Date: Thu, 4 Jan 2024 00:06:24 -0500
Subject: [PATCH 02/17] non relative import paths

---
 mediawiki/__init__.py         |   6 +-
 mediawiki/exceptions.py       | 149 ++++++------
 mediawiki/mediawiki.py        | 338 ++++++++++++++------------
 mediawiki/mediawikipage.py    | 165 ++++++-------
 mediawiki/utilities.py        |  25 +-
 scripts/generate_test_data.py | 446 +++++++++++++++++-----------------
 tests/mediawiki_test.py       |  28 ++-
 7 files changed, 585 insertions(+), 572 deletions(-)

diff --git a/mediawiki/__init__.py b/mediawiki/__init__.py
index 0219034..8ba98d8 100644
--- a/mediawiki/__init__.py
+++ b/mediawiki/__init__.py
@@ -1,7 +1,7 @@
 """ mediawiki module initialization """
-from .exceptions import (
+from mediawiki.exceptions import (
     DisambiguationError,
     HTTPTimeoutError,
     MediaWikiAPIURLError,
@@ -12,8 +12,8 @@
     PageError,
     RedirectError,
 )
-from .mediawiki import URL, VERSION, MediaWiki
-from .mediawikipage import MediaWikiPage
+from mediawiki.mediawiki import URL, VERSION, MediaWiki
+from mediawiki.mediawikipage import MediaWikiPage
 
 __author__ = "Tyler Barrus"
 __maintainer__ = "Tyler Barrus"
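
Note: the move from relative to absolute intra-package imports above is internal only; the package's
public surface is unchanged. A minimal sketch of the consumer-facing imports this series leaves
intact (mirroring what scripts/generate_test_data.py itself does further down):

    # Public re-exports resolve exactly as before; only the internal
    # import style changed (relative -> absolute).
    from mediawiki import DisambiguationError, MediaWiki, PageError

    site = MediaWiki()  # defaults to https://en.wikipedia.org/w/api.php
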
diff --git a/mediawiki/exceptions.py b/mediawiki/exceptions.py
index de5f049..8525e74 100644
--- a/mediawiki/exceptions.py
+++ b/mediawiki/exceptions.py
@@ -1,7 +1,9 @@
 """
 MediaWiki Exceptions
 """
-from .utilities import str_or_unicode
+from typing import List, Optional
+
+from mediawiki.utilities import str_or_unicode
 
 ODD_ERROR_MESSAGE = (
     "This should not happen. If the MediaWiki site you are "
@@ -11,12 +13,12 @@ class MediaWikiBaseException(Exception):
-    """ Base MediaWikiException
-
-        Args:
-            message: The message of the exception """
+    """Base MediaWikiException
+
+    Args:
+        message: The message of the exception"""
 
-    def __init__(self, message):
+    def __init__(self, message: str):
         self._message = message
         super(MediaWikiBaseException, self).__init__(self.message)
 
@@ -27,63 +29,55 @@ def __str__(self):
         return str_or_unicode(self.__unicode__())
 
     @property
-    def message(self):
-        """ str: The MediaWiki exception message """
+    def message(self) -> str:
+        """str: The MediaWiki exception message"""
         return self._message
 
 
 class MediaWikiException(MediaWikiBaseException):
-    """ MediaWiki Exception Class
-
-        Args:
-            error (str): The error message that the MediaWiki site returned """
+    """MediaWiki Exception Class
+
+    Args:
+        error (str): The error message that the MediaWiki site returned"""
 
-    def __init__(self, error):
+    def __init__(self, error: str):
         self._error = error
-        msg = ('An unknown error occurred: "{0}". Please report it on GitHub!').format(
-            self.error
-        )
+        msg = ('An unknown error occurred: "{0}". Please report it on GitHub!').format(self.error)
         super(MediaWikiException, self).__init__(msg)
 
     @property
-    def error(self):
-        """ str: The error message that the MediaWiki site returned """
+    def error(self) -> str:
+        """str: The error message that the MediaWiki site returned"""
         return self._error
 
 
 class PageError(MediaWikiBaseException):
-    """ Exception raised when no MediaWiki page matched a query
-
-        Args:
-            title (str): Title of the page
-            pageid (int): MediaWiki page id of the page"""
+    """Exception raised when no MediaWiki page matched a query
+
+    Args:
+        title (str): Title of the page
+        pageid (int): MediaWiki page id of the page"""
 
-    def __init__(self, title=None, pageid=None):
+    def __init__(self, title: Optional[str] = None, pageid: Optional[int] = None):
         if title:
             self._title = title
-            msg = ('"{0}" does not match any pages. Try another query!').format(
-                self.title
-            )
+            msg = ('"{0}" does not match any pages. Try another query!').format(self.title)
         elif pageid:
             self._pageid = pageid
-            msg = ('Page id "{0}" does not match any pages. Try another id!').format(
-                self.pageid
-            )
+            msg = ('Page id "{0}" does not match any pages. Try another id!').format(self.pageid)
         else:
             self._title = ""
-            msg = ('"{0}" does not match any pages. Try another query!').format(
-                self.title
-            )
+            msg = ('"{0}" does not match any pages. Try another query!').format(self.title)
 
         super(PageError, self).__init__(msg)
 
     @property
-    def title(self):
-        """ str: The title that caused the page error """
+    def title(self) -> str:
+        """str: The title that caused the page error"""
         return self._title
 
     @property
-    def pageid(self):
-        """ int: The page id that caused the page error """
+    def pageid(self) -> int:
+        """int: The page id that caused the page error"""
         return self._pageid
 
@@ -97,18 +91,17 @@ class RedirectError(MediaWikiBaseException):
         This should only occur if both auto_suggest and redirect \
         are set to **False** """
 
-    def __init__(self, title):
+    def __init__(self, title: str):
         self._title = title
 
         msg = (
-            '"{0}" resulted in a redirect. Set the redirect property to True '
-            "to allow automatic redirects."
+            '"{0}" resulted in a redirect. Set the redirect property to True ' "to allow automatic redirects."
         ).format(self.title)
 
         super(RedirectError, self).__init__(msg)
 
     @property
-    def title(self):
-        """ str: The title that was redirected """
+    def title(self) -> str:
+        """str: The title that was redirected"""
         return self._title
 
@@ -125,50 +118,48 @@ class DisambiguationError(MediaWikiBaseException):
         `options` only includes titles that link to valid \
         MediaWiki pages """
 
-    def __init__(self, title, may_refer_to, url, details=None):
+    def __init__(self, title: str, may_refer_to: List[str], url: str, details: Optional[List[str]] = None):
         self._title = title
         self._unordered_options = may_refer_to
         self._options = sorted(may_refer_to)
         self._details = details
         self._url = url
-        msg = ('\n"{0}" may refer to: \n  ' "{1}").format(
-            self.title, "\n  ".join(self.options)
-        )
+        msg = ('\n"{0}" may refer to: \n  ' "{1}").format(self.title, "\n  ".join(self.options))
         super(DisambiguationError, self).__init__(msg)
 
     @property
-    def url(self):
-        """ str: The url, if possible, of the disambiguation page """
+    def url(self) -> str:
+        """str: The url, if possible, of the disambiguation page"""
        return self._url
 
     @property
-    def title(self):
-        """ str: The title of the page """
+    def title(self) -> str:
+        """str: The title of the page"""
         return self._title
 
     @property
-    def options(self):
-        """ list: The list of possible page titles """
+    def options(self) -> List[str]:
+        """list: The list of possible page titles"""
         return self._options
 
     @property
-    def unordered_options(self):
+    def unordered_options(self) -> List[str]:
         """list: The list of possible page titles, un-sorted in an attempt
         to get them as they showup on the page"""
         return self._unordered_options
 
     @property
-    def details(self):
-        """ list: The details of the proposed non-disambigous pages """
+    def details(self) -> Optional[List[str]]:
+        """list: The details of the proposed non-disambigous pages"""
         return self._details
 
 
 class HTTPTimeoutError(MediaWikiBaseException):
-    """ Exception raised when a request to the Mediawiki site times out.
-
-        Args:
-            query (str): The query that timed out"""
+    """Exception raised when a request to the Mediawiki site times out.
+
+    Args:
+        query (str): The query that timed out"""
 
-    def __init__(self, query):
+    def __init__(self, query: str):
         self._query = query
         msg = (
             'Searching for "{0}" resulted in a timeout. '
             "Try again in a few seconds, and ensure you have rate limiting "
             "set to True."
         ).format(self.query)
@@ -178,25 +169,25 @@ def __init__(self, query):
         super(HTTPTimeoutError, self).__init__(msg)
 
     @property
-    def query(self):
-        """ str: The query that timed out """
+    def query(self) -> str:
+        """str: The query that timed out"""
         return self._query
 
 
 class MediaWikiAPIURLError(MediaWikiBaseException):
-    """ Exception raised when the MediaWiki server does not support the API
-
-        Args:
-            api_url (str): The API URL that was not recognized """
+    """Exception raised when the MediaWiki server does not support the API
+
+    Args:
+        api_url (str): The API URL that was not recognized"""
 
-    def __init__(self, api_url):
+    def __init__(self, api_url: str):
         self._api_url = api_url
         msg = "{0} is not a valid MediaWiki API URL".format(self.api_url)
         super(MediaWikiAPIURLError, self).__init__(msg)
 
     @property
-    def api_url(self):
-        """ str: The api url that raised the exception """
+    def api_url(self) -> str:
+        """str: The api url that raised the exception"""
         return self._api_url
 
@@ -207,7 +198,7 @@ class MediaWikiGeoCoordError(MediaWikiBaseException):
         error (str): Error message from the MediaWiki site related to \
         GeoCoordinates """
 
-    def __init__(self, error):
+    def __init__(self, error: str):
         self._error = error
         msg = (
             "GeoData search resulted in the following error: {0}"
             " - Please use valid coordinates or a proper page title."
         ).format(self.error)
@@ -216,19 +207,19 @@ def __init__(self, error):
         super(MediaWikiGeoCoordError, self).__init__(msg)
 
     @property
-    def error(self):
-        """ str: The error that was thrown when pulling GeoCoordinates """
+    def error(self) -> str:
+        """str: The error that was thrown when pulling GeoCoordinates"""
         return self._error
 
 
 class MediaWikiCategoryTreeError(MediaWikiBaseException):
-    """ Exception when the category tree is unable to complete for an unknown
-        reason
-
-        Args:
-            category (str): The category that threw an exception """
+    """Exception when the category tree is unable to complete for an unknown
+    reason
+
+    Args:
+        category (str): The category that threw an exception"""
 
-    def __init__(self, category):
+    def __init__(self, category: str):
         self._category = category
         msg = (
             "Categorytree threw an exception for trying to get the "
             "same category '{}' too many times. Please try again later "
             "and perhaps use the rate limiting option."
         ).format(self._category)
@@ -239,23 +230,23 @@ def __init__(self, category):
         super(MediaWikiCategoryTreeError, self).__init__(msg)
 
     @property
-    def category(self):
+    def category(self) -> str:
         """ str: The category that threw an exception during category tree \
             generation """
         return self._category
 
 
 class MediaWikiLoginError(MediaWikiBaseException):
-    """ Exception raised when unable to login to the MediaWiki site
-
-        Args:
-            error (str): The error message that the MediaWiki site returned """
+    """Exception raised when unable to login to the MediaWiki site
+
+    Args:
+        error (str): The error message that the MediaWiki site returned"""
 
-    def __init__(self, error):
+    def __init__(self, error: str):
         self._error = error
         super(MediaWikiLoginError, self).__init__(error)
 
     @property
-    def error(self):
-        """ str: The error message that the MediaWiki site returned """
+    def error(self) -> str:
+        """str: The error message that the MediaWiki site returned"""
         return self._error
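
The annotations added to mediawiki/exceptions.py above formalize what callers already rely on.
A hedged usage sketch (the page title is illustrative; the property names and types come straight
from the diff above):

    from mediawiki import DisambiguationError, MediaWiki, PageError

    site = MediaWiki()
    try:
        page = site.page("Oasis")
    except DisambiguationError as ex:
        # ex.options is now annotated List[str]: the sorted candidate titles
        print(ex.title, ex.options[:3])
    except PageError as ex:
        # ex.message is annotated str on the shared base class
        print(ex.message)
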
diff --git a/mediawiki/mediawiki.py b/mediawiki/mediawiki.py
index 7e4f199..6ecca3c 100644
--- a/mediawiki/mediawiki.py
+++ b/mediawiki/mediawiki.py
@@ -12,7 +12,7 @@
 import requests
 import requests.exceptions as rex
 
-from .exceptions import (
+from mediawiki.exceptions import (
     HTTPTimeoutError,
     MediaWikiAPIURLError,
     MediaWikiCategoryTreeError,
@@ -21,6 +21,7 @@
     MediaWikiLoginError,
     PageError,
 )
+
 from .mediawikipage import MediaWikiPage
 from .utilities import memoize
 
@@ -85,7 +86,7 @@
         proxies=None,
         verify_ssl=True,
     ):
-        """ Init Function """
+        """Init Function"""
         self._version = VERSION
         self._lang = lang.lower()
         self._api_url = url.format(lang=self._lang)
@@ -133,57 +134,57 @@ def __init__(
     # non-settable properties
     @property
     def version(self):
-        """ str: The version of the pymediawiki library
-
-            Note:
-                Not settable """
+        """str: The version of the pymediawiki library
+
+        Note:
+            Not settable"""
         return self._version
 
     @property
     def api_version(self):
-        """ str: API Version of the MediaWiki site
-
-            Note:
-                Not settable """
+        """str: API Version of the MediaWiki site
+
+        Note:
+            Not settable"""
         return self._api_version_str
 
     @property
     def base_url(self):
-        """ str: Base URL for the MediaWiki site
-
-            Note:
-                Not settable """
+        """str: Base URL for the MediaWiki site
+
+        Note:
+            Not settable"""
         return self._base_url
 
     @property
    def extensions(self):
-        """ list: Extensions installed on the MediaWiki site
-
-            Note:
-                Not settable """
+        """list: Extensions installed on the MediaWiki site
+
+        Note:
+            Not settable"""
         return self._extensions
 
     # settable properties
     @property
     def rate_limit(self):
-        """ bool: Turn on or off Rate Limiting """
+        """bool: Turn on or off Rate Limiting"""
         return self._rate_limit
 
     @rate_limit.setter
     def rate_limit(self, rate_limit):
-        """ Turn on or off rate limiting """
+        """Turn on or off rate limiting"""
         self._rate_limit = bool(rate_limit)
         self._rate_limit_last_call = None
         self.clear_memoized()
 
     @property
     def proxies(self):
-        """ dict: Turn on, off, or set proxy use with the Requests library """
+        """dict: Turn on, off, or set proxy use with the Requests library"""
         return self._proxies
 
     @proxies.setter
     def proxies(self, proxies):
-        """ Turn on, off, or set proxy use through the Requests library """
+        """Turn on, off, or set proxy use through the Requests library"""
         if proxies and isinstance(proxies, dict):
             self._proxies = proxies
         else:
@@ -198,34 +199,34 @@ def use_cache(self):
 
     @use_cache.setter
     def use_cache(self, use_cache):
-        """ toggle using the cache or not """
+        """toggle using the cache or not"""
         self._use_cache = bool(use_cache)
 
     @property
     def rate_limit_min_wait(self):
-        """ timedelta: Time to wait between calls
-
-            Note:
-                Only used if rate_limit is **True** """
+        """timedelta: Time to wait between calls
+
+        Note:
+            Only used if rate_limit is **True**"""
         return self._min_wait
 
     @rate_limit_min_wait.setter
     def rate_limit_min_wait(self, min_wait):
-        """ Set minimum wait to use for rate limiting """
+        """Set minimum wait to use for rate limiting"""
         self._min_wait = min_wait
         self._rate_limit_last_call = None
 
     @property
     def timeout(self):
-        """ float: Response timeout for API requests
-
-            Note:
-                Use **None** for no response timeout """
+        """float: Response timeout for API requests
+
+        Note:
+            Use **None** for no response timeout"""
         return self._timeout
 
     @timeout.setter
     def timeout(self, timeout):
-        """ Set request timeout in seconds (or fractions of a second) """
+        """Set request timeout in seconds (or fractions of a second)"""
         if timeout is None:
             self._timeout = None  # no timeout
 
@@ -234,12 +235,12 @@ def timeout(self, timeout):
 
     @property
     def verify_ssl(self):
-        """ bool | str: Verify SSL when using requests or path to cert file """
+        """bool | str: Verify SSL when using requests or path to cert file"""
         return self._verify_ssl
 
     @verify_ssl.setter
     def verify_ssl(self, verify_ssl):
-        """ Set request verify SSL parameter; defaults to True if issue """
+        """Set request verify SSL parameter; defaults to True if issue"""
         self._verify_ssl = True
         if isinstance(verify_ssl, (bool, str)):
             self._verify_ssl = verify_ssl
@@ -259,7 +260,7 @@ def language(self):
 
     @language.setter
     def language(self, lang):
-        """ Set the language to use; attempts to change the API URL """
+        """Set the language to use; attempts to change the API URL"""
         lang = lang.lower()
         if self._lang == lang:
             return
@@ -273,15 +274,15 @@ def language(self, lang):
 
     @property
     def category_prefix(self):
-        """ str: The category prefix to use when using category based functions
-
-            Note:
-                Use the correct category name for the language selected """
+        """str: The category prefix to use when using category based functions
+
+        Note:
+            Use the correct category name for the language selected"""
         return self._cat_prefix
 
     @category_prefix.setter
     def category_prefix(self, prefix):
-        """ Set the category prefix correctly """
+        """Set the category prefix correctly"""
         if prefix[-1:] == ":":
             prefix = prefix[:-1]
         self._cat_prefix = prefix
@@ -305,29 +306,29 @@ def user_agent(self, user_agent):
 
     @property
     def api_url(self):
-        """ str: API URL of the MediaWiki site
-
-            Note:
-                Not settable; See :py:func:`mediawiki.MediaWiki.set_api_url`"""
+        """str: API URL of the MediaWiki site
+
+        Note:
+            Not settable; See :py:func:`mediawiki.MediaWiki.set_api_url`"""
         return self._api_url
 
     @property
     def memoized(self):
-        """ dict: Return the memoize cache
-
-            Note:
-                Not settable; see
-                :py:func:`mediawiki.MediaWiki.clear_memoized` """
+        """dict: Return the memoize cache
+
+        Note:
+            Not settable; see
+            :py:func:`mediawiki.MediaWiki.clear_memoized`"""
         return self._cache
 
     @property
     def refresh_interval(self):
-        """ int: The interval at which the memoize cache is to be refresh """
+        """int: The interval at which the memoize cache is to be refresh"""
         return self._refresh_interval
 
     @refresh_interval.setter
     def refresh_interval(self, refresh_interval):
-        """ Set the new cache refresh interval """
+        """Set the new cache refresh interval"""
         if isinstance(refresh_interval, int) and refresh_interval > 0:
             self._refresh_interval = refresh_interval
         else:
@@ -380,7 +381,11 @@ def login(self, username, password, strict=True):
 
     # non-properties
     def set_api_url(
-        self, api_url="https://{lang}.wikipedia.org/w/api.php", lang="en", username=None, password=None,
+        self,
+        api_url="https://{lang}.wikipedia.org/w/api.php",
+        lang="en",
+        username=None,
+        password=None,
     ):
         """
         Set the API URL and language
@@ -412,7 +417,7 @@ def set_api_url(
         self.clear_memoized()
 
     def _reset_session(self):
-        """ Set session information """
+        """Set session information"""
         if self._session:
             self._session.close()
 
@@ -425,17 +430,17 @@ def _reset_session(self):
         self._is_logged_in = False
 
     def clear_memoized(self):
-        """ Clear memoized (cached) values """
+        """Clear memoized (cached) values"""
         if hasattr(self, "_cache"):
             self._cache.clear()
 
     # non-setup functions
     @property
     def supported_languages(self):
-        """ dict: All supported language prefixes on the MediaWiki site
-
-            Note:
-                Not Settable """
+        """dict: All supported language prefixes on the MediaWiki site
+
+        Note:
+            Not Settable"""
         if self.__supported_languages is None:
             res = self.wiki_request({"meta": "siteinfo", "siprop": "languages"})
             tmp = res["query"]["languages"]
@@ -445,10 +450,10 @@ def supported_languages(self):
 
     @property
     def available_languages(self):
-        """ dict: All available language prefixes on the MediaWiki site
-
-            Note:
-                Not Settable """
+        """dict: All available language prefixes on the MediaWiki site
+
+        Note:
+            Not Settable"""
         if self.__available_languages is None:
             available = {}
             for lang in self.supported_languages:
@@ -462,16 +467,16 @@ def available_languages(self):
 
     @property
     def logged_in(self):
-        """ bool: Returns if logged into the MediaWiki site """
+        """bool: Returns if logged into the MediaWiki site"""
         return self._is_logged_in
 
     def random(self, pages=1):
-        """ Request a random page title or list of random titles
-
-            Args:
-                pages (int): Number of random pages to return
-            Returns:
-                list or int: A list of random page titles or a random page title if pages = 1 """
+        """Request a random page title or list of random titles
+
+        Args:
+            pages (int): Number of random pages to return
+        Returns:
+            list or int: A list of random page titles or a random page title if pages = 1"""
         if pages is None or pages < 1:
             raise ValueError("Number of pages must be greater than 0")
 
@@ -486,15 +491,15 @@ def random(self, pages=1):
 
     @memoize
     def allpages(self, query="", results=10):
-        """ Request all pages from mediawiki instance
-
-            Args:
-                query (str): Search string to use for pulling pages
-                results (int): The number of pages to return
-            Returns:
-                list: The pages that meet the search query
-            Note:
-                Could add ability to continue past the limit of 500
+        """Request all pages from mediawiki instance
+
+        Args:
+            query (str): Search string to use for pulling pages
+            results (int): The number of pages to return
+        Returns:
+            list: The pages that meet the search query
+        Note:
+            Could add ability to continue past the limit of 500
         """
         max_pull = 500
         limit = min(results, max_pull) if results is not None else max_pull
@@ -509,16 +514,16 @@ def allpages(self, query="", results=10):
 
     @memoize
     def search(self, query, results=10, suggestion=False):
-        """ Search for similar titles
-
-            Args:
-                query (str): Page title
-                results (int): Number of pages to return
-                suggestion (bool): Use suggestion
-            Returns:
-                tuple or list: tuple (list results, suggestion) if suggestion is **True**; list of results otherwise
-            Note:
-                Could add ability to continue past the limit of 500
+        """Search for similar titles
+
+        Args:
+            query (str): Page title
+            results (int): Number of pages to return
+            suggestion (bool): Use suggestion
+        Returns:
+            tuple or list: tuple (list results, suggestion) if suggestion is **True**; list of results otherwise
+        Note:
+            Could add ability to continue past the limit of 500
         """
 
         self._check_query(query, "Query must be specified")
@@ -550,13 +555,13 @@ def search(self, query, results=10, suggestion=False):
 
     @memoize
     def suggest(self, query):
-        """ Gather suggestions based on the provided title or None if no
-            suggestions found
-
-            Args:
-                query (str): Page title
-            Returns:
-                String or None: Suggested page title or **None** if no suggestion found
+        """Gather suggestions based on the provided title or None if no
+        suggestions found
+
+        Args:
+            query (str): Page title
+        Returns:
+            String or None: Suggested page title or **None** if no suggestion found
         """
         res, suggest = self.search(query, results=1, suggestion=True)
         try:
@@ -567,30 +572,36 @@ def suggest(self, query):
 
     @memoize
     def geosearch(
-        self, latitude=None, longitude=None, radius=1000, title=None, auto_suggest=True, results=10,
+        self,
+        latitude=None,
+        longitude=None,
+        radius=1000,
+        title=None,
+        auto_suggest=True,
+        results=10,
     ):
-        """ Search for pages that relate to the provided geocoords or near
-            the page
-
-            Args:
-                latitude (Decimal or None): Latitude geocoord; must be coercible to decimal
-                longitude (Decimal or None): Longitude geocoord; must be coercible to decimal
-                radius (int): Radius around page or geocoords to pull back; in meters
-                title (str): Page title to use as a geocoordinate; this has precedence over lat/long
-                auto_suggest (bool): Auto-suggest the page title
-                results (int): Number of pages within the radius to return
-            Returns:
-                list: A listing of page titles
-            Note:
-                The Geosearch API does not support pulling more than the maximum of 500
-            Note:
-                If the page doesn't match the provided title, try setting auto_suggest to `False`
-            Raises:
-                ValueError: If either the passed latitude or longitude are not coercible to a Decimal
+        """Search for pages that relate to the provided geocoords or near
+        the page
+
+        Args:
+            latitude (Decimal or None): Latitude geocoord; must be coercible to decimal
+            longitude (Decimal or None): Longitude geocoord; must be coercible to decimal
+            radius (int): Radius around page or geocoords to pull back; in meters
+            title (str): Page title to use as a geocoordinate; this has precedence over lat/long
+            auto_suggest (bool): Auto-suggest the page title
+            results (int): Number of pages within the radius to return
+        Returns:
+            list: A listing of page titles
+        Note:
+            The Geosearch API does not support pulling more than the maximum of 500
+        Note:
+            If the page doesn't match the provided title, try setting auto_suggest to `False`
+        Raises:
+            ValueError: If either the passed latitude or longitude are not coercible to a Decimal
         """
 
         def test_lat_long(val):
-            """ handle testing lat and long """
+            """handle testing lat and long"""
             if not isinstance(val, Decimal):
                 error = (
                     "Latitude and Longitude must be specified either as "
@@ -625,18 +636,18 @@ def test_lat_long(val):
 
     @memoize
     def opensearch(self, query, results=10, redirect=True):
-        """ Execute a MediaWiki opensearch request, similar to search box
-            suggestions and conforming to the OpenSearch specification
-
-            Args:
-                query (str): Title to search for
-                results (int): Number of pages within the radius to return
-                redirect (bool): If **False** return the redirect itself, otherwise resolve redirects
-            Returns:
-                List: List of results that are stored in a tuple (Title, Summary, URL)
-            Note:
-                The Opensearch API does not support pulling more than the maximum of 500
-            Raises:
+        """Execute a MediaWiki opensearch request, similar to search box
+        suggestions and conforming to the OpenSearch specification
+
+        Args:
+            query (str): Title to search for
+            results (int): Number of pages within the radius to return
+            redirect (bool): If **False** return the redirect itself, otherwise resolve redirects
+        Returns:
+            List: List of results that are stored in a tuple (Title, Summary, URL)
+        Note:
+            The Opensearch API does not support pulling more than the maximum of 500
+        Raises:
         """
 
         self._check_query(query, "Query must be specified")
@@ -717,16 +728,16 @@ def summary(self, title, sentences=0, chars=0, auto_suggest=True, redirect=True)
 
     @memoize
     def categorymembers(self, category, results=10, subcategories=True):
-        """ Get information about a category: pages and subcategories
-
-            Args:
-                category (str): Category name
-                results (int): Number of result
-                subcategories (bool): Include subcategories (**True**) or not (**False**)
-            Returns:
-                Tuple or List: Either a tuple ([pages], [subcategories]) or just the list of pages
-            Note:
-                Set results to **None** to get all results """
+        """Get information about a category: pages and subcategories
+
+        Args:
+            category (str): Category name
+            results (int): Number of result
+            subcategories (bool): Include subcategories (**True**) or not (**False**)
+        Returns:
+            Tuple or List: Either a tuple ([pages], [subcategories]) or just the list of pages
+        Note:
+            Set results to **None** to get all results"""
         self._check_query(category, "Category must be specified")
 
         max_pull = 500
@@ -783,28 +794,28 @@ def categorymembers(self, category, results=10, subcategories=True):
         return pages
 
     def categorytree(self, category, depth=5):
-        """ Generate the Category Tree for the given categories
-
-            Args:
-                category(str or list of strings): Category name(s)
-                depth(int): Depth to traverse the tree
-            Returns:
-                dict: Category tree structure
-            Note:
-                Set depth to **None** to get the whole tree
-            Note:
-                Return Data Structure: Subcategory contains the same recursive structure
-
-            >>> {
-                    'category': {
-                        'depth': Number,
-                        'links': list,
-                        'parent-categories': list,
-                        'sub-categories': dict
-                    }
+        """Generate the Category Tree for the given categories
+
+        Args:
+            category(str or list of strings): Category name(s)
+            depth(int): Depth to traverse the tree
+        Returns:
+            dict: Category tree structure
+        Note:
+            Set depth to **None** to get the whole tree
+        Note:
+            Return Data Structure: Subcategory contains the same recursive structure
+
+        >>> {
+                'category': {
+                    'depth': Number,
+                    'links': list,
+                    'parent-categories': list,
+                    'sub-categories': dict
                 }
+            }
 
-            .. versionadded:: 0.3.10 """
+        .. versionadded:: 0.3.10"""
 
         # make it simple to use both a list or a single category term
         cats = [category] if not isinstance(category, list) else category
@@ -820,22 +831,22 @@ def categorytree(self, category, depth=5):
         return results
 
     def page(self, title=None, pageid=None, auto_suggest=True, redirect=True, preload=False):
-        """ Get MediaWiki page based on the provided title or pageid
-
-            Args:
-                title (str): Page title
-                pageid (int): MediaWiki page identifier
-                auto-suggest (bool): **True:** Allow page title auto-suggest
-                redirect (bool): **True:** Follow page redirects
-                preload (bool): **True:** Load most page properties
-            Raises:
-                ValueError: when title is blank or None and no pageid is provided
-            Raises:
-                :py:func:`mediawiki.exceptions.PageError`: if page does not exist
-            Note:
-                Title takes precedence over pageid if both are provided
-            Note:
-                If the page doesn't match the provided title, try setting auto_suggest to `False`"""
+        """Get MediaWiki page based on the provided title or pageid
+
+        Args:
+            title (str): Page title
+            pageid (int): MediaWiki page identifier
+            auto-suggest (bool): **True:** Allow page title auto-suggest
+            redirect (bool): **True:** Follow page redirects
+            preload (bool): **True:** Load most page properties
+        Raises:
+            ValueError: when title is blank or None and no pageid is provided
+        Raises:
+            :py:func:`mediawiki.exceptions.PageError`: if page does not exist
+        Note:
+            Title takes precedence over pageid if both are provided
+        Note:
+            If the page doesn't match the provided title, try setting auto_suggest to `False`"""
         if (title is None or title.strip() == "") and pageid is None:
             raise ValueError("Either a title or a pageid must be specified")
         if title:
@@ -879,7 +890,7 @@ def wiki_request(self, params):
 
     # Protected functions
     def _get_site_info(self):
-        """ Parse out the Wikimedia site information including API Version and Extensions """
+        """Parse out the Wikimedia site information including API Version and Extensions"""
         response = self.wiki_request({"meta": "siteinfo", "siprop": "extensions|general"})
 
         # parse what we need out here!
@@ -915,7 +926,7 @@ def _get_site_info(self):
 
     @staticmethod
     def _check_error_response(response, query):
-        """ check for default error messages and throw correct exception """
+        """check for default error messages and throw correct exception"""
         if "error" in response:
             http_error = ["HTTP request timed out.", "Pool queue is full"]
             geo_error = [
@@ -932,7 +943,7 @@ def _check_error_response(response, query):
 
     @staticmethod
     def _check_query(value, message):
-        """ check if the query is 'valid' """
+        """check if the query is 'valid'"""
         if value is None or value.strip() == "":
             raise ValueError(message)
 
@@ -952,7 +963,7 @@ def __category_parameter_verification(cats, depth, category):
             raise ValueError(msg)
 
     def __cat_tree_rec(self, cat, depth, tree, level, categories, links):
-        """ recursive function to build out the tree """
+        """recursive function to build out the tree"""
         tree[cat] = dict()
         tree[cat]["depth"] = level
         tree[cat]["sub-categories"] = dict()
@@ -990,18 +1001,23 @@ def __cat_tree_rec(self, cat, depth, tree, level, categories, links):
         else:
             for ctg in links[cat][1]:
                 self.__cat_tree_rec(
-                    ctg, depth, tree[cat]["sub-categories"], level + 1, categories, links,
+                    ctg,
+                    depth,
+                    tree[cat]["sub-categories"],
+                    level + 1,
+                    categories,
+                    links,
                 )
 
     def _get_response(self, params):
-        """ wrap the call to the requests package """
+        """wrap the call to the requests package"""
         try:
             return self._session.get(self._api_url, params=params, timeout=self._timeout).json()
         except JSONDecodeError:
             return {}
 
     def _post_response(self, params):
-        """ wrap a post call to the requests package """
+        """wrap a post call to the requests package"""
         try:
             return self._session.post(self._api_url, data=params, timeout=self._timeout).json()
         except JSONDecodeError:
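
For orientation, a short sketch exercising the signatures reflowed above (geosearch, categorytree,
page); the coordinates, category, and titles are illustrative values, not part of the patch:

    from decimal import Decimal

    from mediawiki import MediaWiki

    site = MediaWiki()
    # geosearch expects Decimal-coercible latitude/longitude, per its docstring
    nearby = site.geosearch(latitude=Decimal("38.8895"), longitude=Decimal("-77.0353"), radius=1000)
    # categorytree returns the nested dict structure documented in its docstring
    tree = site.categorytree(["Chess"], depth=2)
    # page() returns a lazy MediaWikiPage; most properties fetch on first access
    p = site.page("Washington Monument")
    print(p.coordinates)
    print(p.summarize(sentences=2))
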
diff --git a/mediawiki/mediawikipage.py b/mediawiki/mediawikipage.py
index dc22208..6123dd2 100644
--- a/mediawiki/mediawikipage.py
+++ b/mediawiki/mediawikipage.py
@@ -10,7 +10,7 @@
 
 from bs4 import BeautifulSoup, Tag
 
-from .exceptions import (
+from mediawiki.exceptions import (
     ODD_ERROR_MESSAGE,
     DisambiguationError,
     MediaWikiBaseException,
@@ -18,7 +18,7 @@
     PageError,
     RedirectError,
 )
-from .utilities import is_relative_url, str_or_unicode
+from mediawiki.utilities import is_relative_url, str_or_unicode
 
 
 class MediaWikiPage(object):
@@ -45,6 +45,7 @@ class MediaWikiPage(object):
         Warning:
             This should never need to be used directly!  Please use \
             :func:`mediawiki.MediaWiki.page` """
+
     __slots__ = [
         "mediawiki",
         "url",
@@ -82,7 +83,6 @@ def __init__(
         preload=False,
         original_title="",
     ):
-
         self.mediawiki = mediawiki
         self.url = None
         if title is not None:
@@ -131,34 +131,31 @@ def __init__(
         if preload:
             for prop in preload_props:
                 getattr(self, prop)
+
     # end __init__
 
     def __repr__(self):
-        """ repr """
+        """repr"""
         return self.__str__()
 
     def __unicode__(self):
-        """ python 2.7 unicode """
+        """python 2.7 unicode"""
         return """<MediaWikiPage '{0}'>""".format(self.title)
 
     def __str__(self):
-        """ python > 3 unicode python 2.7 byte str """
+        """python > 3 unicode python 2.7 byte str"""
         return str_or_unicode(self.__unicode__())
 
     def __eq__(self, other):
-        """ base eq function """
+        """base eq function"""
         try:
-            return (
-                self.pageid == other.pageid
-                and self.title == other.title
-                and self.url == other.url
-            )
+            return self.pageid == other.pageid and self.title == other.title and self.url == other.url
         except AttributeError:
             return False
 
     # Properties
     def _pull_content_revision_parent(self):
-        """ combine the pulling of these three properties """
+        """combine the pulling of these three properties"""
 
         if self._revision_id is None:
             query_params = {
@@ -173,55 +170,55 @@ def _pull_content_revision_parent(self):
             self._revision_id = page_info["revisions"][0]["revid"]
             self._parent_id = page_info["revisions"][0]["parentid"]
 
-        if self._content is None and 'TextExtracts' not in self.mediawiki.extensions:
+        if self._content is None and "TextExtracts" not in self.mediawiki.extensions:
             msg = "Unable to extract page content; the TextExtracts extension must be installed!"
             raise MediaWikiBaseException(msg)
 
         return self._content, self._revision_id, self._parent_id
 
     @property
     def content(self):
-        """ str: The page content in text format
-
-            Note:
-                Not settable
-            Note:
-                Side effect is to also get revision_id and parent_id """
+        """str: The page content in text format
+
+        Note:
+            Not settable
+        Note:
+            Side effect is to also get revision_id and parent_id"""
         if self._content is None:
             self._pull_content_revision_parent()
         return self._content
 
     @property
     def revision_id(self):
-        """ int: The current revision id of the page
-
-            Note:
-                Not settable
-            Note:
-                Side effect is to also get content and parent_id """
+        """int: The current revision id of the page
+
+        Note:
+            Not settable
+        Note:
+            Side effect is to also get content and parent_id"""
         if self._revision_id is None:
             self._pull_content_revision_parent()
         return self._revision_id
 
     @property
     def parent_id(self):
-        """ int: The parent id of the page
-
-            Note:
-                Not settable
-            Note:
-                Side effect is to also get content and revision_id """
+        """int: The parent id of the page
+
+        Note:
+            Not settable
+        Note:
+            Side effect is to also get content and revision_id"""
         if self._parent_id is None:
             self._pull_content_revision_parent()
         return self._parent_id
 
     @property
     def html(self):
-        """ str: HTML representation of the page
-
-            Note:
-                Not settable
-            Warning:
-                This can be slow for very large pages """
+        """str: HTML representation of the page
+
+        Note:
+            Not settable
+        Warning:
+            This can be slow for very large pages"""
         if self._html is False:
             self._html = None
             query_params = {
@@ -238,10 +235,10 @@ def html(self):
 
     @property
     def wikitext(self):
-        """ str: Wikitext representation of the page
-
-            Note:
-                Not settable """
+        """str: Wikitext representation of the page
+
+        Note:
+            Not settable"""
         if self._wikitext is None:
             query_params = {
                 "action": "parse",
@@ -255,10 +252,10 @@ def wikitext(self):
 
     @property
     def images(self):
-        """ list: Images on the page
-
-            Note:
-                Not settable """
+        """list: Images on the page
+
+        Note:
+            Not settable"""
         if self._images is None:
             self._images = list()
             params = {
@@ -298,14 +295,14 @@ def logos(self):
 
     @property
     def hatnotes(self):
-        """ list: Parse hatnotes from the HTML
-
-            Note:
-                Not settable
-            Note:
-                Side effect is to also pull the html which can be slow
-            Note:
-                This is a parsing operation and not part of the standard API"""
+        """list: Parse hatnotes from the HTML
+
+        Note:
+            Not settable
+        Note:
+            Side effect is to also pull the html which can be slow
+        Note:
+            This is a parsing operation and not part of the standard API"""
         if self._hatnotes is None:
             self._hatnotes = list()
             # Cache the results of parsing the html, so that multiple calls happen much faster
@@ -339,10 +336,10 @@ def references(self):
 
     @property
     def categories(self):
-        """ list: Non-hidden categories on the page
-
-            Note:
-                Not settable """
+        """list: Non-hidden categories on the page
+
+        Note:
+            Not settable"""
         if self._categories is None:
             self._categories = list()
             self.__pull_combined_properties()
@@ -364,10 +361,10 @@ def coordinates(self):
 
     @property
     def links(self):
-        """ list: List of all MediaWiki page links on the page
-
-            Note:
-                Not settable """
+        """list: List of all MediaWiki page links on the page
+
+        Note:
+            Not settable"""
         if self._links is None:
             self._links = list()
             self.__pull_combined_properties()
@@ -387,10 +384,10 @@ def redirects(self):
 
     @property
     def backlinks(self):
-        """ list: Pages that link to this page
-
-            Note:
-                Not settable """
+        """list: Pages that link to this page
+
+        Note:
+            Not settable"""
         if self._backlinks is None:
             self._backlinks = list()
             params = {
@@ -430,12 +427,12 @@ def langlinks(self):
 
     @property
     def preview(self):
-        """ dict: Page preview information that builds the preview hover """
+        """dict: Page preview information that builds the preview hover"""
         if self._preview is None:
             params = {
                 "action": "query",
                 "formatversion": "2",
-                "prop":"info|extracts|pageimages|revisions|pageterms|coordinates",
+                "prop": "info|extracts|pageimages|revisions|pageterms|coordinates",
                 "exsentences": "5",
                 "explaintext": "true",
                 "piprop": "thumbnail|original",
@@ -451,10 +448,10 @@ def preview(self):
 
     @property
     def summary(self):
-        """ str: Default page summary
-
-            Note:
-                Not settable """
+        """str: Default page summary
+
+        Note:
+            Not settable"""
         if self._summary is None:
             self.__pull_combined_properties()
         return self._summary
@@ -487,10 +484,10 @@ def summarize(self, sentences=0, chars=0):
 
     @property
     def sections(self):
-        """ list: Table of contents sections
-
-            Note:
-                Not settable """
+        """list: Table of contents sections
+
+        Note:
+            Not settable"""
         # NOTE: Due to MediaWiki sites adding superscripts or italics or bold
         #       information in the sections, moving to regex to get the
         #       `non-decorated` name instead of using the query api!
@@ -500,12 +497,12 @@ def sections(self):
 
     @property
     def table_of_contents(self):
-        """ OrderedDict: Dictionary of sections and sub-sections
-
-            Note:
-                Leaf nodes are empty OrderedDict objects
-            Note:
-                Not Settable"""
+        """OrderedDict: Dictionary of sections and sub-sections
+
+        Note:
+            Leaf nodes are empty OrderedDict objects
+        Note:
+            Not Settable"""
         if self._table_of_contents is None:
             self._parse_sections()
 
@@ -600,7 +597,7 @@ def parse_section_links(self, section_title):
 
     # Protected Methods
     def __load(self, redirect=True, preload=False):
-        """ load the basic page information """
+        """load the basic page information"""
         query_params = {
             "prop": "info|pageprops",
             "inprop": "url",
@@ -631,13 +628,13 @@ def __load(self, redirect=True, preload=False):
         self.url = page["fullurl"]
 
     def _raise_page_error(self):
-        """ raise the correct type of page error """
+        """raise the correct type of page error"""
         if hasattr(self, "title"):
             raise PageError(title=self.title)
 
         raise PageError(pageid=self.pageid)
 
     def _raise_disambiguation_error(self, page, pageid):
-        """ parse and throw a disambiguation error """
+        """parse and throw a disambiguation error"""
         query_params = {
             "prop": "revisions",
             "rvprop": "content",
@@ -649,9 +646,7 @@ def _raise_disambiguation_error(self, page, pageid):
         html = request["query"]["pages"][pageid]["revisions"][0]["*"]
 
         lis = BeautifulSoup(html, "html.parser").find_all("li")
-        filtered_lis = [
-            li for li in lis if "tocsection" not in "".join(li.get("class", list()))
-        ]
+        filtered_lis = [li for li in lis if "tocsection" not in "".join(li.get("class", list()))]
         may_refer_to = [li.a.get_text() for li in filtered_lis if li.a]
 
         disambiguation = list()
@@ -673,7 +668,7 @@ def _raise_disambiguation_error(self, page, pageid):
         )
 
     def _handle_redirect(self, redirect, preload, query, page):
-        """ handle redirect """
+        """handle redirect"""
         if redirect:
             redirects = query["redirects"][0]
 
@@ -701,8 +696,8 @@ def _handle_redirect(self, redirect, preload, query, page):
             raise RedirectError(getattr(self, "title", page["title"]))
 
     def _continued_query(self, query_params, key="pages"):
-        """ Based on
-            https://www.mediawiki.org/wiki/API:Query#Continuing_queries """
+        """Based on
+        https://www.mediawiki.org/wiki/API:Query#Continuing_queries"""
         query_params.update(self.__title_query_param())
 
         last_cont = dict()
@@ -734,7 +729,7 @@ def _continued_query(self, query_params, key="pages"):
             last_cont = request["continue"]
 
     def _parse_section_links(self, id_tag):
-        """ given a section id, parse the links in the unordered list """
+        """given a section id, parse the links in the unordered list"""
         all_links = list()
 
         if id_tag is None:
@@ -775,7 +770,7 @@ def _parse_section_links(self, id_tag):
         return all_links
 
     def __parse_link_info(self, link):
-        """ parse the <a> tag for the link """
+        """parse the <a> tag for the link"""
         href = link.get("href", "")
         txt = link.string or href
         is_rel = is_relative_url(href)
@@ -788,7 +783,7 @@ def __parse_link_info(self, link):
         return txt, tmp
 
     def _parse_sections(self):
-        """ parse sections and TOC """
+        """parse sections and TOC"""
 
         def _list_to_dict(_dict, path, sec):
             tmp = _dict
@@ -835,13 +830,13 @@ def _list_to_dict(_dict, path, sec):
         self._table_of_contents = res
 
     def __title_query_param(self):
-        """ util function to determine which parameter method to use """
+        """util function to determine which parameter method to use"""
         if getattr(self, "title", None) is not None:
             return {"titles": self.title}
         return {"pageids": self.pageid}
 
     def __pull_combined_properties(self):
-        """ something here... """
+        """something here..."""
 
         query_params = {
             "titles": self.title,
@@ -915,10 +910,10 @@ def __pull_combined_properties(self):
 
         # categories
         def _get_cat(val):
-            """ parse the category correctly """
+            """parse the category correctly"""
             tmp = val["title"]
             if tmp.startswith(self.mediawiki.category_prefix):
-                return tmp[len(self.mediawiki.category_prefix) + 1:]
+                return tmp[len(self.mediawiki.category_prefix) + 1 :]
             return tmp
 
         tmp = [_get_cat(link) for link in results.get("categories", list())]
diff --git a/mediawiki/utilities.py b/mediawiki/utilities.py
index be94467..123a8f5 100644
--- a/mediawiki/utilities.py
+++ b/mediawiki/utilities.py
@@ -5,10 +5,11 @@
 import inspect
 import sys
 import time
+from typing import Any, Callable, Dict
 
 
-def parse_all_arguments(func):
-    """ determine all positional and named arguments as a dict """
+def parse_all_arguments(func: Callable) -> Dict[str, Any]:
+    """determine all positional and named arguments as a dict"""
     args = dict()
 
     func_args = inspect.signature(func)
@@ -19,15 +20,15 @@ def parse_all_arguments(func):
     return args
 
 
-def memoize(func):
-    """ quick memoize decorator for class instance methods
-        NOTE: this assumes that the class that the functions to be
-        memoized already has a memoized and refresh_interval
-        property """
+def memoize(func: Callable) -> Callable:
+    """quick memoize decorator for class instance methods
+    NOTE: this assumes that the class that the functions to be
+    memoized already has a memoized and refresh_interval
+    property"""
 
     @functools.wraps(func)
     def wrapper(*args, **kwargs):
-        """ wrap it up and store info in a cache """
+        """wrap it up and store info in a cache"""
         cache = args[0].memoized
         refresh = args[0].refresh_interval
         use_cache = args[0].use_cache
@@ -66,14 +67,14 @@ def wrapper(*args, **kwargs):
     return wrapper
 
 
-def str_or_unicode(text):
-    """ handle python 3 unicode """
+def str_or_unicode(text: str) -> str:
+    """handle python 3 unicode"""
     encoding = sys.stdout.encoding
     return text.encode(encoding).decode(encoding)
 
 
-def is_relative_url(url):
-    """ simple method to determine if a url is relative or absolute """
+def is_relative_url(url: str) -> bool:
+    """simple method to determine if a url is relative or absolute"""
     if url.startswith("#"):
         return None
     if url.find("://") > 0 or url.startswith("//"):
diff --git a/scripts/generate_test_data.py b/scripts/generate_test_data.py
index dc8ed2b..f5c2e3d 100644
--- a/scripts/generate_test_data.py
+++ b/scripts/generate_test_data.py
@@ -1,13 +1,13 @@
-'''
+"""
 Generate data for tests
-'''
+"""
 import json
 import os
 import sys
 from datetime import timedelta
 from decimal import Decimal
 
-sys.path.insert(0, '../mediawiki')
+sys.path.insert(0, "../mediawiki")
 from mediawiki import (
     DisambiguationError,
     MediaWiki,
@@ -18,18 +18,19 @@
 )
 
 # set up the json objects
-REQUESTS_FILE = './tests/mock_requests.json'
-RESPONSES_FILE = './tests/mock_responses.json'
-CATTREE_FILE = './tests/mock_categorytree.json'
+REQUESTS_FILE = "./tests/mock_requests.json"
+RESPONSES_FILE = "./tests/mock_responses.json"
+CATTREE_FILE = "./tests/mock_categorytree.json"
 
 
 def capture_response(func):
-    ''' capture_response decorator to be used for tests '''
+    """capture_response decorator to be used for tests"""
+
     def wrapper(*args, **kwargs):
-        ''' define the actions '''
+        """define the actions"""
         file_path = os.path.abspath(REQUESTS_FILE)
         if os.path.isfile(file_path):
-            with open(file_path, 'r') as mock:
+            with open(file_path, "r") as mock:
                 mock_data = json.load(mock)
         else:
             mock_data = dict()
@@ -43,31 +44,37 @@ def wrapper(*args, **kwargs):
         except Exception:
             res = dict()
         mock_data[args[0].api_url][new_params] = res
-        with open(file_path, 'w') as mock:
-            json.dump(mock_data, mock, ensure_ascii=False, indent=1,
-                      sort_keys=True)
+        with open(file_path, "w") as mock:
+            json.dump(mock_data, mock, ensure_ascii=False, indent=1, sort_keys=True)
         return res
+
     return wrapper
 
 
 class MediaWikiOverloaded(MediaWiki):
-    ''' overloaded mediawiki class '''
-    def __init__(self, url='https://{lang}.wikipedia.org/w/api.php', lang='en',
-                 timeout=None, rate_limit=False,
-                 rate_limit_wait=timedelta(milliseconds=50)):
-        ''' overloaded init '''
-        MediaWiki.__init__(self, url=url, lang=lang, timeout=timeout,
-                           rate_limit=rate_limit,
-                           rate_limit_wait=rate_limit_wait)
+    """overloaded mediawiki class"""
+
+    def __init__(
+        self,
+        url="https://{lang}.wikipedia.org/w/api.php",
+        lang="en",
+        timeout=None,
+        rate_limit=False,
+        rate_limit_wait=timedelta(milliseconds=50),
+    ):
+        """overloaded init"""
+        MediaWiki.__init__(
+            self, url=url, lang=lang, timeout=timeout, rate_limit=rate_limit, rate_limit_wait=rate_limit_wait
+        )
 
     @capture_response
     def _get_response(self, params):
-        ''' overloaded response '''
+        """overloaded response"""
         return MediaWiki._get_response(self, params)
 
     @capture_response
     def _post_response(self, params):
-        ''' overloaded response '''
+        """overloaded response"""
         return MediaWiki._post_response(self, params)
 
@@ -104,11 +111,11 @@ def _post_response(self, params):
 
 # make files if they don't exist
 if not os.path.isfile(REQUESTS_FILE):
-    with open(REQUESTS_FILE, 'w') as file_handle:
+    with open(REQUESTS_FILE, "w") as file_handle:
         json.dump(dict(), file_handle, ensure_ascii=False)
 
 if os.path.isfile(RESPONSES_FILE):
-    with open(RESPONSES_FILE, 'r') as file_handle:
+    with open(RESPONSES_FILE, "r") as file_handle:
         responses = json.load(file_handle)
 else:
     responses = dict()
@@ -116,11 +123,9 @@ def _post_response(self, params):
 
 # Begin building out new data objects
 site = MediaWikiOverloaded()
-french_site = MediaWikiOverloaded(url='https://fr.wikipedia.org/w/api.php',
-                                  lang='fr')
-asoiaf = MediaWikiOverloaded(url='https://awoiaf.westeros.org/api.php',
-                             lang='fr')
-# plants = MediaWikiOverloaded(url='https://practicalplants.org/w/api.php')
+french_site = MediaWikiOverloaded(url="https://fr.wikipedia.org/w/api.php", lang="fr")
+asoiaf = MediaWikiOverloaded(url="https://awoiaf.westeros.org/api.php", lang="fr")
+plants = MediaWikiOverloaded(url="https://practicalplants.org/w/api.php")
 
 wikipedia = MediaWikiOverloaded()
 
@@ -135,27 +140,27 @@ def _post_response(self, params):
 # pull in standard information for all sites (every time)
 if site.api_url not in responses:
     responses[site.api_url] = dict()
-responses[site.api_url]['api'] = site.api_url
-responses[site.api_url]['lang'] = site.language
-responses[site.api_url]['languages'] = site.supported_languages
-responses[site.api_url]['api_version'] = site.api_version
-responses[site.api_url]['extensions'] = site.extensions
+responses[site.api_url]["api"] = site.api_url
+responses[site.api_url]["lang"] = site.language
+responses[site.api_url]["languages"] = site.supported_languages
+responses[site.api_url]["api_version"] = site.api_version
+responses[site.api_url]["extensions"] = site.extensions
 
 if french_site.api_url not in responses:
     responses[french_site.api_url] = dict()
-responses[french_site.api_url]['api'] = french_site.api_url
-responses[french_site.api_url]['lang'] = french_site.language
-responses[french_site.api_url]['languages'] = french_site.supported_languages
-responses[french_site.api_url]['api_version'] = french_site.api_version
-responses[french_site.api_url]['extensions'] = french_site.extensions
+responses[french_site.api_url]["api"] = french_site.api_url
+responses[french_site.api_url]["lang"] = french_site.language
+responses[french_site.api_url]["languages"] = french_site.supported_languages
+responses[french_site.api_url]["api_version"] = french_site.api_version
+responses[french_site.api_url]["extensions"] = french_site.extensions
 
 if asoiaf.api_url not in responses:
     responses[asoiaf.api_url] = dict()
-responses[asoiaf.api_url]['api'] = asoiaf.api_url
-responses[asoiaf.api_url]['lang'] = asoiaf.language
-responses[asoiaf.api_url]['languages'] = asoiaf.supported_languages
-responses[asoiaf.api_url]['api_version'] = asoiaf.api_version
-responses[asoiaf.api_url]['extensions'] = asoiaf.extensions
+responses[asoiaf.api_url]["api"] = asoiaf.api_url
+responses[asoiaf.api_url]["lang"] = asoiaf.language
+responses[asoiaf.api_url]["languages"] = asoiaf.supported_languages
+responses[asoiaf.api_url]["api_version"] = asoiaf.api_version
+responses[asoiaf.api_url]["extensions"] = asoiaf.extensions
 
 # if plants.api_url not in responses:
 #     responses[plants.api_url] = dict()
 
@@ -163,359 +168,356 @@ def _post_response(self, params):
 print("Completed basic mediawiki information")
 
 if PULL_ALL is True or PULL_SEARCHES is True:
-    res = site.search('chest set', suggestion=False)
-    responses[site.api_url]['search_without_suggestion'] = res
-    res = site.search('chest set', suggestion=True)
-    responses[site.api_url]['search_with_suggestion_found'] = res
-    res = site.search('chess set', suggestion=True)
-    responses[site.api_url]['search_with_suggestion_not_found'] = res
-    res = site.search('chess set', results=505, suggestion=False)
-    responses[site.api_url]['search_with_suggestion_not_found_large'] = res
-    res = site.search('chess set', results=3, suggestion=False)
-    responses[site.api_url]['search_with_suggestion_not_found_small'] = res
+    res = site.search("chest set", suggestion=False)
+    responses[site.api_url]["search_without_suggestion"] = res
+    res = site.search("chest set", suggestion=True)
+    responses[site.api_url]["search_with_suggestion_found"] = res
+    res = site.search("chess set", suggestion=True)
+    responses[site.api_url]["search_with_suggestion_not_found"] = res
+    res = site.search("chess set", results=505, suggestion=False)
+    responses[site.api_url]["search_with_suggestion_not_found_large"] = res
+    res = site.search("chess set", results=3, suggestion=False)
+    responses[site.api_url]["search_with_suggestion_not_found_small"] = res
 
     print("Completed pulling searches")
 
 if PULL_ALL is True or PULL_ALLPAGES is True:
-    res = site.allpages('a')
-    responses[site.api_url]['all_pages_query_a'] = res
+    res = site.allpages("a")
+    responses[site.api_url]["all_pages_query_a"] = res
     res = site.allpages("a", results=1)
-    responses[site.api_url]['all_pages_query_a_1'] = res
+    responses[site.api_url]["all_pages_query_a_1"] = res
 
     print("Completed pulling allpages")
 
 if PULL_ALL is True or PULL_RANDOM is True:
-    responses[site.api_url]['random_1'] = site.random(pages=1)
-    responses[site.api_url]['random_2'] = site.random(pages=2)
-    responses[site.api_url]['random_10'] = site.random(pages=10)
-    responses[site.api_url]['random_202'] = site.random(pages=202)
+    responses[site.api_url]["random_1"] = site.random(pages=1)
+    responses[site.api_url]["random_2"] = site.random(pages=2)
+    responses[site.api_url]["random_10"] = site.random(pages=10)
+    responses[site.api_url]["random_202"] = site.random(pages=202)
 
     print("Completed pulling random pages")
 
 if PULL_ALL is True or PULL_SUGGEST is True:
-    responses[site.api_url]['suggest_chest_set'] = site.suggest("chest set")
-    responses[site.api_url]['suggest_chess_set'] = site.suggest("chess set")
-    responses[site.api_url]['suggest_new_york'] = site.suggest('new york')
-    responses[site.api_url]['suggest_yonkers'] = site.suggest('yonkers')
-    responses[site.api_url]['suggest_no_results'] = site.suggest('gobbilygook')
+    responses[site.api_url]["suggest_chest_set"] = site.suggest("chest set")
+    responses[site.api_url]["suggest_chess_set"] = site.suggest("chess set")
+    responses[site.api_url]["suggest_new_york"] = site.suggest("new york")
+    responses[site.api_url]["suggest_yonkers"] = site.suggest("yonkers")
+    responses[site.api_url]["suggest_no_results"] = site.suggest("gobbilygook")
 
     print("Completed pulling suggestions")
 
 if PULL_ALL is True or PULL_OPENSEARCH is True:
-    res = site.opensearch('new york')
-    responses[site.api_url]['opensearch_new_york'] = res
-    res = site.opensearch('new york', results=5)
-    responses[site.api_url]['opensearch_new_york_result'] = res
-    res = site.opensearch('new york', redirect=False)
-    responses[site.api_url]['opensearch_new_york_redirect'] = res
-    res = site.opensearch('new york', results=5, redirect=False)
-    responses[site.api_url]['opensearch_new_york_result_redirect'] = res
+    res = site.opensearch("new york")
+    responses[site.api_url]["opensearch_new_york"] = res
+    res = site.opensearch("new york", results=5)
+    responses[site.api_url]["opensearch_new_york_result"] = res
+    res = site.opensearch("new york", redirect=False)
+    responses[site.api_url]["opensearch_new_york_redirect"] = res
+    res = site.opensearch("new york", results=5, redirect=False)
+    responses[site.api_url]["opensearch_new_york_result_redirect"] = res
 
     print("Completed pulling open searches")
 
 if PULL_ALL is True or PULL_PREFIXSEARCH is True:
-    responses[site.api_url]['prefixsearch_ar'] = site.prefixsearch('ar')
-    responses[site.api_url]['prefixsearch_ba'] = site.prefixsearch('ba')
-    res = site.prefixsearch('ba', results=5)
-    responses[site.api_url]['prefixsearch_ba_5'] = res
-    res = site.prefixsearch('ba', results=30)
-    responses[site.api_url]['prefixsearch_ba_30'] = res
+    responses[site.api_url]["prefixsearch_ar"] = site.prefixsearch("ar")
+    responses[site.api_url]["prefixsearch_ba"] = site.prefixsearch("ba")
+    res = site.prefixsearch("ba", results=5)
+    responses[site.api_url]["prefixsearch_ba_5"] = res
+    res = site.prefixsearch("ba", results=30)
+    responses[site.api_url]["prefixsearch_ba_30"] = res
 
     print("Completed pulling prefix searches")
 
 if PULL_ALL is True or PULL_GEOSEARCH is True:
-    res = site.geosearch(latitude=Decimal('0.0'), longitude=Decimal('0.0'))
-    responses[site.api_url]['geosearch_decimals'] = res
-    res = site.geosearch(latitude=Decimal('0.0'), longitude=0.0)
-    responses[site.api_url]['geosearch_mix_types'] = res
-    res = site.geosearch(title='new york city', latitude=Decimal('-9999999999.999'),
-                         longitude=Decimal('0.0'), results=22, radius=10000)
-    responses[site.api_url]['geosearch_page_invalid_lat_long'] = res
-    res = site.geosearch(title='new york city', results=22, radius=10000)
-    responses[site.api_url]['geosearch_page_radius_results_set'] = res
-    res = site.geosearch(title='new york city', radius=10000)
-    responses[site.api_url]['geosearch_page_radius_results'] = res
-    res = site.geosearch(title='new york city')
-    responses[site.api_url]['geosearch_page'] = res
+    res = site.geosearch(latitude=Decimal("0.0"), longitude=Decimal("0.0"))
+    responses[site.api_url]["geosearch_decimals"] = res
+    res = site.geosearch(latitude=Decimal("0.0"), longitude=0.0)
+    responses[site.api_url]["geosearch_mix_types"] = res
+    res = site.geosearch(
+        title="new york city", latitude=Decimal("-9999999999.999"), longitude=Decimal("0.0"), results=22, radius=10000
+    )
+    responses[site.api_url]["geosearch_page_invalid_lat_long"] = res
+    res = site.geosearch(title="new york city", results=22, radius=10000)
+    responses[site.api_url]["geosearch_page_radius_results_set"] = res
+    res = site.geosearch(title="new york city", radius=10000)
+    responses[site.api_url]["geosearch_page_radius_results"] = res
+    res = site.geosearch(title="new york city")
+    responses[site.api_url]["geosearch_page"] = res
     try:
-        site.geosearch(latitude=None, longitude=Decimal('0.0'), results=22,
-                       radius=10000)
-    except (ValueError) as ex:
-        responses[site.api_url]['invalid_lat_long_value_msg'] = str(ex)
+        site.geosearch(latitude=None, longitude=Decimal("0.0"), results=22, radius=10000)
+    except ValueError as ex:
+        responses[site.api_url]["invalid_lat_long_value_msg"] = str(ex)
     try:
-        site.geosearch(latitude=Decimal('-9999999999.999'),
-                       longitude=Decimal('0.0'), results=22, radius=10000)
-    except (MediaWikiGeoCoordError) as ex:
-        responses[site.api_url]['invalid_lat_long_geo_msg'] = ex.message
+        site.geosearch(latitude=Decimal("-9999999999.999"), longitude=Decimal("0.0"), results=22, radius=10000)
+    except MediaWikiGeoCoordError as ex:
+        responses[site.api_url]["invalid_lat_long_geo_msg"] = ex.message
 
     print("Completed pulling geo search")
 
 if PULL_ALL is True or PULL_CATEGORYMEMBERS is True:
     res = site.categorymembers("Chess", results=15, subcategories=True)
-    responses[site.api_url]['category_members_with_subcategories'] = res
+    responses[site.api_url]["category_members_with_subcategories"] = res
     res = site.categorymembers("Chess", results=15, subcategories=False)
-    responses[site.api_url]['category_members_without_subcategories'] = res
+    responses[site.api_url]["category_members_without_subcategories"] = res
     res = site.categorymembers("Chess", results=5, subcategories=False)
-    responses[site.api_url]['category_members_without_subcategories_5'] = res
-    res = site.categorymembers('Disambiguation categories', results=None)
-    responses[site.api_url]['category_members_very_large'] = res
+    responses[site.api_url]["category_members_without_subcategories_5"] = res
+    res = site.categorymembers("Disambiguation categories", results=None)
+    responses[site.api_url]["category_members_very_large"] = res
 
     print("Completed pulling category members")
 
 if PULL_ALL is True or PULL_CATEGORYTREE is True:
     site.rate_limit = True
-    ct = site.categorytree(['Chess', 'Ebola'], depth=None)
-    with open(CATTREE_FILE, 'w') as fp:
+    ct = site.categorytree(["Chess", "Ebola"], depth=None)
+    with open(CATTREE_FILE, "w") as fp:
         json.dump(ct, fp, ensure_ascii=False, sort_keys=True)
     try:
-        site.categorytree('Chess Ebola', depth=None)
+        site.categorytree("Chess Ebola", depth=None)
     except Exception as ex:
-        responses[site.api_url]['missing_categorytree'] = str(ex)
+        responses[site.api_url]["missing_categorytree"] = str(ex)
     site.rate_limit = False
 
     print("Completed pulling category tree")
 
 if PULL_ALL is True or PULL_SUMMARY is True:
-    res = site.summary('chess', chars=50)
-    responses[site.api_url]['summarize_chars_50'] = res
-    res = site.summary('chess', sentences=5)
-    responses[site.api_url]['summarize_sent_5'] = res
-    res = site.summary('chess')
-    responses[site.api_url]['summarize_first_paragraph'] = res
+    res = site.summary("chess", chars=50)
+    responses[site.api_url]["summarize_chars_50"] = res
+    res = site.summary("chess", sentences=5)
+    responses[site.api_url]["summarize_sent_5"] = res
+    res = site.summary("chess")
+    responses[site.api_url]["summarize_first_paragraph"] = res
 
     print("Completed pulling summaries")
 
 if PULL_ALL is True or PULL_PAGE_ERRORS is True:
     try:
-        site.page('gobbilygook')
+        site.page("gobbilygook")
     except PageError as ex:
-        responses[site.api_url]['page_error_msg'] = ex.message
+        responses[site.api_url]["page_error_msg"] = ex.message
     try:
-        site.page('gobbilygook', auto_suggest=False)
+        site.page("gobbilygook", auto_suggest=False)
     except PageError as ex:
-        responses[site.api_url]['page_error_msg_title'] = ex.message
+        responses[site.api_url]["page_error_msg_title"] = ex.message
     try:
         site.page(pageid=-1)
     except PageError as ex:
-        responses[site.api_url]['page_error_msg_pageid'] = ex.message
+        responses[site.api_url]["page_error_msg_pageid"] = ex.message
 
     print("Completed pulling page errors")
 
 if PULL_ALL is True or PULL_DISAMBIGUATION_ERRORS is True:
     try:
-        site.page('bush')
+        site.page("bush")
     except DisambiguationError as ex:
-        responses[site.api_url]['disambiguation_error_msg'] = ex.message
+        responses[site.api_url]["disambiguation_error_msg"] = ex.message
     try:
-        site.page('Oasis')
+        site.page("Oasis")
     except DisambiguationError as ex:
         msg = ex.message
-        responses[site.api_url]['disambiguation_error_msg_with_empty'] = msg
+        responses[site.api_url]["disambiguation_error_msg_with_empty"] = msg
 
     print("Completed pulling disambiguation errors")
 
 if PULL_ALL is True or PULL_API_URL_ERROR is True:
-    url = 'https://french.wikipedia.org/w/api.php'
+    url = "https://french.wikipedia.org/w/api.php"
     try:
-        site.set_api_url(api_url=url, lang='fr')
+        site.set_api_url(api_url=url, lang="fr")
     except MediaWikiAPIURLError as ex:
-        responses[site.api_url]['api_url_error_msg'] = ex.message
+        responses[site.api_url]["api_url_error_msg"] = ex.message
 
     # this shouldn't be necessary since it should go back to the original
     # values
-    site.set_api_url(api_url='https://en.wikipedia.org/w/api.php', lang='en')
+    site.set_api_url(api_url="https://en.wikipedia.org/w/api.php", lang="en")
 
     print("Completed pulling api url errors")
 
 if PULL_ALL is True or PULL_REDIRECT_ERROR is True:
     # print('Start redirect error')
     try:
-        asoiaf.page('arya', auto_suggest=False, redirect=False)
+        asoiaf.page("arya", auto_suggest=False, redirect=False)
     except RedirectError as ex:
-        responses[asoiaf.api_url]['redirect_error_msg'] = ex.message
+        responses[asoiaf.api_url]["redirect_error_msg"] = ex.message
 
     print("Completed pulling redirect errors")
 
 if PULL_ALL is True or PULL_PAGES is True:
     # unicode
-    site.page(u"Jacques Léonard Muller")
+    site.page("Jacques Léonard Muller")
 
     # page id and wikitext
     p = site.page(pageid=24337758, auto_suggest=False)
-    responses['bpp-complexity_wikitext'] = p.wikitext
+    responses["bpp-complexity_wikitext"] = p.wikitext
 
     # coordinates
-    p = site.page('Washington Monument')
+    p = site.page("Washington Monument")
     coords = p.coordinates
-    responses[site.api_url]['wash_mon'] = [str(coords[0]), str(coords[1])]
+    responses[site.api_url]["wash_mon"] = [str(coords[0]), str(coords[1])]
 
     # page properties
     # arya
-    pg = asoiaf.page('arya')
-    responses[asoiaf.api_url]['arya'] = dict()
-    responses[asoiaf.api_url]['arya']['title'] = pg.title
-    responses[asoiaf.api_url]['arya']['pageid'] = pg.pageid
-    responses[asoiaf.api_url]['arya']['revision_id'] =
pg.revision_id - responses[asoiaf.api_url]['arya']['parent_id'] = pg.parent_id - responses[asoiaf.api_url]['arya']['content'] = pg.content - responses[asoiaf.api_url]['arya']['url'] = pg.url + pg = asoiaf.page("arya") + responses[asoiaf.api_url]["arya"] = dict() + responses[asoiaf.api_url]["arya"]["title"] = pg.title + responses[asoiaf.api_url]["arya"]["pageid"] = pg.pageid + responses[asoiaf.api_url]["arya"]["revision_id"] = pg.revision_id + responses[asoiaf.api_url]["arya"]["parent_id"] = pg.parent_id + responses[asoiaf.api_url]["arya"]["content"] = pg.content + responses[asoiaf.api_url]["arya"]["url"] = pg.url # other properties - responses[asoiaf.api_url]['arya']['backlinks'] = pg.backlinks - responses[asoiaf.api_url]['arya']['images'] = pg.images - responses[asoiaf.api_url]['arya']['redirects'] = pg.redirects - responses[asoiaf.api_url]['arya']['links'] = pg.links - responses[asoiaf.api_url]['arya']['categories'] = pg.categories - responses[asoiaf.api_url]['arya']['references'] = pg.references - responses[asoiaf.api_url]['arya']['content'] = pg.content - responses[asoiaf.api_url]['arya']['parent_id'] = pg.parent_id - responses[asoiaf.api_url]['arya']['revision_id'] = pg.revision_id - responses[asoiaf.api_url]['arya']['coordinates'] = pg.coordinates - responses[asoiaf.api_url]['arya']['summary'] = pg.summary - responses[asoiaf.api_url]['arya']['sections'] = pg.sections - res = pg.section('A Game of Thrones') - responses[asoiaf.api_url]['arya']['section_a_game_of_thrones'] = res - res = pg.section('External links') - responses[asoiaf.api_url]['arya']['last_section'] = res - responses[asoiaf.api_url]['arya']['html'] = pg.html + responses[asoiaf.api_url]["arya"]["backlinks"] = pg.backlinks + responses[asoiaf.api_url]["arya"]["images"] = pg.images + responses[asoiaf.api_url]["arya"]["redirects"] = pg.redirects + responses[asoiaf.api_url]["arya"]["links"] = pg.links + responses[asoiaf.api_url]["arya"]["categories"] = pg.categories + responses[asoiaf.api_url]["arya"]["references"] = pg.references + responses[asoiaf.api_url]["arya"]["content"] = pg.content + responses[asoiaf.api_url]["arya"]["parent_id"] = pg.parent_id + responses[asoiaf.api_url]["arya"]["revision_id"] = pg.revision_id + responses[asoiaf.api_url]["arya"]["coordinates"] = pg.coordinates + responses[asoiaf.api_url]["arya"]["summary"] = pg.summary + responses[asoiaf.api_url]["arya"]["sections"] = pg.sections + res = pg.section("A Game of Thrones") + responses[asoiaf.api_url]["arya"]["section_a_game_of_thrones"] = res + res = pg.section("External links") + responses[asoiaf.api_url]["arya"]["last_section"] = res + responses[asoiaf.api_url]["arya"]["html"] = pg.html # jon snow - pg = asoiaf.page('jon snow') - responses[asoiaf.api_url]['jon-snow'] = dict() - responses[asoiaf.api_url]['jon-snow']['title'] = pg.title - responses[asoiaf.api_url]['jon-snow']['pageid'] = pg.pageid - responses[asoiaf.api_url]['jon-snow']['revision_id'] = pg.revision_id - responses[asoiaf.api_url]['jon-snow']['parent_id'] = pg.parent_id - responses[asoiaf.api_url]['jon-snow']['content'] = pg.content - responses[asoiaf.api_url]['jon-snow']['url'] = pg.url + pg = asoiaf.page("jon snow") + responses[asoiaf.api_url]["jon-snow"] = dict() + responses[asoiaf.api_url]["jon-snow"]["title"] = pg.title + responses[asoiaf.api_url]["jon-snow"]["pageid"] = pg.pageid + responses[asoiaf.api_url]["jon-snow"]["revision_id"] = pg.revision_id + responses[asoiaf.api_url]["jon-snow"]["parent_id"] = pg.parent_id + responses[asoiaf.api_url]["jon-snow"]["content"] = pg.content + 
responses[asoiaf.api_url]["jon-snow"]["url"] = pg.url # castos - pg = asoiaf.page('Castos') - responses[asoiaf.api_url]['castos'] = dict() - res = pg.section('References and Notes') - responses[asoiaf.api_url]['castos']['section'] = res + pg = asoiaf.page("Castos") + responses[asoiaf.api_url]["castos"] = dict() + res = pg.section("References and Notes") + responses[asoiaf.api_url]["castos"]["section"] = res # other pages as they will be in the response object - asoiaf.page('arya', auto_suggest=False) + asoiaf.page("arya", auto_suggest=False) # lang links property (standard wikipedia) - pg = site.page('Nobel Prize in Chemistry') - responses[site.api_url]['nobel_chemistry'] = dict() - responses[site.api_url]['nobel_chemistry']['langlinks'] = pg.langlinks + pg = site.page("Nobel Prize in Chemistry") + responses[site.api_url]["nobel_chemistry"] = dict() + responses[site.api_url]["nobel_chemistry"]["langlinks"] = pg.langlinks print("Completed pulling pages and properties") if PULL_ALL is True or PULL_LOGOS is True: # single logo - res = wikipedia.page('Chess').logos - responses[wikipedia.api_url]['chess_logos'] = res + res = wikipedia.page("Chess").logos + responses[wikipedia.api_url]["chess_logos"] = res # multiple logos - res = wikipedia.page('Sony Music').logos - responses[wikipedia.api_url]['sony_music_logos'] = res + res = wikipedia.page("Sony Music").logos + responses[wikipedia.api_url]["sony_music_logos"] = res # no infobox - res = wikipedia.page('Antivirus Software').logos - responses[wikipedia.api_url]['antivirus_software_logos'] = res + res = wikipedia.page("Antivirus Software").logos + responses[wikipedia.api_url]["antivirus_software_logos"] = res print("Completed pulling logos") if PULL_ALL is True or PULL_PREVIEWS is True: - res = wikipedia.page('Chess').preview - responses[wikipedia.api_url]['chess_preview'] = res + res = wikipedia.page("Chess").preview + responses[wikipedia.api_url]["chess_preview"] = res print("Completed pulling previews") if PULL_ALL is True or PULL_HATNOTES is True: # contains hatnotes - res = wikipedia.page('Chess').hatnotes - responses[wikipedia.api_url]['chess_hatnotes'] = res + res = wikipedia.page("Chess").hatnotes + responses[wikipedia.api_url]["chess_hatnotes"] = res # no hatnotes - page_name = ('List of Battlestar Galactica (1978 TV series) and ' - 'Galactica 1980 episodes') + page_name = "List of Battlestar Galactica (1978 TV series) and " "Galactica 1980 episodes" res = wikipedia.page(page_name).hatnotes - responses[wikipedia.api_url]['page_no_hatnotes'] = res + responses[wikipedia.api_url]["page_no_hatnotes"] = res print("Completed pulling hat notes") if PULL_ALL is True or PULL_SECTION_LINKS is True: # contains external links - pg = wikipedia.page('''McDonald's''') - res = pg.parse_section_links('EXTERNAL LINKS') - responses[wikipedia.api_url]['mcy_ds_external_links'] = res + pg = wikipedia.page("""McDonald's""") + res = pg.parse_section_links("EXTERNAL LINKS") + responses[wikipedia.api_url]["mcy_ds_external_links"] = res res = pg.parse_section_links(None) - responses[wikipedia.api_url]['mcy_ds_external_links_none'] = res + responses[wikipedia.api_url]["mcy_ds_external_links_none"] = res # doesn't contain external links - pg = wikipedia.page('Tropical rainforest conservation') - res = pg.parse_section_links('EXTERNAL LINKS') - responses[wikipedia.api_url]['page_no_sec_links'] = res - + pg = wikipedia.page("Tropical rainforest conservation") + res = pg.parse_section_links("EXTERNAL LINKS") + responses[wikipedia.api_url]["page_no_sec_links"] = res - 
pg = asoiaf.page('arya') + pg = asoiaf.page("arya") for section in pg.sections: links = pg.parse_section_links(section) - responses[asoiaf.api_url]['arya_{}_links'.format(section)] = links + responses[asoiaf.api_url]["arya_{}_links".format(section)] = links print("Completed pulling the section links") if PULL_ALL is True or PULL_TABLE_OF_CONTENTS is True: - pg = wikipedia.page('New York City') + pg = wikipedia.page("New York City") res = pg.sections - responses[wikipedia.api_url]['new_york_city_sections'] = res + responses[wikipedia.api_url]["new_york_city_sections"] = res res = pg.table_of_contents - responses[wikipedia.api_url]['new_york_city_toc'] = res - responses[wikipedia.api_url]['new_york_city_air_quality'] = pg.section('Air quality') - responses[wikipedia.api_url]['new_york_city_none'] = pg.section(None) - responses[wikipedia.api_url]['new_york_city_last_sec'] = pg.section('External links') + responses[wikipedia.api_url]["new_york_city_toc"] = res + responses[wikipedia.api_url]["new_york_city_air_quality"] = pg.section("Air quality") + responses[wikipedia.api_url]["new_york_city_none"] = pg.section(None) + responses[wikipedia.api_url]["new_york_city_last_sec"] = pg.section("External links") print("Completed pulling Table of Content data") if PULL_ALL is True or PULL_LOGIN is True: - pg = wikipedia.login(username='badusername', password='fakepassword') + pg = wikipedia.login(username="badusername", password="fakepassword") print("Completed pulling login") if PULL_ALL is True or PULL_ISSUE_14 is True: - res = site.page('One Two Three... Infinity').images - responses[wikipedia.api_url]['hidden_images'] = res + res = site.page("One Two Three... Infinity").images + responses[wikipedia.api_url]["hidden_images"] = res # missing http got lumped into this issue... 
- page = site.page('Minneapolis') - responses[site.api_url]['references_without_http'] = page.references + page = site.page("Minneapolis") + responses[site.api_url]["references_without_http"] = page.references print("Completed pulling issue 14") if PULL_ALL is True or PULL_ISSUE_15 is True: - res = site.page('Rober Eryol').images - responses[wikipedia.api_url]['infinite_loop_images'] = res - res = site.page('List of named minor planets (numerical)').links - responses[wikipedia.api_url]['large_continued_query'] = res - res = wikipedia.page('B8 polytope').images - responses[wikipedia.api_url]['large_continued_query_images'] = res + res = site.page("Rober Eryol").images + responses[wikipedia.api_url]["infinite_loop_images"] = res + res = site.page("List of named minor planets (numerical)").links + responses[wikipedia.api_url]["large_continued_query"] = res + res = wikipedia.page("B8 polytope").images + responses[wikipedia.api_url]["large_continued_query_images"] = res print("Completed pulling issue 15") if PULL_ALL is True or PULL_ISSUE_35 is True: try: - site.page('Leaching') + site.page("Leaching") except DisambiguationError as ex: - responses[wikipedia.api_url]['missing_title_disamb_dets'] = ex.details - responses[wikipedia.api_url]['missing_title_disamb_msg'] = str(ex) + responses[wikipedia.api_url]["missing_title_disamb_dets"] = ex.details + responses[wikipedia.api_url]["missing_title_disamb_msg"] = str(ex) print("Completed pulling issue 35") if PULL_ALL is True or PULL_ISSUE_39 is True: - res = plants.categorymembers('Plant', results=None, subcategories=False) - responses[plants.api_url]['query-continue-find'] = res + res = plants.categorymembers("Plant", results=None, subcategories=False) + responses[plants.api_url]["query-continue-find"] = res print("Completed pulling issue 39") # dump data to file -with open(RESPONSES_FILE, 'w') as mock: +with open(RESPONSES_FILE, "w") as mock: json.dump(responses, mock, ensure_ascii=False, indent=1, sort_keys=True) diff --git a/tests/mediawiki_test.py b/tests/mediawiki_test.py index 5c00440..8868f9e 100644 --- a/tests/mediawiki_test.py +++ b/tests/mediawiki_test.py @@ -2,19 +2,27 @@ ''' Unittest class ''' +import json import time import unittest -import json -from datetime import (timedelta) -from decimal import (Decimal) - -from mediawiki import (MediaWiki, MediaWikiPage, PageError, RedirectError, - DisambiguationError, MediaWikiAPIURLError, - MediaWikiGeoCoordError, HTTPTimeoutError, - MediaWikiException, MediaWikiCategoryTreeError, - MediaWikiLoginError) +from datetime import timedelta +from decimal import Decimal + import mediawiki -from .utilities import find_depth, FunctionUseCounter +from mediawiki import ( + DisambiguationError, + HTTPTimeoutError, + MediaWiki, + MediaWikiAPIURLError, + MediaWikiCategoryTreeError, + MediaWikiException, + MediaWikiGeoCoordError, + MediaWikiLoginError, + MediaWikiPage, + PageError, + RedirectError, +) +from tests.utilities import FunctionUseCounter, find_depth class MediaWikiOverloaded(MediaWiki): From c507698fd0cf9bfc1f3cbbbbd9263f88e1e0d0c0 Mon Sep 17 00:00:00 2001 From: barrust Date: Thu, 4 Jan 2024 10:27:19 -0500 Subject: [PATCH 03/17] typing --- mediawiki/mediawiki.py | 249 ++++--- mediawiki/mediawikipage.py | 254 +++---- tests/mediawiki_test.py | 1385 ++++++++++++++++++------------------ 3 files changed, 937 insertions(+), 951 deletions(-) diff --git a/mediawiki/mediawiki.py b/mediawiki/mediawiki.py index 6ecca3c..315c851 100644 --- a/mediawiki/mediawiki.py +++ b/mediawiki/mediawiki.py @@ -8,6 +8,7 @@ 
from datetime import datetime, timedelta
 from decimal import Decimal, DecimalException
 from json import JSONDecodeError
+from typing import Any, Dict, List, Optional, Tuple
 
 import requests
 import requests.exceptions as rex
@@ -21,31 +22,30 @@
     MediaWikiLoginError,
     PageError,
 )
+from mediawiki.mediawikipage import MediaWikiPage
+from mediawiki.utilities import memoize
 
-from .mediawikipage import MediaWikiPage
-from .utilities import memoize
-
-URL = "https://github.com/barrust/mediawiki"
-VERSION = "0.7.3"
+URL: str = "https://github.com/barrust/mediawiki"
+VERSION: str = "0.7.3"
 
 
 class MediaWiki(object):
-    """ MediaWiki API Wrapper Instance
-
-    Args:
-        url (str): API URL of the MediaWiki site; defaults to Wikipedia
-        lang (str): Language of the MediaWiki site; used to help change API URL
-        timeout (float): HTTP timeout setting; None means no timeout
-        rate_limit (bool): Use rate limiting to limit calls to the site
-        rate_limit_wait (timedelta): Amount of time to wait between requests
-        cat_prefix (str): The prefix for categories used by the mediawiki site; defaults to Category (en)
-        user_agent (str): The user agent string to use when making requests; defaults to a library version but \
-            per the MediaWiki API documentation it recommends setting a unique one and not using the \
-            library's default user-agent string
-        username (str): The username to use to log into the MediaWiki
-        password (str): The password to use to log into the MediaWiki
-        proxies (str): A dictionary of specific proxies to use in the Requests libary
-        verify_ssl (bool|str): Verify SSL Certificates to be passed directly into the Requests library"""
+    """MediaWiki API Wrapper Instance
+
+    Args:
+        url (str): API URL of the MediaWiki site; defaults to Wikipedia
+        lang (str): Language of the MediaWiki site; used to help change API URL
+        timeout (float): HTTP timeout setting; None means no timeout
+        rate_limit (bool): Use rate limiting to limit calls to the site
+        rate_limit_wait (timedelta): Amount of time to wait between requests
+        cat_prefix (str): The prefix for categories used by the mediawiki site; defaults to Category (en)
+        user_agent (str): The user agent string to use when making requests; defaults to a library version \
+            but per the MediaWiki API documentation it recommends setting a unique one and not using the library's \
+            default user-agent string
+        username (str): The username to use to log into the MediaWiki
+        password (str): The password to use to log into the MediaWiki
+        proxies (str): A dictionary of specific proxies to use in the Requests library
+        verify_ssl (bool|str): Verify SSL Certificates to be passed directly into the Requests library"""
 
     __slots__ = [
        "_version",
@@ -74,17 +74,17 @@ class MediaWiki(object):
 
     def __init__(
         self,
-        url="https://{lang}.wikipedia.org/w/api.php",
-        lang="en",
-        timeout=15.0,
-        rate_limit=False,
-        rate_limit_wait=timedelta(milliseconds=50),
-        cat_prefix="Category",
-        user_agent=None,
-        username=None,
-        password=None,
-        proxies=None,
-        verify_ssl=True,
+        url: str = "https://{lang}.wikipedia.org/w/api.php",
+        lang: str = "en",
+        timeout: float = 15.0,
+        rate_limit: bool = False,
+        rate_limit_wait: timedelta = timedelta(milliseconds=50),
+        cat_prefix: str = "Category",
+        user_agent: Optional[str] = None,
+        username: Optional[str] = None,
+        password: Optional[str] = None,
+        proxies: Optional[Dict] = None,
+        verify_ssl: bool = True,
     ):
         """Init Function"""
         self._version = VERSION
@@ -133,7 +133,7 @@ def __init__(
 
     # non-settable properties
     @property
-    def version(self):
+    def 
version(self) -> str: """str: The version of the pymediawiki library Note: @@ -141,7 +141,7 @@ def version(self): return self._version @property - def api_version(self): + def api_version(self) -> str: """str: API Version of the MediaWiki site Note: @@ -149,7 +149,7 @@ def api_version(self): return self._api_version_str @property - def base_url(self): + def base_url(self) -> str: """str: Base URL for the MediaWiki site Note: @@ -157,7 +157,7 @@ def base_url(self): return self._base_url @property - def extensions(self): + def extensions(self) -> List[str]: """list: Extensions installed on the MediaWiki site Note: @@ -166,24 +166,24 @@ def extensions(self): # settable properties @property - def rate_limit(self): + def rate_limit(self) -> bool: """bool: Turn on or off Rate Limiting""" return self._rate_limit @rate_limit.setter - def rate_limit(self, rate_limit): + def rate_limit(self, rate_limit: bool): """Turn on or off rate limiting""" self._rate_limit = bool(rate_limit) self._rate_limit_last_call = None self.clear_memoized() @property - def proxies(self): + def proxies(self) -> Optional[Dict]: """dict: Turn on, off, or set proxy use with the Requests library""" return self._proxies @proxies.setter - def proxies(self, proxies): + def proxies(self, proxies: Optional[Dict]): """Turn on, off, or set proxy use through the Requests library""" if proxies and isinstance(proxies, dict): self._proxies = proxies @@ -192,18 +192,17 @@ def proxies(self, proxies): self._reset_session() @property - def use_cache(self): - """ bool: Whether caching should be used; on (**True**) or off \ - (**False**) """ + def use_cache(self) -> bool: + """bool: Whether caching should be used; on (**True**) or off (**False**)""" return self._use_cache @use_cache.setter - def use_cache(self, use_cache): + def use_cache(self, use_cache: bool): """toggle using the cache or not""" self._use_cache = bool(use_cache) @property - def rate_limit_min_wait(self): + def rate_limit_min_wait(self) -> timedelta: """timedelta: Time to wait between calls Note: @@ -211,13 +210,13 @@ def rate_limit_min_wait(self): return self._min_wait @rate_limit_min_wait.setter - def rate_limit_min_wait(self, min_wait): + def rate_limit_min_wait(self, min_wait: timedelta): """Set minimum wait to use for rate limiting""" self._min_wait = min_wait self._rate_limit_last_call = None @property - def timeout(self): + def timeout(self) -> float: """float: Response timeout for API requests Note: @@ -225,7 +224,7 @@ def timeout(self): return self._timeout @timeout.setter - def timeout(self, timeout): + def timeout(self, timeout: float): """Set request timeout in seconds (or fractions of a second)""" if timeout is None: @@ -234,12 +233,12 @@ def timeout(self, timeout): self._timeout = float(timeout) # allow the exception to be raised @property - def verify_ssl(self): + def verify_ssl(self) -> bool: """bool | str: Verify SSL when using requests or path to cert file""" return self._verify_ssl @verify_ssl.setter - def verify_ssl(self, verify_ssl): + def verify_ssl(self, verify_ssl: bool | str): """Set request verify SSL parameter; defaults to True if issue""" self._verify_ssl = True if isinstance(verify_ssl, (bool, str)): @@ -247,19 +246,17 @@ def verify_ssl(self, verify_ssl): self._reset_session() @property - def language(self): - """ str: The API URL language, if possible this will update the API \ - URL + def language(self) -> str: + """str: The API URL language, if possible this will update the API URL - Note: - Use correct language titles with the updated API URL 
-        Note:
-            Some API URLs do not encode language; unable to update if \
-            this is the case """
+        Note:
+            Use correct language titles with the updated API URL
+        Note:
+            Some API URLs do not encode language; unable to update if this is the case"""
         return self._lang
 
     @language.setter
-    def language(self, lang):
+    def language(self, lang: str):
         """Set the language to use; attempts to change the API URL"""
         lang = lang.lower()
         if self._lang == lang:
@@ -273,7 +270,7 @@ def language(self, lang):
         self.clear_memoized()
 
     @property
-    def category_prefix(self):
+    def category_prefix(self) -> str:
         """str: The category prefix to use when using category based functions
 
         Note:
@@ -281,31 +278,29 @@ def category_prefix(self):
         return self._cat_prefix
 
     @category_prefix.setter
-    def category_prefix(self, prefix):
+    def category_prefix(self, prefix: str):
         """Set the category prefix correctly"""
         if prefix[-1:] == ":":
             prefix = prefix[:-1]
         self._cat_prefix = prefix
 
     @property
-    def user_agent(self):
-        """ str: User agent string
+    def user_agent(self) -> str:
+        """str: User agent string
 
-        Note: If using in as part of another project, this should be \
-            changed """
+        Note: If used as part of another project, this should be changed"""
         return self._user_agent
 
     @user_agent.setter
-    def user_agent(self, user_agent):
-        """ Set the new user agent string
+    def user_agent(self, user_agent: str):
+        """Set the new user agent string
 
-        Note: Will need to re-log into the MediaWiki if user agent string \
-            is changed """
+        Note: Will need to re-log into the MediaWiki if the user agent string is changed"""
         self._user_agent = user_agent
         self._reset_session()
 
     @property
-    def api_url(self):
+    def api_url(self) -> str:
         """str: API URL of the MediaWiki site
 
         Note:
@@ -313,7 +308,7 @@ def api_url(self):
         return self._api_url
 
     @property
-    def memoized(self):
+    def memoized(self) -> Dict[Any, Any]:
         """dict: Return the memoize cache
 
         Note:
@@ -322,33 +317,34 @@ def memoized(self):
         return self._cache
 
     @property
-    def refresh_interval(self):
+    def refresh_interval(self) -> int:
         """int: The interval at which the memoize cache is to be refresh"""
         return self._refresh_interval
 
     @refresh_interval.setter
-    def refresh_interval(self, refresh_interval):
+    def refresh_interval(self, refresh_interval: int):
         """Set the new cache refresh interval"""
         if isinstance(refresh_interval, int) and refresh_interval > 0:
             self._refresh_interval = refresh_interval
         else:
             self._refresh_interval = None
 
-    def login(self, username, password, strict=True):
-        """ Login as specified user
+    def login(self, username: str, password: str, strict: bool = True) -> bool:
+        """Login as specified user
 
-        Args:
-            username (str): The username to log in with
-            password (str): The password for the user
-            strict (bool): `True` to throw an error on failure
-        Returns:
-            bool: `True` if successfully logged in; `False` otherwise
-        Raises:
-            :py:func:`mediawiki.exceptions.MediaWikiLoginError`: if unable to login
+        Args:
+            username (str): The username to log in with
+            password (str): The password for the user
+            strict (bool): `True` to throw an error on failure
+        Returns:
+            bool: `True` if successfully logged in; `False` otherwise
+        Raises:
+            :py:func:`mediawiki.exceptions.MediaWikiLoginError`: if unable to login
 
-        Note:
-            Per the MediaWiki API, one should use the `bot password`; \
-            see https://www.mediawiki.org/wiki/API:Login for more information """
+        Note:
+            Per the MediaWiki API, one should use the `bot password`; \
+            see https://www.mediawiki.org/wiki/API:Login for more information
+        """
        # get 
login token
        params = {
            "action": "query",
@@ -382,21 +378,22 @@ def login(self, username, password, strict=True):
     # non-properties
     def set_api_url(
         self,
-        api_url="https://{lang}.wikipedia.org/w/api.php",
-        lang="en",
-        username=None,
-        password=None,
+        api_url: str = "https://{lang}.wikipedia.org/w/api.php",
+        lang: str = "en",
+        username: Optional[str] = None,
+        password: Optional[str] = None,
     ):
-        """ Set the API URL and language
+        """Set the API URL and language
 
-        Args:
-            api_url (str): API URL to use
-            lang (str): Language of the API URL
-            username (str): The username, if needed, to log into the MediaWiki site
-            password (str): The password, if needed, to log into the MediaWiki site
-        Raises:
-            :py:func:`mediawiki.exceptions.MediaWikiAPIURLError`: if the \
-                url is not a valid MediaWiki site or login fails """
+        Args:
+            api_url (str): API URL to use
+            lang (str): Language of the API URL
+            username (str): The username, if needed, to log into the MediaWiki site
+            password (str): The password, if needed, to log into the MediaWiki site
+        Raises:
+            :py:func:`mediawiki.exceptions.MediaWikiAPIURLError`: if the \
+                url is not a valid MediaWiki site or login fails
+        """
         old_api_url = self._api_url
         old_lang = self._lang
         self._lang = lang.lower()
@@ -436,7 +433,7 @@ def clear_memoized(self):
 
     # non-setup functions
     @property
-    def supported_languages(self):
+    def supported_languages(self) -> Dict[str, str]:
         """dict: All supported language prefixes on the MediaWiki site
 
         Note:
@@ -449,7 +446,7 @@ def supported_languages(self):
         return self.__supported_languages
 
     @property
-    def available_languages(self):
+    def available_languages(self) -> Dict[str, bool]:
         """dict: All available language prefixes on the MediaWiki site
 
         Note:
@@ -466,11 +463,11 @@ def available_languages(self):
         return self.__available_languages
 
     @property
-    def logged_in(self):
+    def logged_in(self) -> bool:
         """bool: Returns if logged into the MediaWiki site"""
         return self._is_logged_in
 
-    def random(self, pages=1):
+    def random(self, pages: int = 1) -> str | List[str]:
         """Request a random page title or list of random titles
 
         Args:
@@ -490,7 +487,7 @@ def random(self, pages=1):
         return titles
 
     @memoize
-    def allpages(self, query="", results=10):
+    def allpages(self, query: str = "", results: int = 10) -> List[str]:
         """Request all pages from mediawiki instance
 
         Args:
@@ -513,7 +510,7 @@ def allpages(self, query="", results=10):
         return titles
 
     @memoize
-    def search(self, query, results=10, suggestion=False):
+    def search(self, query: str, results: int = 10, suggestion: bool = False) -> List[str] | Tuple[List[str], str]:
         """Search for similar titles
 
         Args:
@@ -554,7 +551,7 @@ def search(self, query, results=10, suggestion=False):
         return search_results
 
     @memoize
-    def suggest(self, query):
+    def suggest(self, query: str) -> Optional[str]:
         """Gather suggestions based on the provided title or None if no
         suggestions found
 
@@ -573,13 +570,13 @@ def suggest(self, query):
     @memoize
     def geosearch(
         self,
-        latitude=None,
-        longitude=None,
-        radius=1000,
-        title=None,
-        auto_suggest=True,
-        results=10,
-    ):
+        latitude: Decimal | float | None = None,
+        longitude: Decimal | float | None = None,
+        radius: int = 1000,
+        title: Optional[str] = None,
+        auto_suggest: bool = True,
+        results: int = 10,
+    ) -> List[str]:
         """Search for pages that relate to the provided geocoords or near
         the page
 
@@ -635,7 +632,7 @@ def test_lat_long(val):
         return [d["title"] for d in raw_results["query"]["geosearch"]]
 
     @memoize
-    def opensearch(self, query, results=10, redirect=True):
+    def 
opensearch(self, query: str, results: int = 10, redirect: bool = True) -> List[str]: """Execute a MediaWiki opensearch request, similar to search box suggestions and conforming to the OpenSearch specification @@ -672,7 +669,7 @@ def opensearch(self, query, results=10, redirect=True): return res @memoize - def prefixsearch(self, prefix, results=10): + def prefixsearch(self, prefix: str, results: int = 10) -> List[str]: """ Perform a prefix search using the provided prefix string Args: @@ -707,7 +704,7 @@ def prefixsearch(self, prefix, results=10): return [rec["title"] for rec in raw_results["query"]["prefixsearch"]] @memoize - def summary(self, title, sentences=0, chars=0, auto_suggest=True, redirect=True): + def summary(self, title: str, sentences: int = 0, chars: int = 0, auto_suggest: bool = True, redirect: bool = True): """ Get the summary for the title in question Args: @@ -727,7 +724,9 @@ def summary(self, title, sentences=0, chars=0, auto_suggest=True, redirect=True) return page_info.summarize(sentences, chars) @memoize - def categorymembers(self, category, results=10, subcategories=True): + def categorymembers( + self, category: str, results: int = 10, subcategories: bool = True + ) -> List[str] | Tuple[List[str], List[str]]: """Get information about a category: pages and subcategories Args: @@ -793,7 +792,7 @@ def categorymembers(self, category, results=10, subcategories=True): return pages, subcats return pages - def categorytree(self, category, depth=5): + def categorytree(self, category: str, depth: int = 5) -> Dict[str, Any]: """Generate the Category Tree for the given categories Args: @@ -858,7 +857,7 @@ def page(self, title=None, pageid=None, auto_suggest=True, redirect=True, preloa return MediaWikiPage(self, title, redirect=redirect, preload=preload) return MediaWikiPage(self, pageid=pageid, preload=preload) - def wiki_request(self, params): + def wiki_request(self, params: Dict[str, Any]) -> Dict[str, Any]: """ Make a request to the MediaWiki API using the given search parameters @@ -889,7 +888,7 @@ def wiki_request(self, params): return req # Protected functions - def _get_site_info(self): + def _get_site_info(self) -> List[str]: """Parse out the Wikimedia site information including API Version and Extensions""" response = self.wiki_request({"meta": "siteinfo", "siprop": "extensions|general"}) @@ -925,7 +924,7 @@ def _get_site_info(self): # end _get_site_info @staticmethod - def _check_error_response(response, query): + def _check_error_response(response, query: str): """check for default error messages and throw correct exception""" if "error" in response: http_error = ["HTTP request timed out.", "Pool queue is full"] @@ -942,13 +941,13 @@ def _check_error_response(response, query): raise MediaWikiException(err) @staticmethod - def _check_query(value, message): + def _check_query(value, message: str): """check if the query is 'valid'""" if value is None or value.strip() == "": raise ValueError(message) @staticmethod - def __category_parameter_verification(cats, depth, category): + def __category_parameter_verification(cats: str, depth: int, category: str): # parameter verification if len(cats) == 1 and (cats[0] is None or cats[0] == ""): msg = ( @@ -1009,14 +1008,14 @@ def __cat_tree_rec(self, cat, depth, tree, level, categories, links): links, ) - def _get_response(self, params): + def _get_response(self, params: Dict[str, Any]) -> Dict[str, Any]: """wrap the call to the requests package""" try: return self._session.get(self._api_url, params=params, 
timeout=self._timeout).json() except JSONDecodeError: return {} - def _post_response(self, params): + def _post_response(self, params: Dict[str, Any]) -> Dict[str, Any]: """wrap a post call to the requests package""" try: return self._session.post(self._api_url, data=params, timeout=self._timeout).json() diff --git a/mediawiki/mediawikipage.py b/mediawiki/mediawikipage.py index 6123dd2..64dd249 100644 --- a/mediawiki/mediawikipage.py +++ b/mediawiki/mediawikipage.py @@ -7,6 +7,7 @@ import re from collections import OrderedDict from decimal import Decimal +from typing import Any, Dict, Generator, List, Optional, Tuple from bs4 import BeautifulSoup, Tag @@ -22,29 +23,23 @@ class MediaWikiPage(object): - """ MediaWiki Page Instance - - Args: - mediawiki (MediaWiki): MediaWiki class object from which to pull - title (str): Title of page to retrieve - pageid (int): MediaWiki site pageid to retrieve - redirect (bool): **True:** Follow redirects - preload (bool): **True:** Load most properties after getting page - original_title (str): Not to be used from the caller; used to \ - help follow redirects - Raises: - :py:func:`mediawiki.exceptions.PageError`: if page provided does \ - not exist - Raises: - :py:func:`mediawiki.exceptions.DisambiguationError`: if page \ - provided is a disambiguation page - Raises: - :py:func:`mediawiki.exceptions.RedirectError`: if redirect is \ - **False** and the pageid or title provided redirects to another \ - page - Warning: - This should never need to be used directly! Please use \ - :func:`mediawiki.MediaWiki.page` """ + """MediaWiki Page Instance + + Args: + mediawiki (MediaWiki): MediaWiki class object from which to pull + title (str): Title of page to retrieve + pageid (int): MediaWiki site pageid to retrieve + redirect (bool): **True:** Follow redirects + preload (bool): **True:** Load most properties after getting page + original_title (str): Not to be used from the caller; used to help follow redirects + Raises: + :py:func:`mediawiki.exceptions.PageError`: if page provided does not exist + Raises: + :py:func:`mediawiki.exceptions.DisambiguationError`: if page provided is a disambiguation page + Raises: + :py:func:`mediawiki.exceptions.RedirectError`: if redirect is **False** and the pageid or title provided redirects to another page + Warning: + This should never need to be used directly! 
Please use :func:`mediawiki.MediaWiki.page`""" __slots__ = [ "mediawiki", @@ -77,14 +72,14 @@ class MediaWikiPage(object): def __init__( self, mediawiki, - title=None, - pageid=None, - redirect=True, - preload=False, - original_title="", + title: Optional[str] = None, + pageid: Optional[int] = None, + redirect: bool = True, + preload: bool = False, + original_title: str = "", ): self.mediawiki = mediawiki - self.url = None + self.url: Optional[str] = None if title is not None: self.title = title self.original_title = original_title or title @@ -154,7 +149,7 @@ def __eq__(self, other): return False # Properties - def _pull_content_revision_parent(self): + def _pull_content_revision_parent(self) -> Tuple[str, int, int]: """combine the pulling of these three properties""" if self._revision_id is None: @@ -176,7 +171,7 @@ def _pull_content_revision_parent(self): return self._content, self._revision_id, self._parent_id @property - def content(self): + def content(self) -> str: """str: The page content in text format Note: @@ -188,7 +183,7 @@ def content(self): return self._content @property - def revision_id(self): + def revision_id(self) -> int: """int: The current revision id of the page Note: @@ -200,7 +195,7 @@ def revision_id(self): return self._revision_id @property - def parent_id(self): + def parent_id(self) -> int: """int: The parent id of the page Note: @@ -212,7 +207,7 @@ def parent_id(self): return self._parent_id @property - def html(self): + def html(self) -> str: """str: HTML representation of the page Note: @@ -234,7 +229,7 @@ def html(self): return self._html @property - def wikitext(self): + def wikitext(self) -> str: """str: Wikitext representation of the page Note: @@ -251,7 +246,7 @@ def wikitext(self): return self._wikitext @property - def images(self): + def images(self) -> List[str]: """list: Images on the page Note: @@ -271,16 +266,15 @@ def images(self): return self._images @property - def logos(self): - """ list: Parse images within the infobox signifying either the main \ - image or logo - - Note: - Not settable - Note: - Side effect is to also pull the html which can be slow - Note: - This is a parsing operation and not part of the standard API""" + def logos(self) -> List[str]: + """list: Parse images within the infobox signifying either the main image or logo + + Note: + Not settable + Note: + Side effect is to also pull the html which can be slow + Note: + This is a parsing operation and not part of the standard API""" if self._logos is None: self._logos = list() # Cache the results of parsing the html, so that multiple calls happen much faster @@ -294,7 +288,7 @@ def logos(self): return self._logos @property - def hatnotes(self): + def hatnotes(self) -> List[str]: """list: Parse hatnotes from the HTML Note: @@ -321,21 +315,19 @@ def hatnotes(self): return self._hatnotes @property - def references(self): - """ list: External links, or references, listed anywhere on the \ - MediaWiki page - Note: - Not settable - Note - May include external links within page that are not \ - technically cited anywhere """ + def references(self) -> List[str]: + """list: External links, or references, listed anywhere on the MediaWiki page + Note: + Not settable + Note + May include external links within page that are not technically cited anywhere""" if self._references is None: self._references = list() self.__pull_combined_properties() return self._references @property - def categories(self): + def categories(self) -> List[str]: """list: Non-hidden categories on the page Note: @@ 
-346,21 +338,20 @@ def categories(self):
         return self._categories
 
     @property
-    def coordinates(self):
-        """ Tuple: GeoCoordinates of the place referenced; results in \
-            lat/long tuple or None if no geocoordinates present
-
-        Note:
-            Not settable
-        Note:
-            Requires the GeoData extension to be installed """
+    def coordinates(self) -> Optional[Tuple[Decimal, Decimal]]:
+        """Tuple: GeoCoordinates of the place referenced; results in lat/long tuple or None if no geocoordinates present
+
+        Note:
+            Not settable
+        Note:
+            Requires the GeoData extension to be installed"""
         if self._coordinates is False:
             self._coordinates = None
             self.__pull_combined_properties()
         return self._coordinates
 
     @property
-    def links(self):
+    def links(self) -> List[str]:
         """list: List of all MediaWiki page links on the page
 
         Note:
@@ -371,19 +362,18 @@ def links(self):
         return self._links
 
     @property
-    def redirects(self):
-        """ list: List of all redirects to this page; **i.e.,** the titles \
-            listed here will redirect to this page title
+    def redirects(self) -> List[str]:
+        """list: List of all redirects to this page; **i.e.,** the titles listed here will redirect to this page title
 
-        Note:
-            Not settable """
+        Note:
+            Not settable"""
         if self._redirects is None:
             self._redirects = list()
             self.__pull_combined_properties()
         return self._redirects
 
     @property
-    def backlinks(self):
+    def backlinks(self) -> List[str]:
         """list: Pages that link to this page
 
         Note:
@@ -403,17 +393,14 @@ def backlinks(self):
         return self._backlinks
 
     @property
-    def langlinks(self):
-        """ dict: Names of the page in other languages for which page is \
-            where the key is the language code and the page name is the name \
-            of the page in that language.
+    def langlinks(self) -> Dict[str, str]:
+        """dict: Names of the page in other languages, where the key is the language code and the value is the name of the page in that language. 
Note: Not settable Note: - list of all language links from the provided pages to other \ - languages according to: \ - https://www.mediawiki.org/wiki/API:Langlinks """ + list of all language links from the provided pages to other + languages according to: https://www.mediawiki.org/wiki/API:Langlinks""" if self._langlinks is None: params = {"prop": "langlinks", "cllimit": "max"} @@ -426,7 +413,7 @@ def langlinks(self): return self._langlinks @property - def preview(self): + def preview(self) -> Dict[str, str]: """dict: Page preview information that builds the preview hover""" if self._preview is None: params = { @@ -447,7 +434,7 @@ def preview(self): return self._preview @property - def summary(self): + def summary(self) -> str: """str: Default page summary Note: @@ -456,20 +443,17 @@ def summary(self): self.__pull_combined_properties() return self._summary - def summarize(self, sentences=0, chars=0): - """ Summarize page either by number of sentences, chars, or first - section (**default**) - - Args: - sentences (int): Number of sentences to use in summary \ - (first `x` sentences) - chars (int): Number of characters to use in summary \ - (first `x` characters) - Returns: - str: The summary of the MediaWiki page - Note: - Precedence for parameters: sentences then chars; if both are \ - 0 then the entire first section is returned """ + def summarize(self, sentences: int = 0, chars: int = 0) -> str: + """Summarize page either by number of sentences, chars, or first + section (**default**) + + Args: + sentences (int): Number of sentences to use in summary (first `x` sentences) + chars (int): Number of characters to use in summary (first `x` characters) + Returns: + str: The summary of the MediaWiki page + Note: + Precedence for parameters: sentences then chars; if both are 0 then the entire first section is returned""" query_params = {"prop": "extracts", "explaintext": "", "titles": self.title} if sentences: query_params["exsentences"] = 10 if sentences > 10 else sentences @@ -483,7 +467,7 @@ def summarize(self, sentences=0, chars=0): return summary @property - def sections(self): + def sections(self) -> List[str]: """list: Table of contents sections Note: @@ -496,7 +480,7 @@ def sections(self): return self._sections @property - def table_of_contents(self): + def table_of_contents(self) -> OrderedDict[str, Any]: """OrderedDict: Dictionary of sections and sub-sections Note: @@ -508,23 +492,21 @@ def table_of_contents(self): self._parse_sections() return self._table_of_contents - def section(self, section_title): - """ Plain text section content - - Args: - section_title (str): Name of the section to pull or None \ - for the header section - Returns: - str: The content of the section - Note: - Use **None** if the header section is desired - Note: - Returns **None** if section title is not found; only text \ - between title and next section or sub-section title is returned - Note: - Side effect is to also pull the content which can be slow - Note: - This is a parsing operation and not part of the standard API""" + def section(self, section_title: str) -> Optional[str]: + """Plain text section content + + Args: + section_title (str): Name of the section to pull or None for the header section + Returns: + str: The content of the section + Note: + Use **None** if the header section is desired + Note: + Returns **None** if section title is not found; only text between title and next section or sub-section title is returned + Note: + Side effect is to also pull the content which can be slow + Note: + 
This is a parsing operation and not part of the standard API""" if not section_title: try: content = self.content @@ -557,23 +539,21 @@ def section(self, section_title): return self.content[index:next_index].lstrip("=").strip() - def parse_section_links(self, section_title): - """ Parse all links within a section - - Args: - section_title (str): Name of the section to pull or, if \ - None is provided, the links between the main heading and \ - the first section - Returns: - list: List of (title, url) tuples - Note: - Use **None** to pull the links from the header section - Note: - Returns **None** if section title is not found - Note: - Side effect is to also pull the html which can be slow - Note: - This is a parsing operation and not part of the standard API""" + def parse_section_links(self, section_title: str) -> List[Tuple[str, str]]: + """Parse all links within a section + + Args: + section_title (str): Name of the section to pull or, if None is provided, the links between the main heading and the first section + Returns: + list: List of (title, url) tuples + Note: + Use **None** to pull the links from the header section + Note: + Returns **None** if section title is not found + Note: + Side effect is to also pull the html which can be slow + Note: + This is a parsing operation and not part of the standard API""" # Cache the results of parsing the html, so that multiple calls happen much faster if not self._soup: self._soup = BeautifulSoup(self.html, "html.parser") @@ -596,7 +576,7 @@ def parse_section_links(self, section_title): return None # Protected Methods - def __load(self, redirect=True, preload=False): + def __load(self, redirect: bool = True, preload: bool = False): """load the basic page information""" query_params = { "prop": "info|pageprops", @@ -633,7 +613,7 @@ def _raise_page_error(self): raise PageError(title=self.title) raise PageError(pageid=self.pageid) - def _raise_disambiguation_error(self, page, pageid): + def _raise_disambiguation_error(self, page: str, pageid: int): """parse and throw a disambiguation error""" query_params = { "prop": "revisions", @@ -667,7 +647,7 @@ def _raise_disambiguation_error(self, page, pageid): disambiguation, ) - def _handle_redirect(self, redirect, preload, query, page): + def _handle_redirect(self, redirect: bool, preload: bool, query: str, page: Dict[str, Any]): """handle redirect""" if redirect: redirects = query["redirects"][0] @@ -695,7 +675,7 @@ def _handle_redirect(self, redirect, preload, query, page): else: raise RedirectError(getattr(self, "title", page["title"])) - def _continued_query(self, query_params, key="pages"): + def _continued_query(self, query_params: Dict[str, Any], key: str = "pages"): """Based on https://www.mediawiki.org/wiki/API:Query#Continuing_queries""" query_params.update(self.__title_query_param()) @@ -728,7 +708,7 @@ def _continued_query(self, query_params, key="pages"): last_cont = request["continue"] - def _parse_section_links(self, id_tag): + def _parse_section_links(self, id_tag: str) -> List[str]: """given a section id, parse the links in the unordered list""" all_links = list() @@ -769,7 +749,7 @@ def _parse_section_links(self, id_tag): all_links.append(self.__parse_link_info(link)) return all_links - def __parse_link_info(self, link): + def __parse_link_info(self, link: str) -> Tuple[str, str]: """parse the tag for the link""" href = link.get("href", "") txt = link.string or href @@ -829,7 +809,7 @@ def _list_to_dict(_dict, path, sec): self._table_of_contents = res - def 
__title_query_param(self):
+    def __title_query_param(self) -> Dict[str, str | int]:
         """util function to determine which parameter method to use"""
         if getattr(self, "title", None) is not None:
             return {"titles": self.title}
diff --git a/tests/mediawiki_test.py b/tests/mediawiki_test.py
index 8868f9e..46f79aa 100644
--- a/tests/mediawiki_test.py
+++ b/tests/mediawiki_test.py
@@ -1,7 +1,7 @@
 # -*- coding: utf-8 -*-
-'''
+"""
 Unittest class
-'''
+"""
 import json
 import time
 import unittest
@@ -26,167 +26,180 @@
 
 class MediaWikiOverloaded(MediaWiki):
-    ''' Overload the MediaWiki class to change how wiki_request works '''
-    def __init__(self, url='https://{lang}.wikipedia.org/w/api.php', lang='en',
-                 timeout=15, rate_limit=False,
-                 rate_limit_wait=timedelta(milliseconds=50),
-                 cat_prefix='Category', user_agent=None):
-        ''' new init '''
-
-        with open('./tests/mock_requests.json', 'r') as file_handle:
+    """Overload the MediaWiki class to change how wiki_request works"""
+
+    def __init__(
+        self,
+        url="https://{lang}.wikipedia.org/w/api.php",
+        lang="en",
+        timeout=15,
+        rate_limit=False,
+        rate_limit_wait=timedelta(milliseconds=50),
+        cat_prefix="Category",
+        user_agent=None,
+    ):
+        """new init"""
+
+        with open("./tests/mock_requests.json", "r") as file_handle:
             self.requests = json.load(file_handle)
-        with open('./tests/mock_responses.json', 'r') as file_handle:
+        with open("./tests/mock_responses.json", "r") as file_handle:
             self.responses = json.load(file_handle)
-        self.tree_path = './tests/mock_categorytree.json'
-
-        MediaWiki.__init__(self, url=url, lang=lang, timeout=timeout,
-                           rate_limit=rate_limit,
-                           rate_limit_wait=rate_limit_wait,
-                           cat_prefix=cat_prefix, user_agent=user_agent)
+        self.tree_path = "./tests/mock_categorytree.json"
+
+        MediaWiki.__init__(
+            self,
+            url=url,
+            lang=lang,
+            timeout=timeout,
+            rate_limit=rate_limit,
+            rate_limit_wait=rate_limit_wait,
+            cat_prefix=cat_prefix,
+            user_agent=user_agent,
+        )
 
     def _get_response(self, params):
-        ''' override the __get_response method '''
+        """override the _get_response method"""
         new_params = json.dumps(tuple(sorted(params.items())))
         return self.requests[self.api_url][new_params]
 
     def _post_response(self, params):
-        ''' override the __get_response method '''
+        """override the _post_response method"""
         new_params = json.dumps(tuple(sorted(params.items())))
         return self.requests[self.api_url][new_params]
 
 
 class TestMediaWiki(unittest.TestCase):
-    ''' test the MediaWiki Class Basic functionality '''
+    """test the MediaWiki Class Basic functionality"""
+
     def test_version(self):
-        ''' test version information '''
+        """test version information"""
         site = MediaWikiOverloaded()
         self.assertEqual(site.version, mediawiki.__version__)
 
     def test_api_url(self):
-        ''' test the original api '''
+        """test the original api"""
         site = MediaWikiOverloaded()
-        self.assertEqual(site.api_url, 'https://en.wikipedia.org/w/api.php')
+        self.assertEqual(site.api_url, "https://en.wikipedia.org/w/api.php")
 
     def test_base_url(self):
-        ''' test that the base url is parsed correctly '''
+        """test that the base url is parsed correctly"""
         site = MediaWikiOverloaded()
-        self.assertEqual(site.base_url, 'https://en.wikipedia.org')
+        self.assertEqual(site.base_url, "https://en.wikipedia.org")
 
     def test_base_url_no_http(self):
-        ''' test that the base url is parsed correctly without http '''
-        site = MediaWikiOverloaded(url='https://awoiaf.westeros.org/api.php')
-        self.assertEqual(site.base_url, 'https://awoiaf.westeros.org')
+        """test that the base url is parsed correctly without 
http""" + site = MediaWikiOverloaded(url="https://awoiaf.westeros.org/api.php") + self.assertEqual(site.base_url, "https://awoiaf.westeros.org") def test_base_url_switch(self): - ''' test that the base url is parsed correctly when switching sites ''' + """test that the base url is parsed correctly when switching sites""" site = MediaWikiOverloaded() - self.assertEqual(site.base_url, 'https://en.wikipedia.org') - site.set_api_url('https://awoiaf.westeros.org/api.php') - self.assertEqual(site.base_url, 'https://awoiaf.westeros.org') + self.assertEqual(site.base_url, "https://en.wikipedia.org") + site.set_api_url("https://awoiaf.westeros.org/api.php") + self.assertEqual(site.base_url, "https://awoiaf.westeros.org") def test_api_url_set(self): - ''' test the api url being set at creation time ''' - site = MediaWikiOverloaded(url='https://awoiaf.westeros.org/api.php') + """test the api url being set at creation time""" + site = MediaWikiOverloaded(url="https://awoiaf.westeros.org/api.php") response = site.responses[site.api_url] - self.assertEqual(site.api_url, 'https://awoiaf.westeros.org/api.php') - self.assertEqual(site.api_version, response['api_version']) - self.assertEqual(sorted(site.extensions), sorted(response['extensions'])) + self.assertEqual(site.api_url, "https://awoiaf.westeros.org/api.php") + self.assertEqual(site.api_version, response["api_version"]) + self.assertEqual(sorted(site.extensions), sorted(response["extensions"])) def test_change_lang(self): - ''' test changing the language ''' + """test changing the language""" site = MediaWikiOverloaded() - site.language = 'FR' - self.assertEqual(site.language, 'fr') - self.assertEqual(site.api_url, 'https://fr.wikipedia.org/w/api.php') + site.language = "FR" + self.assertEqual(site.language, "fr") + self.assertEqual(site.api_url, "https://fr.wikipedia.org/w/api.php") def test_change_lang_same(self): - ''' test changing the language to the same lang ''' - site = MediaWikiOverloaded(url='https://fr.wikipedia.org/w/api.php', - lang='fr') - site.language = 'FR' - self.assertEqual(site.language, 'fr') - self.assertEqual(site.api_url, 'https://fr.wikipedia.org/w/api.php') + """test changing the language to the same lang""" + site = MediaWikiOverloaded(url="https://fr.wikipedia.org/w/api.php", lang="fr") + site.language = "FR" + self.assertEqual(site.language, "fr") + self.assertEqual(site.api_url, "https://fr.wikipedia.org/w/api.php") def test_api_lang_no_url(self): - ''' test setting the language on init without api_url ''' - site = MediaWikiOverloaded(lang='fr') - self.assertEqual(site.language, 'fr') - self.assertEqual(site.api_url, 'https://fr.wikipedia.org/w/api.php') + """test setting the language on init without api_url""" + site = MediaWikiOverloaded(lang="fr") + self.assertEqual(site.language, "fr") + self.assertEqual(site.api_url, "https://fr.wikipedia.org/w/api.php") def test_api_lang_no_url_upper(self): - ''' test setting the language on init without api_url upper case ''' - site = MediaWikiOverloaded(lang='FR') - self.assertEqual(site.language, 'fr') - self.assertEqual(site.api_url, 'https://fr.wikipedia.org/w/api.php') + """test setting the language on init without api_url upper case""" + site = MediaWikiOverloaded(lang="FR") + self.assertEqual(site.language, "fr") + self.assertEqual(site.api_url, "https://fr.wikipedia.org/w/api.php") def test_change_lang_no_change(self): - ''' test changing the language when url will not change ''' - site = MediaWikiOverloaded(url='https://awoiaf.westeros.org/api.php') - site.language = 
'FR' - self.assertEqual(site.language, 'fr') - self.assertEqual(site.api_url, 'https://awoiaf.westeros.org/api.php') + """test changing the language when url will not change""" + site = MediaWikiOverloaded(url="https://awoiaf.westeros.org/api.php") + site.language = "FR" + self.assertEqual(site.language, "fr") + self.assertEqual(site.api_url, "https://awoiaf.westeros.org/api.php") def test_api_version(self): - ''' test api version parsed correctly''' + """test api version parsed correctly""" site = MediaWikiOverloaded() response = site.responses[site.api_url] - self.assertEqual(site.api_version, response['api_version']) + self.assertEqual(site.api_version, response["api_version"]) def test_extensions(self): - ''' test parsing extensions correctly ''' + """test parsing extensions correctly""" site = MediaWikiOverloaded() response = site.responses[site.api_url] - self.assertEqual(site.extensions, response['extensions']) + self.assertEqual(site.extensions, response["extensions"]) def test_change_api_url(self): - ''' test switching the api url ''' + """test switching the api url""" site = MediaWikiOverloaded() response = site.responses[site.api_url] - self.assertEqual(site.api_url, 'https://en.wikipedia.org/w/api.php') - self.assertEqual(site.api_version, response['api_version']) - self.assertEqual(sorted(site.extensions), sorted(response['extensions'])) + self.assertEqual(site.api_url, "https://en.wikipedia.org/w/api.php") + self.assertEqual(site.api_version, response["api_version"]) + self.assertEqual(sorted(site.extensions), sorted(response["extensions"])) - site.set_api_url('https://awoiaf.westeros.org/api.php', lang='en') + site.set_api_url("https://awoiaf.westeros.org/api.php", lang="en") response = site.responses[site.api_url] - self.assertEqual(site.api_url, 'https://awoiaf.westeros.org/api.php') - self.assertEqual(site.api_version, response['api_version']) - self.assertEqual(sorted(site.extensions), sorted(response['extensions'])) + self.assertEqual(site.api_url, "https://awoiaf.westeros.org/api.php") + self.assertEqual(site.api_version, response["api_version"]) + self.assertEqual(sorted(site.extensions), sorted(response["extensions"])) def test_change_api_url_lang(self): - ''' test changing the api url with only language ''' + """test changing the api url with only language""" site = MediaWikiOverloaded() - self.assertEqual(site.api_url, 'https://en.wikipedia.org/w/api.php') - site.set_api_url(lang='fr') - self.assertEqual(site.api_url, 'https://fr.wikipedia.org/w/api.php') - self.assertEqual(site.language, 'fr') + self.assertEqual(site.api_url, "https://en.wikipedia.org/w/api.php") + site.set_api_url(lang="fr") + self.assertEqual(site.api_url, "https://fr.wikipedia.org/w/api.php") + self.assertEqual(site.language, "fr") def test_change_api_url_lang_upper(self): - ''' test changing the api url with only language upper case ''' + """test changing the api url with only language upper case""" site = MediaWikiOverloaded() - self.assertEqual(site.api_url, 'https://en.wikipedia.org/w/api.php') - site.set_api_url(lang='FR') - self.assertEqual(site.api_url, 'https://fr.wikipedia.org/w/api.php') - self.assertEqual(site.language, 'fr') + self.assertEqual(site.api_url, "https://en.wikipedia.org/w/api.php") + site.set_api_url(lang="FR") + self.assertEqual(site.api_url, "https://fr.wikipedia.org/w/api.php") + self.assertEqual(site.language, "fr") def test_change_user_agent(self): - ''' test changing the user agent ''' + """test changing the user agent""" site = MediaWikiOverloaded() - 
site.user_agent = 'test-user-agent' - self.assertEqual(site.user_agent, 'test-user-agent') + site.user_agent = "test-user-agent" + self.assertEqual(site.user_agent, "test-user-agent") def test_init_user_agent(self): - ''' test initializing the user agent ''' - site = MediaWikiOverloaded(user_agent='test-user-agent') - self.assertEqual(site.user_agent, 'test-user-agent') + """test initializing the user agent""" + site = MediaWikiOverloaded(user_agent="test-user-agent") + self.assertEqual(site.user_agent, "test-user-agent") def test_languages(self): - ''' test pulling wikimedia supported languages ''' + """test pulling wikimedia supported languages""" site = MediaWikiOverloaded() response = site.responses[site.api_url] - self.assertEqual(site.supported_languages, response['languages']) + self.assertEqual(site.supported_languages, response["languages"]) def test_rate_limit(self): - ''' test setting rate limiting ''' + """test setting rate limiting""" site = MediaWikiOverloaded() site.rate_limit = True self.assertEqual(site.rate_limit, True) @@ -194,7 +207,7 @@ def test_rate_limit(self): self.assertEqual(site.rate_limit_min_wait, timedelta(milliseconds=50)) def test_rate_limit_min_wait(self): - ''' test setting rate limiting min wait ''' + """test setting rate limiting min wait""" site = MediaWikiOverloaded() site.rate_limit_min_wait = timedelta(milliseconds=150) self.assertEqual(site.rate_limit, False) @@ -202,7 +215,7 @@ def test_rate_limit_min_wait(self): self.assertEqual(site.rate_limit_min_wait, timedelta(milliseconds=150)) def test_rate_limit_min_wait_reset(self): - ''' test setting rate limiting ''' + """test setting rate limiting""" site = MediaWikiOverloaded(rate_limit=True) self.assertNotEqual(site._rate_limit_last_call, None) # should be set site.rate_limit_min_wait = timedelta(milliseconds=150) @@ -211,792 +224,796 @@ def test_rate_limit_min_wait_reset(self): self.assertEqual(site.rate_limit_min_wait, timedelta(milliseconds=150)) def test_default_timeout(self): - ''' test default timeout ''' + """test default timeout""" site = MediaWikiOverloaded() self.assertEqual(site.timeout, 15) def test_set_timeout(self): - ''' test setting timeout ''' + """test setting timeout""" site = MediaWikiOverloaded() site.timeout = 30 self.assertEqual(site.timeout, 30) def test_set_timeout_none(self): - ''' test setting timeout to None ''' + """test setting timeout to None""" site = MediaWikiOverloaded() site.timeout = None self.assertEqual(site.timeout, None) def test_set_timeout_bad(self): - ''' test that we raise the ValueError ''' - self.assertRaises(ValueError, - lambda: MediaWikiOverloaded(timeout='foo')) + """test that we raise the ValueError""" + self.assertRaises(ValueError, lambda: MediaWikiOverloaded(timeout="foo")) def test_memoized(self): - ''' test returning the memoized cache ''' + """test returning the memoized cache""" site = MediaWikiOverloaded() self.assertEqual(site.memoized, dict()) def test_memoized_not_empty(self): - ''' test returning the memoized cache; not empty ''' + """test returning the memoized cache; not empty""" site = MediaWikiOverloaded() - site.search('chest set') + site.search("chest set") self.assertNotEqual(site.memoized, dict()) def test_clear_memoized(self): - ''' test clearing the memoized cache ''' + """test clearing the memoized cache""" site = MediaWikiOverloaded() - site.search('chest set') + site.search("chest set") self.assertNotEqual(site.memoized, dict()) site.clear_memoized() self.assertEqual(site.memoized, dict()) def test_no_memoized(self): - ''' 
test changing the caching of results ''' + """test changing the caching of results""" site = MediaWikiOverloaded() self.assertTrue(site.use_cache) site.use_cache = False self.assertFalse(site.use_cache) - site.search('chest set') + site.search("chest set") self.assertEqual(site.memoized, dict()) site.use_cache = True - site.search('chest set') + site.search("chest set") self.assertNotEqual(site.memoized, dict()) def test_refresh_interval(self): - ''' test not setting refresh interval ''' + """test not setting refresh interval""" site = MediaWikiOverloaded() self.assertEqual(site.refresh_interval, None) def test_refresh_interval_set(self): - ''' test setting refresh interval ''' + """test setting refresh interval""" site = MediaWikiOverloaded() site.refresh_interval = 5 self.assertEqual(site.refresh_interval, 5) def test_refresh_interval_neg(self): - ''' test setting refresh interval to invalid number ''' + """test setting refresh interval to invalid number""" site = MediaWikiOverloaded() site.refresh_interval = -5 self.assertEqual(site.refresh_interval, None) def test_refresh_interval_str(self): - ''' test setting refresh interval to invalid type ''' + """test setting refresh interval to invalid type""" site = MediaWikiOverloaded() site.refresh_interval = "something" self.assertEqual(site.refresh_interval, None) def test_memoized_refresh_no(self): - ''' test refresh interval for memoized cache when too quick ''' + """test refresh interval for memoized cache when too quick""" site = MediaWikiOverloaded() site.refresh_interval = 2 - site.search('chest set') - key1 = list(site.memoized['search'])[0] # get first key - time1 = site.memoized['search'][key1] - site.search('chest set') - key2 = list(site.memoized['search'])[0] # get first key - time2 = site.memoized['search'][key2] + site.search("chest set") + key1 = list(site.memoized["search"])[0] # get first key + time1 = site.memoized["search"][key1] + site.search("chest set") + key2 = list(site.memoized["search"])[0] # get first key + time2 = site.memoized["search"][key2] self.assertEqual(time1, time2) def test_memoized_refresh(self): - ''' test refresh interval for memoized cache ''' + """test refresh interval for memoized cache""" site = MediaWikiOverloaded() site.refresh_interval = 2 - site.search('chest set') - key1 = list(site.memoized['search'])[0] # get first key - time1 = site.memoized['search'][key1] + site.search("chest set") + key1 = list(site.memoized["search"])[0] # get first key + time1 = site.memoized["search"][key1] time.sleep(5) - site.search('chest set') - key2 = list(site.memoized['search'])[0] # get first key - time2 = site.memoized['search'][key2] + site.search("chest set") + key2 = list(site.memoized["search"])[0] # get first key + time2 = site.memoized["search"][key2] self.assertNotEqual(time1, time2) self.assertGreater(time2, time1) def test_cat_prefix(self): - ''' test the default category prefix''' + """test the default category prefix""" site = MediaWikiOverloaded() - self.assertEqual(site.category_prefix, 'Category') + self.assertEqual(site.category_prefix, "Category") def test_cat_prefix_change(self): - ''' test changing the category prefix ''' + """test changing the category prefix""" site = MediaWikiOverloaded() - self.assertEqual(site.category_prefix, 'Category') - site.category_prefix = 'Something:' - self.assertEqual(site.category_prefix, 'Something') + self.assertEqual(site.category_prefix, "Category") + site.category_prefix = "Something:" + self.assertEqual(site.category_prefix, "Something") + class 
TestMediaWikiLogin(unittest.TestCase): - ''' Test login functionality ''' + """Test login functionality""" def test_successful_login(self): - ''' test login success!''' + """test login success!""" site = MediaWikiOverloaded() - res = site.login('username', 'fakepassword') + res = site.login("username", "fakepassword") self.assertEqual(site.logged_in, True) self.assertEqual(res, True) def test_failed_login(self): - ''' test that login failure throws the correct exception ''' + """test that login failure throws the correct exception""" site = MediaWikiOverloaded() try: - res = site.login('badusername', 'fakepassword') + res = site.login("badusername", "fakepassword") except MediaWikiLoginError as ex: self.assertEqual(site.logged_in, False) - msg = 'MediaWiki login failure: Incorrect username or password entered. Please try again.' + msg = "MediaWiki login failure: Incorrect username or password entered. Please try again." self.assertEqual(ex.error, msg) else: self.assertEqual(True, False) def test_failed_login_no_strict(self): - ''' test that login failure with strict off works ''' + """test that login failure with strict off works""" site = MediaWikiOverloaded() - res = site.login('badusername', 'fakepassword', strict=False) + res = site.login("badusername", "fakepassword", strict=False) self.assertEqual(site.logged_in, False) self.assertEqual(res, False) + class TestMediaWikiRandom(unittest.TestCase): - ''' test Random Functionality ''' + """test Random Functionality""" + def test_random(self): - ''' test pulling random pages ''' + """test pulling random pages""" site = MediaWikiOverloaded() response = site.responses[site.api_url] - self.assertEqual(site.random(pages=1), response['random_1']) + self.assertEqual(site.random(pages=1), response["random_1"]) def test_random_2(self): - ''' test pulling random pages ''' + """test pulling random pages""" site = MediaWikiOverloaded() response = site.responses[site.api_url] - self.assertEqual(site.random(pages=2), response['random_2']) + self.assertEqual(site.random(pages=2), response["random_2"]) def test_random_10(self): - ''' test pulling random pages ''' + """test pulling random pages""" site = MediaWikiOverloaded() response = site.responses[site.api_url] - self.assertEqual(site.random(pages=10), response['random_10']) + self.assertEqual(site.random(pages=10), response["random_10"]) def test_random_202(self): - ''' test pulling 202 random pages ''' + """test pulling 202 random pages""" site = MediaWikiOverloaded() response = site.responses[site.api_url] - self.assertEqual(site.random(pages=202), response['random_202']) - msg = ("\nNOTE: This is supposed to be limited to 20 by the API, per " - "the documentation, but it isn't...") + self.assertEqual(site.random(pages=202), response["random_202"]) + msg = "\nNOTE: This is supposed to be limited to 20 by the API, per " "the documentation, but it isn't..." 
print(msg) - self.assertEqual(len(response['random_202']), 202) # limit to 20 + self.assertEqual(len(response["random_202"]), 202) # limit to 20 def test_random_value_err_msg(self): - ''' test that ValueError message thrown from random''' + """test that ValueError message thrown from random""" site = MediaWikiOverloaded() try: site.random(pages=None) except ValueError as ex: - msg = 'Number of pages must be greater than 0' + msg = "Number of pages must be greater than 0" self.assertEqual(str(ex), msg) def test_random_value_err(self): - ''' test that ValueError is thrown from random''' + """test that ValueError is thrown from random""" site = MediaWikiOverloaded() self.assertRaises(ValueError, lambda: site.random(pages=None)) class TestMediaWikiAllPages(unittest.TestCase): - ''' test Mediawiki AllPages functionality ''' + """test Mediawiki AllPages functionality""" + def test_allpages(self): - ''' test using the all page query ''' + """test using the all page query""" site = MediaWikiOverloaded() - response = site.responses[site.api_url]['all_pages_query_a'] + response = site.responses[site.api_url]["all_pages_query_a"] res = site.allpages("a") self.assertEqual(response, res) def test_allpages_num_results(self): - ''' test using the all page query with a limiting number ''' + """test using the all page query with a limiting number""" site = MediaWikiOverloaded() - response = site.responses[site.api_url]['all_pages_query_a_1'] + response = site.responses[site.api_url]["all_pages_query_a_1"] res = site.allpages("a", results=1) self.assertEqual(response, res) class TestMediaWikiSearch(unittest.TestCase): - ''' test MediaWiki Page Search Functionality ''' + """test MediaWiki Page Search Functionality""" + def test_search_no_sug(self): - ''' test searching without suggestion ''' + """test searching without suggestion""" site = MediaWikiOverloaded() response = site.responses[site.api_url] # test that default is suggestion False - api_url = response['search_without_suggestion'] - sws = response['search_without_suggestion'] - self.assertEqual(site.search('chest set'), api_url) - self.assertEqual(site.search('chest set', suggestion=False), sws) + api_url = response["search_without_suggestion"] + sws = response["search_without_suggestion"] + self.assertEqual(site.search("chest set"), api_url) + self.assertEqual(site.search("chest set", suggestion=False), sws) def test_search_sug_found(self): - ''' test searching with suggestion where found ''' + """test searching with suggestion where found""" site = MediaWikiOverloaded() response = site.responses[site.api_url] - sws = response['search_with_suggestion_found'] - self.assertEqual(list(site.search('chest set', suggestion=True)), sws) + sws = response["search_with_suggestion_found"] + self.assertEqual(list(site.search("chest set", suggestion=True)), sws) def test_search_sug_not_found(self): - ''' test searching with suggestion where not found ''' + """test searching with suggestion where not found""" site = MediaWikiOverloaded() response = site.responses[site.api_url] - ssnf = response['search_with_suggestion_not_found'] - self.assertEqual(list(site.search('chess set', suggestion=True)), ssnf) + ssnf = response["search_with_suggestion_not_found"] + self.assertEqual(list(site.search("chess set", suggestion=True)), ssnf) def test_search_sug_not_found_sm(self): - ''' test searching with small result limit test ''' + """test searching with small result limit test""" site = MediaWikiOverloaded() response = site.responses[site.api_url] - 
self.assertEqual(site.search('chess set', results=3, - suggestion=False), - response['search_with_suggestion_not_found_small']) - num_res = len(response['search_with_suggestion_not_found_small']) + self.assertEqual( + site.search("chess set", results=3, suggestion=False), response["search_with_suggestion_not_found_small"] + ) + num_res = len(response["search_with_suggestion_not_found_small"]) self.assertEqual(num_res, 3) # limit to 500 def test_search_sug_not_found_lg(self): - ''' test searching without suggestion limited to the correct number ''' + """test searching without suggestion limited to the correct number""" site = MediaWikiOverloaded() response = site.responses[site.api_url] - self.assertEqual(site.search('chess set', results=505, - suggestion=False), - response['search_with_suggestion_not_found_large']) - num_res = len(response['search_with_suggestion_not_found_large']) + self.assertEqual( + site.search("chess set", results=505, suggestion=False), response["search_with_suggestion_not_found_large"] + ) + num_res = len(response["search_with_suggestion_not_found_large"]) self.assertEqual(num_res, 500) # limit to 500 class TestMediaWikiSuggest(unittest.TestCase): - ''' test the suggest functionality ''' + """test the suggest functionality""" + def test_suggest(self): - ''' test suggest fixes capitalization ''' + """test suggest fixes capitalization""" site = MediaWikiOverloaded() - self.assertEqual(site.suggest('new york'), 'New York') + self.assertEqual(site.suggest("new york"), "New York") def test_suggest_yonkers(self): - ''' test suggest finds page ''' + """test suggest finds page""" site = MediaWikiOverloaded() - self.assertEqual(site.suggest('yonkers'), 'Yonkers, New York') + self.assertEqual(site.suggest("yonkers"), "Yonkers, New York") def test_suggest_no_results(self): - ''' test suggest finds no results ''' + """test suggest finds no results""" site = MediaWikiOverloaded() - self.assertEqual(site.suggest('gobbilygook'), None) + self.assertEqual(site.suggest("gobbilygook"), None) class TestMediaWikiGeoSearch(unittest.TestCase): - ''' test GeoSearch Functionality ''' + """test GeoSearch Functionality""" + def test_geosearch_decimals(self): - ''' test geosearch with Decimals lat / long ''' + """test geosearch with Decimals lat / long""" site = MediaWikiOverloaded() response = site.responses[site.api_url] - self.assertEqual(site.geosearch(latitude=Decimal('0.0'), - longitude=Decimal('0.0')), - response['geosearch_decimals']) + self.assertEqual( + site.geosearch(latitude=Decimal("0.0"), longitude=Decimal("0.0")), response["geosearch_decimals"] + ) def test_geosearch_mix_types(self): - ''' test geosearch with mix type lat / long ''' + """test geosearch with mix type lat / long""" site = MediaWikiOverloaded() response = site.responses[site.api_url] - self.assertEqual(site.geosearch(latitude=Decimal('0.0'), - longitude='0.0'), - response['geosearch_mix_types']) + self.assertEqual(site.geosearch(latitude=Decimal("0.0"), longitude="0.0"), response["geosearch_mix_types"]) def test_geo_page_inv_lat_long(self): - ''' test geosearch using page with invalid lat / long ''' + """test geosearch using page with invalid lat / long""" site = MediaWikiOverloaded() response = site.responses[site.api_url] - self.assertEqual(site.geosearch(title='new york city', - latitude=Decimal('-9999999999.999'), - longitude=Decimal('0.0'), results=22, - radius=10000), - response['geosearch_page_invalid_lat_long']) + self.assertEqual( + site.geosearch( + title="new york city", + 
latitude=Decimal("-9999999999.999"), + longitude=Decimal("0.0"), + results=22, + radius=10000, + ), + response["geosearch_page_invalid_lat_long"], + ) def test_geo_page_rad_res_set(self): - ''' test geosearch with radius and result set ''' + """test geosearch with radius and result set""" site = MediaWikiOverloaded() response = site.responses[site.api_url] - res = site.geosearch(title='new york city', results=22, radius=10000) - self.assertEqual(res, response['geosearch_page_radius_results_set']) + res = site.geosearch(title="new york city", results=22, radius=10000) + self.assertEqual(res, response["geosearch_page_radius_results_set"]) self.assertEqual(len(res), 22) def test_geo_page_rad_res(self): - ''' test geosearch with radius set ''' + """test geosearch with radius set""" site = MediaWikiOverloaded() response = site.responses[site.api_url] - res = site.geosearch(title='new york city', radius=10000) - self.assertEqual(res, response['geosearch_page_radius_results']) + res = site.geosearch(title="new york city", radius=10000) + self.assertEqual(res, response["geosearch_page_radius_results"]) self.assertEqual(len(res), 10) def test_geo_page(self): - ''' test geosearch using just page ''' + """test geosearch using just page""" site = MediaWikiOverloaded() response = site.responses[site.api_url] - res = site.geosearch(title='new york city') - self.assertEqual(res, response['geosearch_page']) + res = site.geosearch(title="new york city") + self.assertEqual(res, response["geosearch_page"]) self.assertEqual(len(res), 10) class TestMediaWikiOpenSearch(unittest.TestCase): - ''' test OpenSearch Functionality ''' + """test OpenSearch Functionality""" + def test_opensearch(self): - ''' test opensearch with default values ''' + """test opensearch with default values""" site = MediaWikiOverloaded() response = site.responses[site.api_url] - res = site.opensearch('new york') + res = site.opensearch("new york") for i, item in enumerate(res): res[i] = list(item) - self.assertEqual(res, response['opensearch_new_york']) + self.assertEqual(res, response["opensearch_new_york"]) self.assertEqual(len(res), 10) def test_opensearch_result(self): - ''' test opensearch with result set ''' + """test opensearch with result set""" site = MediaWikiOverloaded() response = site.responses[site.api_url] - res = site.opensearch('new york', results=5) + res = site.opensearch("new york", results=5) for i, item in enumerate(res): res[i] = list(item) - self.assertEqual(res, response['opensearch_new_york_result']) + self.assertEqual(res, response["opensearch_new_york_result"]) self.assertEqual(len(res), 5) def test_opensearch_redirect(self): - ''' test opensearch with redirect set ''' + """test opensearch with redirect set""" site = MediaWikiOverloaded() response = site.responses[site.api_url] - res = site.opensearch('new york', redirect=False) + res = site.opensearch("new york", redirect=False) for i, item in enumerate(res): res[i] = list(item) - self.assertEqual(res, response['opensearch_new_york_redirect']) + self.assertEqual(res, response["opensearch_new_york_redirect"]) self.assertEqual(len(res), 10) def test_opensearch_res_red_set(self): - ''' test opensearch with result and redirect set ''' + """test opensearch with result and redirect set""" site = MediaWikiOverloaded() response = site.responses[site.api_url] - res = site.opensearch('new york', results=5, redirect=False) + res = site.opensearch("new york", results=5, redirect=False) for i, item in enumerate(res): res[i] = list(item) - self.assertEqual(res, 
response['opensearch_new_york_result_redirect']) + self.assertEqual(res, response["opensearch_new_york_result_redirect"]) self.assertEqual(len(res), 5) class TestMediaWikiPrefixSearch(unittest.TestCase): - ''' test PrefixSearch Functionality ''' + """test PrefixSearch Functionality""" + def test_prefix_search(self): - ''' test basic prefix search ''' + """test basic prefix search""" site = MediaWikiOverloaded() response = site.responses[site.api_url] - res = site.prefixsearch('ar') - self.assertEqual(res, response['prefixsearch_ar']) + res = site.prefixsearch("ar") + self.assertEqual(res, response["prefixsearch_ar"]) self.assertEqual(len(res), 10) def test_prefix_search_ba(self): - ''' test prefix search results 10 ''' + """test prefix search results 10""" site = MediaWikiOverloaded() response = site.responses[site.api_url] - res = site.prefixsearch('ba', results=10) - self.assertEqual(res, response['prefixsearch_ba']) + res = site.prefixsearch("ba", results=10) + self.assertEqual(res, response["prefixsearch_ba"]) self.assertEqual(len(res), 10) def test_prefix_search_5(self): - ''' test prefix search results 5 ''' + """test prefix search results 5""" site = MediaWikiOverloaded() response = site.responses[site.api_url] - res = site.prefixsearch('ba', results=5) - self.assertEqual(res, response['prefixsearch_ba_5']) + res = site.prefixsearch("ba", results=5) + self.assertEqual(res, response["prefixsearch_ba_5"]) self.assertEqual(len(res), 5) def test_prefix_search_30(self): - ''' test prefix search results 30 ''' + """test prefix search results 30""" site = MediaWikiOverloaded() response = site.responses[site.api_url] - res = site.prefixsearch('ba', results=30) - self.assertEqual(res, response['prefixsearch_ba_30']) + res = site.prefixsearch("ba", results=30) + self.assertEqual(res, response["prefixsearch_ba_30"]) self.assertEqual(len(res), 30) class TestMediaWikiSummary(unittest.TestCase): - ''' test the summary functionality ''' + """test the summary functionality""" + def test_summarize_chars(self): - ''' test summarize number chars ''' + """test summarize number chars""" site = MediaWikiOverloaded() response = site.responses[site.api_url] - res = response['summarize_chars_50'] - sumr = site.summary('chess', chars=50) + res = response["summarize_chars_50"] + sumr = site.summary("chess", chars=50) self.assertEqual(res, sumr) self.assertEqual(len(res), 54) # add the ellipses def test_summarize_sents(self): - ''' test summarize number sentences ''' + """test summarize number sentences""" site = MediaWikiOverloaded() response = site.responses[site.api_url] - res = response['summarize_sent_5'] - sumr = site.summary('chess', sentences=5) + res = response["summarize_sent_5"] + sumr = site.summary("chess", sentences=5) self.assertEqual(res, sumr) # self.assertEqual(len(res), 466) def test_summarize_paragraph(self): - ''' test summarize based on first section ''' + """test summarize based on first section""" site = MediaWikiOverloaded() response = site.responses[site.api_url] - res = response['summarize_first_paragraph'] - sumr = site.summary('chess') + res = response["summarize_first_paragraph"] + sumr = site.summary("chess") self.assertEqual(res, sumr) def test_page_summary_chars(self): - ''' test page summarize - chars ''' + """test page summarize - chars""" site = MediaWikiOverloaded() response = site.responses[site.api_url] - res = response['summarize_chars_50'] - pag = site.page('chess') + res = response["summarize_chars_50"] + pag = site.page("chess") sumr = pag.summarize(chars=50) 
self.assertEqual(res, sumr) self.assertEqual(len(res), 54) def test_page_summary_sents(self): - ''' test page summarize - sentences ''' + """test page summarize - sentences""" site = MediaWikiOverloaded() response = site.responses[site.api_url] - res = response['summarize_sent_5'] - pag = site.page('chess') + res = response["summarize_sent_5"] + pag = site.page("chess") sumr = pag.summarize(sentences=5) self.assertEqual(res, sumr) # self.assertEqual(len(res), 466) class TestMediaWikiCategoryMembers(unittest.TestCase): - ''' test CategoryMember Functionality ''' + """test CategoryMember Functionality""" + def test_cat_mems_with_subcats(self): - ''' test categorymember with subcategories ''' + """test categorymember with subcategories""" site = MediaWikiOverloaded() response = site.responses[site.api_url] - res = response['category_members_with_subcategories'] - ctm = site.categorymembers('Chess', results=15, subcategories=True) + res = response["category_members_with_subcategories"] + ctm = site.categorymembers("Chess", results=15, subcategories=True) self.assertEqual(list(ctm), res) # list since json doesn't keep tuple def test_cat_mems_subcat_default(self): - ''' test categorymember with default subcategories (True) ''' + """test categorymember with default subcategories (True)""" site = MediaWikiOverloaded() response = site.responses[site.api_url] - res = response['category_members_with_subcategories'] - self.assertEqual(list(site.categorymembers('Chess', results=15)), res) + res = response["category_members_with_subcategories"] + self.assertEqual(list(site.categorymembers("Chess", results=15)), res) def test_cat_mems_wo_subcats(self): - ''' test categorymember without subcategories ''' + """test categorymember without subcategories""" site = MediaWikiOverloaded() response = site.responses[site.api_url] - res = response['category_members_without_subcategories'] - ctm = site.categorymembers('Chess', results=15, subcategories=False) + res = response["category_members_without_subcategories"] + ctm = site.categorymembers("Chess", results=15, subcategories=False) self.assertEqual(list(ctm), res) def test_cat_mems_w_subcats_lim(self): - ''' test categorymember without subcategories limited ''' + """test categorymember without subcategories limited""" site = MediaWikiOverloaded() response = site.responses[site.api_url] - res = response['category_members_without_subcategories_5'] - ctm = site.categorymembers('Chess', results=5, subcategories=False) + res = response["category_members_without_subcategories_5"] + ctm = site.categorymembers("Chess", results=5, subcategories=False) self.assertEqual(list(ctm), res) self.assertEqual(len(res), 5) def test_cat_mems_very_large(self): - ''' test category members that is larger than the max allowed ''' + """test category members that is larger than the max allowed""" site = MediaWikiOverloaded() response = site.responses[site.api_url] - res = response['category_members_very_large'] - ctm = site.categorymembers('Disambiguation categories', results=None) + res = response["category_members_very_large"] + ctm = site.categorymembers("Disambiguation categories", results=None) self.assertEqual(list(ctm), res) self.assertEqual(len(res[0]), 0) self.assertEqual(len(res[1]), 1629) # difficult if it changes sizes class TestMediaWikiExceptions(unittest.TestCase): - ''' test MediaWiki Exceptions ''' + """test MediaWiki Exceptions""" + def test_page_error(self): - ''' test that page error is thrown correctly ''' + """test that page error is thrown correctly""" site = 
MediaWikiOverloaded()
-        self.assertRaises(PageError, lambda: site.page('gobbilygook'))
+        self.assertRaises(PageError, lambda: site.page("gobbilygook"))
 
     def test_page_error_message(self):
-        ''' test that page error is thrown correctly '''
+        """test that page error is thrown correctly"""
         site = MediaWikiOverloaded()
         response = site.responses[site.api_url]
         try:
-            site.page('gobbilygook')
+            site.page("gobbilygook")
         except PageError as ex:
-            self.assertEqual(ex.message, response['page_error_msg'])
+            self.assertEqual(ex.message, response["page_error_msg"])
 
     def test_page_error_pageid(self):
-        ''' test that page error is thrown correctly pageid'''
+        """test that page error is thrown correctly for pageid"""
         site = MediaWikiOverloaded()
        self.assertRaises(PageError, lambda: site.page(pageid=-1))
 
     def test_page_error_title(self):
-        ''' test that page error is thrown correctly title'''
+        """test that page error is thrown correctly for title"""
         site = MediaWikiOverloaded()
-        self.assertRaises(PageError,
-                          lambda: site.page(title='gobbilygook',
-                                            auto_suggest=False))
+        self.assertRaises(PageError, lambda: site.page(title="gobbilygook", auto_suggest=False))
 
     def test_page_error_title_msg(self):
-        ''' test that page error is thrown correctly title'''
+        """test that page error is thrown correctly for title"""
         site = MediaWikiOverloaded()
         response = site.responses[site.api_url]
         try:
-            site.page(title='gobbilygook', auto_suggest=False)
+            site.page(title="gobbilygook", auto_suggest=False)
         except PageError as ex:
-            self.assertEqual(ex.message, response['page_error_msg_title'])
+            self.assertEqual(ex.message, response["page_error_msg_title"])
 
     def test_page_error_message_pageid(self):
-        ''' test that page error is thrown correctly '''
+        """test that page error is thrown correctly"""
         site = MediaWikiOverloaded()
         response = site.responses[site.api_url]
         try:
             site.page(pageid=-1)
         except PageError as ex:
-            self.assertEqual(ex.message, response['page_error_msg_pageid'])
+            self.assertEqual(ex.message, response["page_error_msg_pageid"])
 
     def test_page_error_none_message(self):
-        ''' test if neither pageid or title is present '''
+        """test if neither pageid nor title is present"""
         try:
             raise PageError(pageid=None, title=None)
         except PageError as ex:
-            msg = ('"{0}" does not match any pages. Try another '
-                   'query!').format('')
+            msg = ('"{0}" does not match any pages. 
Try another ' "query!").format("") self.assertEqual(ex.message, msg) def test_redirect_error(self): - ''' test that redirect error is thrown correctly ''' - site = MediaWikiOverloaded(url='https://awoiaf.westeros.org/api.php') - self.assertRaises(RedirectError, - lambda: site.page('arya', auto_suggest=False, - redirect=False)) + """test that redirect error is thrown correctly""" + site = MediaWikiOverloaded(url="https://awoiaf.westeros.org/api.php") + self.assertRaises(RedirectError, lambda: site.page("arya", auto_suggest=False, redirect=False)) def test_redirect_error_msg(self): - ''' test that redirect error is thrown correctly ''' - site = MediaWikiOverloaded(url='https://awoiaf.westeros.org/api.php') + """test that redirect error is thrown correctly""" + site = MediaWikiOverloaded(url="https://awoiaf.westeros.org/api.php") response = site.responses[site.api_url] try: - site.page('arya', auto_suggest=False, redirect=False) + site.page("arya", auto_suggest=False, redirect=False) except RedirectError as ex: - self.assertEqual(ex.message, response['redirect_error_msg']) + self.assertEqual(ex.message, response["redirect_error_msg"]) def test_disambiguation_error(self): - ''' test that disambiguation error is thrown correctly ''' + """test that disambiguation error is thrown correctly""" site = MediaWikiOverloaded() - self.assertRaises(DisambiguationError, lambda: site.page('bush')) + self.assertRaises(DisambiguationError, lambda: site.page("bush")) def test_disambiguation_error_msg(self): - ''' test that disambiguation error is thrown correctly ''' + """test that disambiguation error is thrown correctly""" site = MediaWikiOverloaded() response = site.responses[site.api_url] try: - site.page('bush') + site.page("bush") except DisambiguationError as ex: - self.assertEqual(ex.message, response['disambiguation_error_msg']) - self.assertEqual(ex.title, 'Bush') - self.assertEqual(ex.url, 'https://en.wikipedia.org/wiki/Bush') + self.assertEqual(ex.message, response["disambiguation_error_msg"]) + self.assertEqual(ex.title, "Bush") + self.assertEqual(ex.url, "https://en.wikipedia.org/wiki/Bush") def test_disamb_error_msg_w_empty(self): - ''' test that disambiguation error is thrown correctly and no - IndexError is thrown ''' + """test that disambiguation error is thrown correctly and no + IndexError is thrown""" site = MediaWikiOverloaded() response = site.responses[site.api_url] try: - site.page('Oasis') + site.page("Oasis") except DisambiguationError as ex: - self.assertEqual(ex.message, - response['disambiguation_error_msg_with_empty']) + self.assertEqual(ex.message, response["disambiguation_error_msg_with_empty"]) def test_geocoord_error(self): - ''' test geocoord error thrown ''' + """test geocoord error thrown""" site = MediaWikiOverloaded() - invalid = Decimal('-9999999999.999') - self.assertRaises(MediaWikiGeoCoordError, - lambda: site.geosearch(latitude=invalid, - longitude=Decimal('0.0'), - results=22, radius=10000)) + invalid = Decimal("-9999999999.999") + self.assertRaises( + MediaWikiGeoCoordError, + lambda: site.geosearch(latitude=invalid, longitude=Decimal("0.0"), results=22, radius=10000), + ) def test_geocoord_error_msg(self): - ''' test that the error geo error message is correct ''' + """test that the error geo error message is correct""" site = MediaWikiOverloaded() response = site.responses[site.api_url] try: - site.geosearch(latitude=Decimal('-9999999999.999'), - longitude=Decimal('0.0'), results=22, radius=10000) + site.geosearch(latitude=Decimal("-9999999999.999"), 
longitude=Decimal("0.0"), results=22, radius=10000) except MediaWikiGeoCoordError as ex: - self.assertEqual(ex.message, response['invalid_lat_long_geo_msg']) + self.assertEqual(ex.message, response["invalid_lat_long_geo_msg"]) def test_geocoord_value_error(self): - ''' test value error being thrown correctly ''' + """test value error being thrown correctly""" site = MediaWikiOverloaded() - self.assertRaises(ValueError, - lambda: site.geosearch(latitude=None, - longitude=Decimal('0.0'), - results=22, radius=10000)) + self.assertRaises( + ValueError, lambda: site.geosearch(latitude=None, longitude=Decimal("0.0"), results=22, radius=10000) + ) def test_geocoord_value_error_msg(self): - ''' test that the error value error message is correct ''' + """test that the error value error message is correct""" site = MediaWikiOverloaded() response = site.responses[site.api_url] try: - site.geosearch(latitude=None, longitude=Decimal('0.0'), - results=22, radius=10000) + site.geosearch(latitude=None, longitude=Decimal("0.0"), results=22, radius=10000) except ValueError as ex: - self.assertEqual(str(ex), response['invalid_lat_long_value_msg']) + self.assertEqual(str(ex), response["invalid_lat_long_value_msg"]) def test_api_url_error(self): - ''' test changing api url to invalid throws exception ''' + """test changing api url to invalid throws exception""" site = MediaWikiOverloaded() - url = 'https://french.wikipedia.org/w/api.php' - self.assertRaises(MediaWikiAPIURLError, - lambda: site.set_api_url(api_url=url, lang='fr')) + url = "https://french.wikipedia.org/w/api.php" + self.assertRaises(MediaWikiAPIURLError, lambda: site.set_api_url(api_url=url, lang="fr")) def test_api_url_error_msg(self): - ''' test api url error message on set ''' + """test api url error message on set""" site = MediaWikiOverloaded() - url = 'https://french.wikipedia.org/w/api.php' + url = "https://french.wikipedia.org/w/api.php" try: - site.set_api_url(api_url=url, lang='fr') + site.set_api_url(api_url=url, lang="fr") except MediaWikiAPIURLError as ex: response = site.responses[site.api_url] - self.assertEqual(ex.message, response['api_url_error_msg']) + self.assertEqual(ex.message, response["api_url_error_msg"]) def test_api_url_on_init_error(self): - ''' test api url error on init ''' - url = 'https://french.wikipedia.org/w/api.php' - self.assertRaises(MediaWikiAPIURLError, - lambda: MediaWikiOverloaded(url=url, lang='fr')) + """test api url error on init""" + url = "https://french.wikipedia.org/w/api.php" + self.assertRaises(MediaWikiAPIURLError, lambda: MediaWikiOverloaded(url=url, lang="fr")) def test_api_url_on_init_error_msg(self): - ''' test api url error message on init ''' + """test api url error message on init""" site = MediaWikiOverloaded() # something to use to lookup results - url = 'https://french.wikipedia.org/w/api.php' + url = "https://french.wikipedia.org/w/api.php" try: - MediaWikiOverloaded(url=url, lang='fr') + MediaWikiOverloaded(url=url, lang="fr") except MediaWikiAPIURLError as ex: response = site.responses[site.api_url] - self.assertEqual(ex.message, response['api_url_error_msg']) + self.assertEqual(ex.message, response["api_url_error_msg"]) def test_api_url_on_error_reset(self): - ''' test api url error resets to original URL ''' + """test api url error resets to original URL""" site = MediaWikiOverloaded() # something to use to lookup results - url = 'https://french.wikipedia.org/w/api.php' - wiki = 'https://en.wikipedia.org/w/api.php' + url = "https://french.wikipedia.org/w/api.php" + wiki = 
"https://en.wikipedia.org/w/api.php" try: - MediaWikiOverloaded(url=url, lang='fr') + MediaWikiOverloaded(url=url, lang="fr") except MediaWikiAPIURLError: self.assertNotEqual(site.api_url, url) self.assertEqual(site.api_url, wiki) def test_http_timeout_msg(self): - ''' test the http timeout message ''' - query = 'gobbilygook' + """test the http timeout message""" + query = "gobbilygook" try: raise HTTPTimeoutError(query) except HTTPTimeoutError as ex: - msg = ('Searching for "{0}" resulted in a timeout. Try ' - 'again in a few seconds, and ensure you have rate ' - 'limiting set to True.').format(query) + msg = ( + 'Searching for "{0}" resulted in a timeout. Try ' + "again in a few seconds, and ensure you have rate " + "limiting set to True." + ).format(query) self.assertEqual(ex.message, msg) def test_http_mediawiki_error_msg(self): - ''' test the mediawiki error message ''' - error = 'Unknown Error' + """test the mediawiki error message""" + error = "Unknown Error" try: raise HTTPTimeoutError(error) except HTTPTimeoutError as ex: - msg = ('Searching for "{0}" resulted in a timeout. Try ' - 'again in a few seconds, and ensure you have rate ' - 'limiting set to True.').format(error) + msg = ( + 'Searching for "{0}" resulted in a timeout. Try ' + "again in a few seconds, and ensure you have rate " + "limiting set to True." + ).format(error) self.assertEqual(ex.message, msg) def test_mediawiki_exception(self): - ''' test throwing a MediaWikiBaseException ''' + """test throwing a MediaWikiBaseException""" + def func(): - ''' test function ''' - raise MediaWikiException('new except!') - self.assertRaises(MediaWikiException, - func) + """test function""" + raise MediaWikiException("new except!") + + self.assertRaises(MediaWikiException, func) def test_mediawiki_exception_msg(self): - ''' test that base msg is retained ''' - error = 'Unknown Error' + """test that base msg is retained""" + error = "Unknown Error" try: raise MediaWikiException(error) except MediaWikiException as ex: - msg = ('An unknown error occurred: "{0}". Please report ' - 'it on GitHub!').format(error) + msg = ('An unknown error occurred: "{0}". Please report ' "it on GitHub!").format(error) self.assertEqual(ex.message, msg) def test_mediawiki_except_msg_str(self): - ''' test that base msg is retained ''' - error = 'Unknown Error' + """test that base msg is retained""" + error = "Unknown Error" try: raise MediaWikiException(error) except MediaWikiException as ex: - msg = ('An unknown error occurred: "{0}". Please report ' - 'it on GitHub!').format(error) + msg = ('An unknown error occurred: "{0}". Please report ' "it on GitHub!").format(error) self.assertEqual(str(ex), msg) def test_check_err_res_http_msg(self): - ''' test check query by throwing specific errors ''' + """test check query by throwing specific errors""" site = MediaWikiOverloaded() response = dict() - response['error'] = dict() - response['error']['info'] = 'HTTP request timed out.' - query = 'something' + response["error"] = dict() + response["error"]["info"] = "HTTP request timed out." + query = "something" try: site._check_error_response(response, query) except HTTPTimeoutError as ex: - msg = ('Searching for "{0}" resulted in a timeout. Try ' - 'again in a few seconds, and ensure you have rate ' - 'limiting set to True.').format(query) + msg = ( + 'Searching for "{0}" resulted in a timeout. Try ' + "again in a few seconds, and ensure you have rate " + "limiting set to True." 
+ ).format(query) self.assertEqual(str(ex), msg) def test_check_err_res_http(self): - ''' test check query by throwing specific errors ''' + """test check query by throwing specific errors""" site = MediaWikiOverloaded() response = dict() - response['error'] = dict() - response['error']['info'] = 'HTTP request timed out.' - query = 'something' - self.assertRaises(HTTPTimeoutError, - lambda: site._check_error_response(response, query)) + response["error"] = dict() + response["error"]["info"] = "HTTP request timed out." + query = "something" + self.assertRaises(HTTPTimeoutError, lambda: site._check_error_response(response, query)) def test_check_er_res_media_msg(self): - ''' test check query by throwing specific error message ; mediawiki ''' + """test check query by throwing specific error message ; mediawiki""" site = MediaWikiOverloaded() response = dict() - response['error'] = dict() - response['error']['info'] = 'blah blah' - query = 'something' + response["error"] = dict() + response["error"]["info"] = "blah blah" + query = "something" try: site._check_error_response(response, query) except MediaWikiException as ex: - msg = ('An unknown error occurred: "{0}". Please report ' - 'it on GitHub!').format(response['error']['info']) + msg = ('An unknown error occurred: "{0}". Please report ' "it on GitHub!").format(response["error"]["info"]) self.assertEqual(str(ex), msg) def test_check_err_res_media(self): - ''' test check query by throwing specific errors; mediawiki ''' + """test check query by throwing specific errors; mediawiki""" site = MediaWikiOverloaded() response = dict() - response['error'] = dict() - response['error']['info'] = 'blah blah' - query = 'something' - self.assertRaises(MediaWikiException, - lambda: site._check_error_response(response, query)) + response["error"] = dict() + response["error"]["info"] = "blah blah" + query = "something" + self.assertRaises(MediaWikiException, lambda: site._check_error_response(response, query)) def test_check_query_err(self): - ''' test _check_query value error ''' + """test _check_query value error""" site = MediaWikiOverloaded() query = None - msg = 'Query must be specified' + msg = "Query must be specified" self.assertRaises(ValueError, lambda: site._check_query(query, msg)) def test_check_query_err_msg(self): - ''' test _check_query value error message ''' + """test _check_query value error message""" site = MediaWikiOverloaded() query = None - msg = 'Query must be specified' + msg = "Query must be specified" try: site._check_query(query, msg) except ValueError as ex: @@ -1004,156 +1021,148 @@ def test_check_query_err_msg(self): class TestMediaWikiRequests(unittest.TestCase): - ''' test the actual wiki_request ''' + """test the actual wiki_request""" + def test_wiki_request(self): - ''' test wiki request by testing the timing.... 
'''
+        """test wiki request by testing the timing...."""
         site = MediaWikiOverloaded()
         # self.assertEqual(site._rate_limit_last_call, None)
         site.rate_limit = True
         site.rate_limit_min_wait = timedelta(seconds=2)
-        site.search('chest set')
+        site.search("chest set")
         start_time = site._rate_limit_last_call
-        site.opensearch('new york')
-        site.prefixsearch('ar')
+        site.opensearch("new york")
+        site.prefixsearch("ar")
         end_time = site._rate_limit_last_call
         self.assertGreater(end_time - start_time, timedelta(seconds=2))
         self.assertNotEqual(site._rate_limit_last_call, None)
 
 
 class TestMediaWikiPage(unittest.TestCase):
-    ''' test MediaWiki Pages '''
+    """test MediaWiki Pages"""
+
     def setUp(self):
-        ''' single function for all the tests (well most of) '''
-        api_url = 'https://awoiaf.westeros.org/api.php'
+        """single function for all the tests (well most of)"""
+        api_url = "https://awoiaf.westeros.org/api.php"
         self.site = MediaWikiOverloaded(url=api_url)
         self.response = self.site.responses[self.site.api_url]
-        self.pag = self.site.page('arya')
+        self.pag = self.site.page("arya")
 
     def test_call_directly(self):
-        ''' test calling MediaWikiPage directly '''
+        """test calling MediaWikiPage directly"""
         page = MediaWikiPage(self.site, title="arya")
-        self.assertEqual(page.title, self.response['arya']['title'])
+        self.assertEqual(page.title, self.response["arya"]["title"])
 
     def test_call_directly_error(self):
-        ''' test calling MediaWikiPage directly with error message'''
+        """test calling MediaWikiPage directly with error message"""
         try:
             MediaWikiPage(self.site)
         except ValueError as ex:
-            msg = 'Either a title or a pageid must be specified'
+            msg = "Either a title or a pageid must be specified"
             self.assertEqual(str(ex), msg)
 
     def test_page_value_err(self):
-        ''' test that ValueError is thrown when error calling mediawikipage
-        directly '''
+        """test that ValueError is thrown when error calling mediawikipage
+        directly"""
         self.assertRaises(ValueError, lambda: MediaWikiPage(self.site))
 
     def test_page_value_err_msg(self):
-        ''' test that ValueError message thrown from random'''
+        """test that ValueError message is thrown from page"""
         site = MediaWikiOverloaded()
         try:
             site.page()
         except ValueError as ex:
-            msg = 'Either a title or a pageid must be specified'
+            msg = "Either a title or a pageid must be specified"
             self.assertEqual(str(ex), msg)
 
     def test_page_value_err_none(self):
-        ''' test that ValueError is thrown from None '''
+        """test that ValueError is thrown from None"""
         site = MediaWikiOverloaded()
         self.assertRaises(ValueError, lambda: site.page(None))
 
     def test_page_title(self):
-        ''' test a page title '''
-        self.assertEqual(self.pag.title, self.response['arya']['title'])
+        """test a page title"""
+        self.assertEqual(self.pag.title, self.response["arya"]["title"])
 
     def test_page_pageid(self):
-        ''' test a page pageid '''
-        self.assertEqual(self.pag.pageid, self.response['arya']['pageid'])
+        """test a page pageid"""
+        self.assertEqual(self.pag.pageid, self.response["arya"]["pageid"])
 
     def test_page_url(self):
-        ''' test a page url '''
-        self.assertEqual(self.pag.url, self.response['arya']['url'])
+        """test a page url"""
+        self.assertEqual(self.pag.url, self.response["arya"]["url"])
 
     def test_page_backlinks(self):
-        ''' test a page backlinks '''
-        self.assertEqual(self.pag.backlinks,
-                         self.response['arya']['backlinks'])
+        """test a page backlinks"""
+        self.assertEqual(self.pag.backlinks, self.response["arya"]["backlinks"])
 
     def test_page_images(self):
-        ''' test a page imsages '''
-        self.assertEqual(self.pag.images,
-                         self.response['arya']['images'])
+        """test a page images"""
+        self.assertEqual(self.pag.images, self.response["arya"]["images"])
 
     def test_page_redirects(self):
-        ''' test a page redirects '''
-        self.assertEqual(self.pag.redirects,
-                         self.response['arya']['redirects'])
+        """test a page redirects"""
+        self.assertEqual(self.pag.redirects, self.response["arya"]["redirects"])
 
     def test_page_links(self):
-        ''' test a page links '''
-        self.assertEqual(self.pag.links, self.response['arya']['links'])
+        """test a page links"""
+        self.assertEqual(self.pag.links, self.response["arya"]["links"])
 
     def test_page_categories(self):
-        ''' test a page categories '''
-        self.assertEqual(self.pag.categories,
-                         self.response['arya']['categories'])
+        """test a page categories"""
+        self.assertEqual(self.pag.categories, self.response["arya"]["categories"])
 
     def test_page_references(self):
-        ''' test a page references '''
-        self.assertEqual(self.pag.references,
-                         self.response['arya']['references'])
+        """test a page references"""
+        self.assertEqual(self.pag.references, self.response["arya"]["references"])
 
     def test_page_references_no_http(self):
-        ''' test a page references with mixed http '''
+        """test a page references with mixed http"""
         site = MediaWikiOverloaded()
-        page = site.page('Minneapolis')
-        response = site.responses[site.api_url]['references_without_http']
+        page = site.page("Minneapolis")
+        response = site.responses[site.api_url]["references_without_http"]
         self.assertEqual(page.references, response)
 
     def test_page_content(self):
-        ''' test a page content '''
-        self.assertEqual(self.pag.content,
-                         self.response['arya']['content'])
+        """test a page content"""
+        self.assertEqual(self.pag.content, self.response["arya"]["content"])
 
     def test_page_parent_id(self):
-        ''' test a page parent_id '''
-        self.assertEqual(self.pag.parent_id,
-                         self.response['arya']['parent_id'])
+        """test a page parent_id"""
+        self.assertEqual(self.pag.parent_id, self.response["arya"]["parent_id"])
 
     def test_page_revision_id(self):
-        ''' test a page revision_id '''
-        self.assertEqual(self.pag.revision_id,
-                         self.response['arya']['revision_id'])
+        """test a page revision_id"""
+        self.assertEqual(self.pag.revision_id, self.response["arya"]["revision_id"])
 
     def test_page_coordinates_none(self):
-        ''' test a page coordinates none '''
-        self.assertEqual(self.pag.coordinates,
-                         self.response['arya']['coordinates'])
+        """test a page coordinates none"""
+        self.assertEqual(self.pag.coordinates, self.response["arya"]["coordinates"])
 
     def test_page_coordinates(self):
-        ''' test a page coordinates where found '''
+        """test a page coordinates where found"""
         site = MediaWikiOverloaded()
         response = site.responses[site.api_url]
-        pag = site.page('Washington Monument')
+        pag = site.page("Washington Monument")
         coords = pag.coordinates
-        self.assertEqual([str(coords[0]), str(coords[1])],
-                         response['wash_mon'])
+        self.assertEqual([str(coords[0]), str(coords[1])], response["wash_mon"])
 
     def test_page_langlinks(self):
-        ''' test a page language links property '''
+        """test a page language links property"""
         site = MediaWikiOverloaded()
-        response = site.responses[site.api_url]['nobel_chemistry']['langlinks']
-        pag = site.page('Nobel Prize in Chemistry')
+        response = site.responses[site.api_url]["nobel_chemistry"]["langlinks"]
+        pag = site.page("Nobel Prize in Chemistry")
         self.assertEqual(pag.langlinks, response)
 
     def test_page_sections(self):
-        ''' test a page sections '''
-        self.assertEqual(self.pag.sections,
-                         self.response['arya']['sections'])
+        """test a page sections"""
+        self.assertEqual(self.pag.sections, self.response["arya"]["sections"])
 
     def test_table_of_contents(self):
-        ''' test a page table of contents '''
+        """test a page table of contents"""
 
         def _flatten_toc(_dict, res):
-            ''' flatten the table of contents into a list '''
+            """flatten the table of contents into a list"""
             for key, val in _dict.items():
                 res.append(key)
                 if val.keys():
@@ -1162,506 +1171,504 @@ def _flatten_toc(_dict, res):
         toc = self.pag.table_of_contents
         toc_ord = list()
         _flatten_toc(toc, toc_ord)
-        self.assertEqual(toc_ord, self.response['arya']['sections'])
+        self.assertEqual(toc_ord, self.response["arya"]["sections"])
 
     def test_page_section(self):
-        ''' test a page returning a section '''
-        self.assertEqual(self.pag.section('A Game of Thrones'),
-                         self.response['arya']['section_a_game_of_thrones'])
+        """test a page returning a section"""
+        self.assertEqual(self.pag.section("A Game of Thrones"), self.response["arya"]["section_a_game_of_thrones"])
 
     def test_page_section_header(self):
-        ''' test a page returning the section header '''
+        """test a page returning the section header"""
         res = self.pag.section(None)
         print(res)
-        self.assertEqual(self.pag.section(None),
-                         self.response['arya']['section_a_game_of_thrones'])
+        self.assertEqual(self.pag.section(None), self.response["arya"]["section_a_game_of_thrones"])
 
     def test_page_last_section(self):
-        ''' test a page returning the last section '''
-        self.assertEqual(self.pag.section('External links'),
-                         self.response['arya']['last_section'])
+        """test a page returning the last section"""
+        self.assertEqual(self.pag.section("External links"), self.response["arya"]["last_section"])
 
     def test_page_single_section(self):
-        ''' test a page returning the last section '''
-        pag = self.site.page('Castos')
-        self.assertEqual(pag.section('References and Notes'),
-                         self.response['castos']['section'])
+        """test a page returning a single section"""
+        pag = self.site.page("Castos")
+        self.assertEqual(pag.section("References and Notes"), self.response["castos"]["section"])
 
     def test_page_invalid_section(self):
-        ''' test a page invalid section '''
-        self.assertEqual(self.pag.section('gobbilygook'), None)
+        """test a page invalid section"""
+        self.assertEqual(self.pag.section("gobbilygook"), None)
 
     def test_page_summary(self):
-        ''' test page summary '''
-        self.assertEqual(self.pag.summary, self.response['arya']['summary'])
+        """test page summary"""
+        self.assertEqual(self.pag.summary, self.response["arya"]["summary"])
 
     def test_page_html(self):
-        ''' test page html '''
-        self.assertEqual(self.pag.html, self.response['arya']['html'])
+        """test page html"""
+        self.assertEqual(self.pag.html, self.response["arya"]["html"])
 
     def test_page_str(self):
-        ''' test page string representation '''
-        self.assertEqual(str(self.pag), '''<MediaWikiPage 'Arya Stark'>''')
+        """test page string representation"""
+        self.assertEqual(str(self.pag), """<MediaWikiPage 'Arya Stark'>""")
 
     def test_page_repr(self):
-        ''' test page repr without unicode '''
-        self.assertEqual(repr(self.pag), '''<MediaWikiPage 'Arya Stark'>''')
+        """test page repr without unicode"""
+        self.assertEqual(repr(self.pag), """<MediaWikiPage 'Arya Stark'>""")
 
     def test_page_unicode(self):
-        ''' test with unicode representation '''
+        """test with unicode representation"""
         site = MediaWikiOverloaded()
-        page = site.page('Jacques Léonard Muller')
-        self.assertEqual(str(page),
-                         '''<MediaWikiPage 'Jacques Léonard Muller'>''')
+        page = site.page("Jacques Léonard Muller")
+        self.assertEqual(str(page), """<MediaWikiPage 'Jacques Léonard Muller'>""")
 
     def test_page_repr_2(self):
-        ''' test page string representation '''
+        """test page string representation"""
         site = MediaWikiOverloaded()
-        page = site.page('Jacques Léonard Muller')
-        name = '''<MediaWikiPage 'Jacques Léonard Muller'>'''
+        page = site.page("Jacques Léonard Muller")
+        name = """<MediaWikiPage 'Jacques Léonard Muller'>"""
         res = repr(page)
         self.assertEqual(res, name)
 
     def test_page_eq(self):
-        ''' test page equality '''
-        tmp = self.site.page('arya')
+        """test page equality"""
+        tmp = self.site.page("arya")
         self.assertEqual(self.pag == tmp, True)
 
     def test_page_redirect(self):
-        ''' test page redirect '''
-        tmp = self.site.page('arya', auto_suggest=False)
+        """test page redirect"""
+        tmp = self.site.page("arya", auto_suggest=False)
         self.assertEqual(self.pag == tmp, True)
 
     def test_page_redirect_pageid(self):
-        ''' test page redirect from page id '''
+        """test page redirect from page id"""
         site = MediaWikiOverloaded()
         pag = site.page(pageid=24337758, auto_suggest=False)
         self.assertEqual(str(pag), "<MediaWikiPage 'BPP (complexity)'>")
         self.assertEqual(int(pag.pageid), 4079)
-        self.assertEqual(pag.title, 'BPP (complexity)')
+        self.assertEqual(pag.title, "BPP (complexity)")
 
     def test_page_neq(self):
-        ''' test page inequality '''
-        tmp = self.site.page('jon snow')
+        """test page inequality"""
+        tmp = self.site.page("jon snow")
         self.assertEqual(self.pag == tmp, False)
         self.assertEqual(self.pag != tmp, True)
 
     def test_page_neq_attr_err(self):
-        ''' test page inequality by AttributeError '''
-        tmp = self.site.page('arya')
-        delattr(tmp, 'pageid')
+        """test page inequality by AttributeError"""
+        tmp = self.site.page("arya")
+        delattr(tmp, "pageid")
         self.assertEqual(self.pag != tmp, True)
 
     def test_page_preload(self):
-        ''' test preload of page properties '''
-        pag = self.site.page('arya', preload=True)
-        self.assertNotEqual(getattr(pag, '_content'), None)
-        self.assertNotEqual(getattr(pag, '_summary'), None)
-        self.assertNotEqual(getattr(pag, '_images'), None)
-        self.assertNotEqual(getattr(pag, '_references'), None)
-        self.assertNotEqual(getattr(pag, '_links'), None)
-        self.assertNotEqual(getattr(pag, '_sections'), None)
-        self.assertNotEqual(getattr(pag, '_redirects'), None)
-        self.assertNotEqual(getattr(pag, '_coordinates'), False)
-        self.assertNotEqual(getattr(pag, '_backlinks'), None)
-        self.assertNotEqual(getattr(pag, '_categories'), None)
+        """test preload of page properties"""
+        pag = self.site.page("arya", preload=True)
+        self.assertNotEqual(getattr(pag, "_content"), None)
+        self.assertNotEqual(getattr(pag, "_summary"), None)
+        self.assertNotEqual(getattr(pag, "_images"), None)
+        self.assertNotEqual(getattr(pag, "_references"), None)
+        self.assertNotEqual(getattr(pag, "_links"), None)
+        self.assertNotEqual(getattr(pag, "_sections"), None)
+        self.assertNotEqual(getattr(pag, "_redirects"), None)
+        self.assertNotEqual(getattr(pag, "_coordinates"), False)
+        self.assertNotEqual(getattr(pag, "_backlinks"), None)
+        self.assertNotEqual(getattr(pag, "_categories"), None)
 
     def test_page_no_preload(self):
-        ''' test page properties that are not set '''
-        pag = self.site.page('arya', preload=False)
-        self.assertEqual(getattr(pag, '_content'), None)
-        self.assertEqual(getattr(pag, '_summary'), None)
-        self.assertEqual(getattr(pag, '_images'), None)
-        self.assertEqual(getattr(pag, '_references'), None)
-        self.assertEqual(getattr(pag, '_links'), None)
-        self.assertEqual(getattr(pag, '_sections'), None)
-        self.assertEqual(getattr(pag, '_redirects'), None)
-        self.assertEqual(getattr(pag, '_coordinates'), False)
-        self.assertEqual(getattr(pag, '_backlinks'), None)
-        self.assertEqual(getattr(pag, '_categories'), None)
+        """test page properties that are not set"""
+        pag = self.site.page("arya", preload=False)
+        self.assertEqual(getattr(pag, "_content"), 
None) + self.assertEqual(getattr(pag, "_summary"), None) + self.assertEqual(getattr(pag, "_images"), None) + self.assertEqual(getattr(pag, "_references"), None) + self.assertEqual(getattr(pag, "_links"), None) + self.assertEqual(getattr(pag, "_sections"), None) + self.assertEqual(getattr(pag, "_redirects"), None) + self.assertEqual(getattr(pag, "_coordinates"), False) + self.assertEqual(getattr(pag, "_backlinks"), None) + self.assertEqual(getattr(pag, "_categories"), None) def test_full_sections_large(self): - ''' test parsing a set of sections - large ''' + """test parsing a set of sections - large""" wiki = MediaWikiOverloaded() - pg = wiki.page('New York City') + pg = wiki.page("New York City") response = wiki.responses[wiki.api_url] - self.assertEqual(pg.sections, response['new_york_city_sections']) + self.assertEqual(pg.sections, response["new_york_city_sections"]) def test_table_of_contents_large(self): - ''' test a page table of contents for nested TOC - large''' + """test a page table of contents for nested TOC - large""" def _flatten_toc(_dict, res): - ''' flatten the table of contents into a list ''' + """flatten the table of contents into a list""" for key, val in _dict.items(): res.append(key) if val.keys(): _flatten_toc(val, res) + wiki = MediaWikiOverloaded() response = wiki.responses[wiki.api_url] - pg = wiki.page('New York City') + pg = wiki.page("New York City") toc = pg.table_of_contents toc_ord = list() _flatten_toc(toc, toc_ord) - self.assertEqual(toc_ord, response['new_york_city_sections']) + self.assertEqual(toc_ord, response["new_york_city_sections"]) def test_page_section_large(self): - ''' test a page returning a section - large ''' + """test a page returning a section - large""" wiki = MediaWikiOverloaded() response = wiki.responses[wiki.api_url] - pg = wiki.page('New York City') - self.assertEqual(pg.section('Air quality'), response['new_york_city_air_quality']) + pg = wiki.page("New York City") + self.assertEqual(pg.section("Air quality"), response["new_york_city_air_quality"]) def test_page_section_header(self): - ''' test a page returning a section - header ''' + """test a page returning a section - header""" wiki = MediaWikiOverloaded() response = wiki.responses[wiki.api_url] - pg = wiki.page('New York City') - self.assertEqual(pg.section(None), response['new_york_city_none']) + pg = wiki.page("New York City") + self.assertEqual(pg.section(None), response["new_york_city_none"]) def test_page_last_section_large(self): - ''' test a page returning the last section - large ''' + """test a page returning the last section - large""" wiki = MediaWikiOverloaded() response = wiki.responses[wiki.api_url] - pg = wiki.page('New York City') - self.assertEqual(pg.section('External links'), response['new_york_city_last_sec']) + pg = wiki.page("New York City") + self.assertEqual(pg.section("External links"), response["new_york_city_last_sec"]) def test_page_wikitext(self): - ''' test wikitext ''' + """test wikitext""" wiki = MediaWikiOverloaded() response = wiki.responses[wiki.api_url] pg = wiki.page(pageid=24337758) - self.assertEqual(pg.wikitext, response['bpp-complexity_wikitext']) + self.assertEqual(pg.wikitext, response["bpp-complexity_wikitext"]) class TestMediaWikiCategoryTree(unittest.TestCase): - ''' test the category tree functionality ''' + """test the category tree functionality""" def test_double_category_tree(self): - ''' test category tree using a list ''' + """test category tree using a list""" site = MediaWikiOverloaded() - with open(site.tree_path, 'r') as 
fpt: + with open(site.tree_path, "r") as fpt: res = json.load(fpt) - cat = site.categorytree(['Chess', 'Ebola'], depth=None) + cat = site.categorytree(["Chess", "Ebola"], depth=None) self.assertEqual(cat, res) def test_triple_category_tree_none(self): - ''' test category tree using a list but one is blank or None ''' + """test category tree using a list but one is blank or None""" site = MediaWikiOverloaded() - with open(site.tree_path, 'r') as fpt: + with open(site.tree_path, "r") as fpt: res = json.load(fpt) - cat = site.categorytree(['Chess', 'Ebola', None], depth=None) + cat = site.categorytree(["Chess", "Ebola", None], depth=None) self.assertEqual(cat, res) def test_triple_category_tree_bnk(self): - ''' test category tree using a list but one is blank or None ''' + """test category tree using a list but one is blank or None""" site = MediaWikiOverloaded() - with open(site.tree_path, 'r') as fpt: + with open(site.tree_path, "r") as fpt: res = json.load(fpt) - cat = site.categorytree(['Chess', 'Ebola', ''], depth=None) + cat = site.categorytree(["Chess", "Ebola", ""], depth=None) self.assertEqual(cat, res) def test_single_category_tree_list(self): - ''' test category tree using a list with one element ''' + """test category tree using a list with one element""" site = MediaWikiOverloaded() - with open(site.tree_path, 'r') as fpt: + with open(site.tree_path, "r") as fpt: res = json.load(fpt) - cat = site.categorytree(['Chess'], depth=None) - self.assertEqual(cat['Chess'], res['Chess']) + cat = site.categorytree(["Chess"], depth=None) + self.assertEqual(cat["Chess"], res["Chess"]) def test_single_category_tree_str(self): - ''' test category tree using a string ''' + """test category tree using a string""" site = MediaWikiOverloaded() - with open(site.tree_path, 'r') as fpt: + with open(site.tree_path, "r") as fpt: res = json.load(fpt) - cat = site.categorytree('Ebola', depth=None) - self.assertEqual(cat['Ebola'], res['Ebola']) + cat = site.categorytree("Ebola", depth=None) + self.assertEqual(cat["Ebola"], res["Ebola"]) def test_category_tree_valerror_1(self): - ''' test category provided None throws error ''' + """test category provided None throws error""" site = MediaWikiOverloaded() - self.assertRaises(ValueError, - lambda: site.categorytree(None, depth=None)) + self.assertRaises(ValueError, lambda: site.categorytree(None, depth=None)) def test_cattree_error_msg_1(self): - ''' test that ValueError message when None passed as category ''' + """test that ValueError message when None passed as category""" site = MediaWikiOverloaded() category = None try: site.categorytree(category, depth=None) except ValueError as ex: - msg = ("CategoryTree: Parameter 'category' must either " - "be a list of one or more categories or a string; " - "provided: '{}'".format(category)) + msg = ( + "CategoryTree: Parameter 'category' must either " + "be a list of one or more categories or a string; " + "provided: '{}'".format(category) + ) self.assertEqual(str(ex), msg) def test_category_tree_valerror_2(self): - ''' test category provided empty str throws error ''' + """test category provided empty str throws error""" site = MediaWikiOverloaded() - self.assertRaises(ValueError, - lambda: site.categorytree('', depth=None)) + self.assertRaises(ValueError, lambda: site.categorytree("", depth=None)) def test_cattree_error_msg_2(self): - ''' test that ValueError message when '' passed as category: 2 ''' + """test that ValueError message when '' passed as category: 2""" site = MediaWikiOverloaded() - category = '' + 
category = "" try: site.categorytree(category, depth=None) except ValueError as ex: - msg = ("CategoryTree: Parameter 'category' must either " - "be a list of one or more categories or a string; " - "provided: '{}'".format(category)) + msg = ( + "CategoryTree: Parameter 'category' must either " + "be a list of one or more categories or a string; " + "provided: '{}'".format(category) + ) self.assertEqual(str(ex), msg) def test_category_tree_valerror_3(self): - ''' test category provided empty str throws error ''' + """test category provided empty str throws error""" site = MediaWikiOverloaded() - self.assertRaises(ValueError, - lambda: site.categorytree('Chess', depth=0)) + self.assertRaises(ValueError, lambda: site.categorytree("Chess", depth=0)) def test_cattree_error_msg_3(self): - ''' test that ValueError message when depth < 1 ''' + """test that ValueError message when depth < 1""" site = MediaWikiOverloaded() try: - site.categorytree('Chess', depth=0) + site.categorytree("Chess", depth=0) except ValueError as ex: - msg = ("CategoryTree: Parameter 'depth' must be either None " - "(for the full tree) or be greater than 0") + msg = "CategoryTree: Parameter 'depth' must be either None " "(for the full tree) or be greater than 0" self.assertEqual(str(ex), msg) def test_depth_none_1(self): - ''' test the depth when going full depth ''' + """test the depth when going full depth""" site = MediaWikiOverloaded() - cat = site.categorytree(['Chess'], depth=None) - depth = find_depth(cat['Chess']) + cat = site.categorytree(["Chess"], depth=None) + depth = find_depth(cat["Chess"]) self.assertEqual(depth, 7) def test_depth_none_2(self): - ''' test the depth when going full depth take two ''' + """test the depth when going full depth take two""" site = MediaWikiOverloaded() - cat = site.categorytree(['Ebola'], depth=None) - depth = find_depth(cat['Ebola']) + cat = site.categorytree(["Ebola"], depth=None) + depth = find_depth(cat["Ebola"]) self.assertEqual(depth, 1) def test_depth_limited(self): - ''' test the depth when going partial depth ''' + """test the depth when going partial depth""" site = MediaWikiOverloaded() - cat = site.categorytree(['Chess'], depth=5) - depth = find_depth(cat['Chess']) + cat = site.categorytree(["Chess"], depth=5) + depth = find_depth(cat["Chess"]) self.assertEqual(depth, 5) def test_depth_limited_2(self): - ''' test the depth when going partial depth take two ''' + """test the depth when going partial depth take two""" site = MediaWikiOverloaded() - cat = site.categorytree(['Chess'], depth=2) - depth = find_depth(cat['Chess']) + cat = site.categorytree(["Chess"], depth=2) + depth = find_depth(cat["Chess"]) self.assertEqual(depth, 2) def test_cattree_list_with_none(self): - ''' test the removing None or '' categories from the list ''' + """test the removing None or '' categories from the list""" site = MediaWikiOverloaded() - cat = site.categorytree(['Chess', None], depth=2) - depth = find_depth(cat['Chess']) + cat = site.categorytree(["Chess", None], depth=2) + depth = find_depth(cat["Chess"]) self.assertEqual(depth, 2) self.assertEqual(len(cat.keys()), 1) def test_badcat_tree_pageerror(self): - ''' test category provided bad category throws error ''' + """test category provided bad category throws error""" site = MediaWikiOverloaded() - self.assertRaises(PageError, lambda: site.categorytree('Chess Ebola')) + self.assertRaises(PageError, lambda: site.categorytree("Chess Ebola")) def test_badcat_error_msg(self): - ''' test that ValueError message when depth < 1 ''' + 
"""test that ValueError message when depth < 1""" site = MediaWikiOverloaded() - res = site.responses[site.api_url]['missing_categorytree'] - category = 'Chess Ebola' + res = site.responses[site.api_url]["missing_categorytree"] + category = "Chess Ebola" try: site.categorytree(category) except PageError as ex: self.assertEqual(str(ex), res) def test_unretrievable_cat(self): - ''' test throwing the exception when cannot retrieve category tree ''' + """test throwing the exception when cannot retrieve category tree""" + def new_cattreemem(): - ''' force exception to be thrown ''' + """force exception to be thrown""" raise Exception site = MediaWikiOverloaded() site.categorymembers = new_cattreemem - self.assertRaises(MediaWikiCategoryTreeError, - lambda: site.categorytree('Chess')) + self.assertRaises(MediaWikiCategoryTreeError, lambda: site.categorytree("Chess")) def test_unretrievable_cat_msg(self): - ''' test the exception message when cannot retrieve category tree ''' + """test the exception message when cannot retrieve category tree""" + def new_cattreemem(): - ''' force exception to be thrown ''' + """force exception to be thrown""" raise Exception - category = 'Chess' - msg = ("Categorytree threw an exception for trying to get the " - "same category '{}' too many times. Please try again later " - "and perhaps use the rate limiting option.").format(category) + category = "Chess" + msg = ( + "Categorytree threw an exception for trying to get the " + "same category '{}' too many times. Please try again later " + "and perhaps use the rate limiting option." + ).format(category) site = MediaWikiOverloaded() site.categorymembers = new_cattreemem try: site.categorytree(category) except MediaWikiCategoryTreeError as ex: self.assertEqual(str(ex), msg) - self.assertEqual(ex.category, 'Chess') + self.assertEqual(ex.category, "Chess") class TestMediaWikiLogos(unittest.TestCase): - ''' Add logo tests here ''' + """Add logo tests here""" def test_logo_present(self): - ''' test when single logo or main image present ''' + """test when single logo or main image present""" site = MediaWikiOverloaded() res = site.responses[site.api_url] - page = site.page('Chess') - self.assertEqual(page.logos, res['chess_logos']) + page = site.page("Chess") + self.assertEqual(page.logos, res["chess_logos"]) def test_mult_logo_present(self): - ''' test when multiple main images or logos present ''' + """test when multiple main images or logos present""" site = MediaWikiOverloaded() res = site.responses[site.api_url] - page = site.page('Sony Music') - self.assertEqual(page.logos, res['sony_music_logos']) + page = site.page("Sony Music") + self.assertEqual(page.logos, res["sony_music_logos"]) def test_infobox_not_present(self): - ''' test when no infobox (based on the class name) is found ''' + """test when no infobox (based on the class name) is found""" site = MediaWikiOverloaded() - page = site.page('Antivirus Software') + page = site.page("Antivirus Software") self.assertEqual(page.logos, list()) # should be an empty list class TestMediaWikiPreview(unittest.TestCase): - ''' Preview tests ''' + """Preview tests""" def test_page_preview(self): - ''' test pulling a page preview ''' + """test pulling a page preview""" site = MediaWikiOverloaded() res = site.responses[site.api_url] - page = site.page('Chess') - self.assertEqual(page.preview, res['chess_preview']) + page = site.page("Chess") + self.assertEqual(page.preview, res["chess_preview"]) class TestMediaWikiHatnotes(unittest.TestCase): - ''' Test the pulling of 
hatnotes from mediawiki pages ''' + """Test the pulling of hatnotes from mediawiki pages""" def test_contains_hatnotes(self): - ''' Test when hatnotes are present ''' + """Test when hatnotes are present""" site = MediaWikiOverloaded() res = site.responses[site.api_url] - page = site.page('Chess') - self.assertEqual(page.hatnotes, res['chess_hatnotes']) + page = site.page("Chess") + self.assertEqual(page.hatnotes, res["chess_hatnotes"]) def test_no_hatnotes(self): - ''' Test when no hatnote is on the page ''' + """Test when no hatnote is on the page""" site = MediaWikiOverloaded() res = site.responses[site.api_url] - page_name = ('List of Battlestar Galactica (1978 TV series) and ' - 'Galactica 1980 episodes') + page_name = "List of Battlestar Galactica (1978 TV series) and " "Galactica 1980 episodes" page = site.page(page_name) - self.assertEqual(page.hatnotes, res['page_no_hatnotes']) + self.assertEqual(page.hatnotes, res["page_no_hatnotes"]) class TestMediaWikiParseSectionLinks(unittest.TestCase): - ''' Test the pulling of links from the parse section links ''' + """Test the pulling of links from the parse section links""" def test_contains_ext_links(self): - ''' Test when external links are present ''' + """Test when external links are present""" site = MediaWikiOverloaded() res = site.responses[site.api_url] - page = site.page('''McDonald's''') - tmp = page.parse_section_links('External links') + page = site.page("""McDonald's""") + tmp = page.parse_section_links("External links") for i, item in enumerate(tmp): tmp[i] = list(item) - self.assertEqual(tmp, res['mcy_ds_external_links']) + self.assertEqual(tmp, res["mcy_ds_external_links"]) def test_contains_ext_links_2(self): - ''' Test when external links are present capitalization ''' + """Test when external links are present capitalization""" site = MediaWikiOverloaded() res = site.responses[site.api_url] - page = site.page('''McDonald's''') - tmp = page.parse_section_links('EXTERNAL LINKS') + page = site.page("""McDonald's""") + tmp = page.parse_section_links("EXTERNAL LINKS") for i, item in enumerate(tmp): tmp[i] = list(item) - self.assertEqual(tmp, res['mcy_ds_external_links']) + self.assertEqual(tmp, res["mcy_ds_external_links"]) def test_contains_ext_links_3(self): - ''' Test when external links are present None ''' + """Test when external links are present None""" site = MediaWikiOverloaded() res = site.responses[site.api_url] - page = site.page('''McDonald's''') + page = site.page("""McDonald's""") tmp = page.parse_section_links(None) for i, item in enumerate(tmp): tmp[i] = list(item) - self.assertEqual(tmp, res['mcy_ds_external_links_none']) + self.assertEqual(tmp, res["mcy_ds_external_links_none"]) def test_no_ext_links(self): - ''' Test when no external links on the page ''' + """Test when no external links on the page""" site = MediaWikiOverloaded() res = site.responses[site.api_url] - page = site.page('Tropical rainforest conservation') - self.assertEqual(page.parse_section_links('External links'), None) + page = site.page("Tropical rainforest conservation") + self.assertEqual(page.parse_section_links("External links"), None) def test_song_ice_and_fire_links(self): - site = MediaWikiOverloaded('https://awoiaf.westeros.org/api.php') + site = MediaWikiOverloaded("https://awoiaf.westeros.org/api.php") res = site.responses[site.api_url] - pg = site.page('arya') + pg = site.page("arya") for section in pg.sections: links = pg.parse_section_links(section) for i, item in enumerate(links): links[i] = list(item) - 
self.assertEqual(links, res['arya_{}_links'.format(section)]) + self.assertEqual(links, res["arya_{}_links".format(section)]) class TestMediaWikiRegressions(unittest.TestCase): - ''' Add regression tests here for special cases ''' + """Add regression tests here for special cases""" def test_hidden_file(self): - ''' test hidden file or no url: issue #14 ''' + """test hidden file or no url: issue #14""" site = MediaWikiOverloaded() res = site.responses[site.api_url] - page = site.page('One Two Three... Infinity') + page = site.page("One Two Three... Infinity") try: page.images except KeyError: self.fail("KeyError exception on hidden file") - self.assertEqual(page.images, res['hidden_images']) + self.assertEqual(page.images, res["hidden_images"]) def test_large_cont_query(self): - ''' test known large continued query with continue='||' ''' + """test known large continued query with continue='||'""" site = MediaWikiOverloaded() - res = site.responses[site.api_url]['large_continued_query'] - page = site.page('List of named minor planets (numerical)') + res = site.responses[site.api_url]["large_continued_query"] + page = site.page("List of named minor planets (numerical)") self.assertEqual(page.links, res) def test_large_cont_query_images(self): - ''' test known large continued query with images ''' + """test known large continued query with images""" site = MediaWikiOverloaded() - res = site.responses[site.api_url]['large_continued_query_images'] - page = site.page('B8 polytope') + res = site.responses[site.api_url]["large_continued_query_images"] + page = site.page("B8 polytope") self.assertEqual(page.images, res) self.assertEqual(len(page.images), 2214) def test_infinit_loop_images(self): - ''' test known image infinite loop: issue #15 ''' + """test known image infinite loop: issue #15""" site = MediaWikiOverloaded() - res = site.responses[site.api_url]['infinite_loop_images'] - page = site.page('Rober Eryol') + res = site.responses[site.api_url]["infinite_loop_images"] + page = site.page("Rober Eryol") site._get_response = FunctionUseCounter(site._get_response) self.assertEqual(page.images, res) self.assertEqual(site._get_response.count, 13) def test_missing_title_disambig(self): - ''' test when title not present for disambiguation error ''' + """test when title not present for disambiguation error""" site = MediaWikiOverloaded() - res0 = site.responses[site.api_url]['missing_title_disamb_dets'] - res1 = site.responses[site.api_url]['missing_title_disamb_msg'] + res0 = site.responses[site.api_url]["missing_title_disamb_dets"] + res1 = site.responses[site.api_url]["missing_title_disamb_msg"] try: - page = site.page('Leaching') + page = site.page("Leaching") except DisambiguationError as ex: self.assertEqual(ex.details, res0) self.assertEqual(str(ex), res1) @@ -1669,24 +1676,24 @@ def test_missing_title_disambig(self): self.assertEqual(True, False) def test_query_continue(self): - site = MediaWikiOverloaded(url='https://practicalplants.org/w/api.php') - res = site.responses[site.api_url]['query-continue-find'] + site = MediaWikiOverloaded(url="https://practicalplants.org/w/api.php") + res = site.responses[site.api_url]["query-continue-find"] - cat_membs = site.categorymembers('Plant', results=None, subcategories=False) + cat_membs = site.categorymembers("Plant", results=None, subcategories=False) self.assertEqual(cat_membs, res) self.assertEqual(len(cat_membs), 7415) class TestMediaWikiUtilities(unittest.TestCase): - ''' some of the utility functions should be tested ''' + """some of the 
utility functions should be tested""" def test_relative_url(self): - ''' tests of the relative url function ''' - url1 = 'http://www.google.com' - url2 = 'ftp://somewhere.out.there' - url3 = '//cdn.somewhere.out.there/over.js' - url4 = '/wiki/Chess' - url5 = '#Chess_board' # internal to same page + """tests of the relative url function""" + url1 = "http://www.google.com" + url2 = "ftp://somewhere.out.there" + url3 = "//cdn.somewhere.out.there/over.js" + url4 = "/wiki/Chess" + url5 = "#Chess_board" # internal to same page self.assertEqual(mediawiki.utilities.is_relative_url(url1), False) self.assertEqual(mediawiki.utilities.is_relative_url(url2), False) self.assertEqual(mediawiki.utilities.is_relative_url(url3), False) From 49ee45249176627c3549be5c72764dc5c4d2f50b Mon Sep 17 00:00:00 2001 From: barrust Date: Thu, 4 Jan 2024 10:36:33 -0500 Subject: [PATCH 04/17] attempt to fix the package testing --- .github/workflows/python-package.yml | 1 + 1 file changed, 1 insertion(+) diff --git a/.github/workflows/python-package.yml b/.github/workflows/python-package.yml index ccf1af5..18986b4 100644 --- a/.github/workflows/python-package.yml +++ b/.github/workflows/python-package.yml @@ -23,6 +23,7 @@ jobs: run: | python -m pip install --upgrade pip python -m pip install flake8 pytest pytest-cov + if [ -f requirements.txt ]; then pip install -r requirements.txt; fi python -m pip install -e . - name: Lint with flake8 run: | From fe90add61d6674068203ade9aa299f826f543528 Mon Sep 17 00:00:00 2001 From: barrust Date: Thu, 4 Jan 2024 10:48:57 -0500 Subject: [PATCH 05/17] attempt to fix the package testing again --- .github/workflows/python-package.yml | 1 - 1 file changed, 1 deletion(-) diff --git a/.github/workflows/python-package.yml b/.github/workflows/python-package.yml index 18986b4..87c31ca 100644 --- a/.github/workflows/python-package.yml +++ b/.github/workflows/python-package.yml @@ -24,7 +24,6 @@ jobs: python -m pip install --upgrade pip python -m pip install flake8 pytest pytest-cov if [ -f requirements.txt ]; then pip install -r requirements.txt; fi - python -m pip install -e . 
- name: Lint with flake8 run: | # stop the build if there are Python syntax errors or undefined names From ecf3671c2064b4906eac4aa6ef6807ee64bfbb06 Mon Sep 17 00:00:00 2001 From: barrust Date: Thu, 4 Jan 2024 10:52:51 -0500 Subject: [PATCH 06/17] typing of ordereddict --- mediawiki/mediawikipage.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mediawiki/mediawikipage.py b/mediawiki/mediawikipage.py index 64dd249..300c09f 100644 --- a/mediawiki/mediawikipage.py +++ b/mediawiki/mediawikipage.py @@ -480,7 +480,7 @@ def sections(self) -> List[str]: return self._sections @property - def table_of_contents(self) -> OrderedDict[str, Any]: + def table_of_contents(self) -> Dict[str, Any]: """OrderedDict: Dictionary of sections and sub-sections Note: From 01fb86abd0b9882d1bc6fc8a59ac12b110be1188 Mon Sep 17 00:00:00 2001 From: barrust Date: Thu, 4 Jan 2024 10:55:08 -0500 Subject: [PATCH 07/17] typing of dict error --- mediawiki/mediawikipage.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mediawiki/mediawikipage.py b/mediawiki/mediawikipage.py index 300c09f..3c63629 100644 --- a/mediawiki/mediawikipage.py +++ b/mediawiki/mediawikipage.py @@ -809,7 +809,7 @@ def _list_to_dict(_dict, path, sec): self._table_of_contents = res - def __title_query_param(self) -> Dict[str, str | int]: + def __title_query_param(self) -> Dict[str, Any]: """util function to determine which parameter method to use""" if getattr(self, "title", None) is not None: return {"titles": self.title} From aca71cce7202271a0c1d7dc5fa3a2f6fc5442412 Mon Sep 17 00:00:00 2001 From: barrust Date: Thu, 4 Jan 2024 11:01:01 -0500 Subject: [PATCH 08/17] remove | typing for older pythons --- mediawiki/mediawiki.py | 18 ++++++++++-------- 1 file changed, 10 insertions(+), 8 deletions(-) diff --git a/mediawiki/mediawiki.py b/mediawiki/mediawiki.py index 315c851..f126c46 100644 --- a/mediawiki/mediawiki.py +++ b/mediawiki/mediawiki.py @@ -8,7 +8,7 @@ from datetime import datetime, timedelta from decimal import Decimal, DecimalException from json import JSONDecodeError -from typing import Any, Dict, List, Optional, Tuple +from typing import Any, Dict, List, Optional, Tuple, Union import requests import requests.exceptions as rex @@ -233,12 +233,12 @@ def timeout(self, timeout: float): self._timeout = float(timeout) # allow the exception to be raised @property - def verify_ssl(self) -> bool: + def verify_ssl(self) -> Union[bool, str]: """bool | str: Verify SSL when using requests or path to cert file""" return self._verify_ssl @verify_ssl.setter - def verify_ssl(self, verify_ssl: bool | str): + def verify_ssl(self, verify_ssl: Union[bool, str]): """Set request verify SSL parameter; defaults to True if issue""" self._verify_ssl = True if isinstance(verify_ssl, (bool, str)): @@ -467,7 +467,7 @@ def logged_in(self) -> bool: """bool: Returns if logged into the MediaWiki site""" return self._is_logged_in - def random(self, pages: int = 1) -> str | List[str]: + def random(self, pages: int = 1) -> Union[str, List[str]]: """Request a random page title or list of random titles Args: @@ -510,7 +510,9 @@ def allpages(self, query: str = "", results: int = 10) -> List[str]: return titles @memoize - def search(self, query: str, results: int = 10, suggestion: bool = False) -> List[str] | Tuple[List[str], str]: + def search( + self, query: str, results: int = 10, suggestion: bool = False + ) -> Union[List[str], Tuple[List[str], str]]: """Search for similar titles Args: @@ -570,8 +572,8 @@ def suggest(self, query: str) -> 
Optional[str]: @memoize def geosearch( self, - latitude: Decimal | float | None = None, - longitude: Decimal | float | None = None, + latitude: Union[Decimal, float, None] = None, + longitude: Union[Decimal, float, None] = None, radius: int = 1000, title: str = None, auto_suggest: bool = True, @@ -726,7 +728,7 @@ def summary(self, title: str, sentences: int = 0, chars: int = 0, auto_suggest: @memoize def categorymembers( self, category: str, results: int = 10, subcategories: bool = True - ) -> List[str] | Tuple[List[str], List[str]]: + ) -> Union[List[str], Tuple[List[str], List[str]]]: """Get information about a category: pages and subcategories Args: From b6311b6749cc74661a8fc6d53d4604ea3b4181e8 Mon Sep 17 00:00:00 2001 From: barrust Date: Thu, 4 Jan 2024 11:15:51 -0500 Subject: [PATCH 09/17] pyupgrade --- mediawiki/__init__.py | 4 ++-- mediawiki/exceptions.py | 38 +++++++++++++++++++------------------- mediawiki/mediawiki.py | 20 ++++++++++---------- mediawiki/mediawikipage.py | 10 +++++----- mediawiki/utilities.py | 2 +- pyproject.toml | 4 ++-- setup.cfg | 7 ++++++- 7 files changed, 45 insertions(+), 40 deletions(-) diff --git a/mediawiki/__init__.py b/mediawiki/__init__.py index 8ba98d8..e34bc36 100644 --- a/mediawiki/__init__.py +++ b/mediawiki/__init__.py @@ -22,8 +22,8 @@ __version__ = VERSION __credits__ = ["Jonathan Goldsmith"] __url__ = URL -__bugtrack_url__ = "{0}/issues".format(__url__) -__download_url__ = "{0}/tarball/v{1}".format(__url__, __version__) +__bugtrack_url__ = f"{__url__}/issues" +__download_url__ = f"{__url__}/tarball/v{__version__}" __all__ = [ "MediaWiki", diff --git a/mediawiki/exceptions.py b/mediawiki/exceptions.py index 8525e74..c6d476b 100644 --- a/mediawiki/exceptions.py +++ b/mediawiki/exceptions.py @@ -20,7 +20,7 @@ class MediaWikiBaseException(Exception): def __init__(self, message: str): self._message = message - super(MediaWikiBaseException, self).__init__(self.message) + super().__init__(self.message) def __unicode__(self): return self.message @@ -42,8 +42,8 @@ class MediaWikiException(MediaWikiBaseException): def __init__(self, error: str): self._error = error - msg = ('An unknown error occurred: "{0}". Please report it on GitHub!').format(self.error) - super(MediaWikiException, self).__init__(msg) + msg = ('An unknown error occurred: "{}". Please report it on GitHub!').format(self.error) + super().__init__(msg) @property def error(self) -> str: @@ -61,14 +61,14 @@ class PageError(MediaWikiBaseException): def __init__(self, title: Optional[str] = None, pageid: Optional[int] = None): if title: self._title = title - msg = ('"{0}" does not match any pages. Try another query!').format(self.title) + msg = ('"{}" does not match any pages. Try another query!').format(self.title) elif pageid: self._pageid = pageid - msg = ('Page id "{0}" does not match any pages. Try another id!').format(self.pageid) + msg = ('Page id "{}" does not match any pages. Try another id!').format(self.pageid) else: self._title = "" - msg = ('"{0}" does not match any pages. Try another query!').format(self.title) - super(PageError, self).__init__(msg) + msg = ('"{}" does not match any pages. Try another query!').format(self.title) + super().__init__(msg) @property def title(self) -> str: @@ -94,10 +94,10 @@ class RedirectError(MediaWikiBaseException): def __init__(self, title: str): self._title = title msg = ( - '"{0}" resulted in a redirect. Set the redirect property to True ' "to allow automatic redirects." + '"{}" resulted in a redirect. 
Set the redirect property to True ' "to allow automatic redirects." ).format(self.title) - super(RedirectError, self).__init__(msg) + super().__init__(msg) @property def title(self) -> str: @@ -124,8 +124,8 @@ def __init__(self, title: str, may_refer_to: List[str], url: str, details: Optio self._options = sorted(may_refer_to) self._details = details self._url = url - msg = ('\n"{0}" may refer to: \n ' "{1}").format(self.title, "\n ".join(self.options)) - super(DisambiguationError, self).__init__(msg) + msg = ('\n"{}" may refer to: \n ' "{}").format(self.title, "\n ".join(self.options)) + super().__init__(msg) @property def url(self) -> str: @@ -162,11 +162,11 @@ class HTTPTimeoutError(MediaWikiBaseException): def __init__(self, query: str): self._query = query msg = ( - 'Searching for "{0}" resulted in a timeout. ' + 'Searching for "{}" resulted in a timeout. ' "Try again in a few seconds, and ensure you have rate limiting " "set to True." ).format(self.query) - super(HTTPTimeoutError, self).__init__(msg) + super().__init__(msg) @property def query(self) -> str: @@ -182,8 +182,8 @@ class MediaWikiAPIURLError(MediaWikiBaseException): def __init__(self, api_url: str): self._api_url = api_url - msg = "{0} is not a valid MediaWiki API URL".format(self.api_url) - super(MediaWikiAPIURLError, self).__init__(msg) + msg = f"{self.api_url} is not a valid MediaWiki API URL" + super().__init__(msg) @property def api_url(self) -> str: @@ -201,10 +201,10 @@ class MediaWikiGeoCoordError(MediaWikiBaseException): def __init__(self, error: str): self._error = error msg = ( - "GeoData search resulted in the following error: {0}" + "GeoData search resulted in the following error: {}" " - Please use valid coordinates or a proper page title." ).format(self.error) - super(MediaWikiGeoCoordError, self).__init__(msg) + super().__init__(msg) @property def error(self) -> str: @@ -227,7 +227,7 @@ def __init__(self, category: str): "and perhaps use the rate limiting " "option." 
).format(self._category) - super(MediaWikiCategoryTreeError, self).__init__(msg) + super().__init__(msg) @property def category(self) -> str: @@ -244,7 +244,7 @@ class MediaWikiLoginError(MediaWikiBaseException): def __init__(self, error: str): self._error = error - super(MediaWikiLoginError, self).__init__(error) + super().__init__(error) @property def error(self) -> str: diff --git a/mediawiki/mediawiki.py b/mediawiki/mediawiki.py index f126c46..96a13a3 100644 --- a/mediawiki/mediawiki.py +++ b/mediawiki/mediawiki.py @@ -29,7 +29,7 @@ VERSION: str = "0.7.3" -class MediaWiki(object): +class MediaWiki: """MediaWiki API Wrapper Instance Args: @@ -96,7 +96,7 @@ def __init__( self.timeout = timeout # requests library parameters self._session = None - self._user_agent = ("python-mediawiki/VERSION-{0}" "/({1})/BOT").format(VERSION, URL) + self._user_agent = ("python-mediawiki/VERSION-{}" "/({})/BOT").format(VERSION, URL) self._proxies = None self._verify_ssl = None self.verify_ssl = verify_ssl @@ -263,7 +263,7 @@ def language(self, lang: str): return url = self._api_url - tmp = url.replace("/{0}.".format(self._lang), "/{0}.".format(lang)) + tmp = url.replace(f"/{self._lang}.", f"/{lang}.") self._api_url = tmp self._lang = lang @@ -371,7 +371,7 @@ def login(self, username: str, password: str, strict: bool = True) -> bool: self._is_logged_in = False reason = res["login"]["reason"] if strict: - msg = "MediaWiki login failure: {}".format(reason) + msg = f"MediaWiki login failure: {reason}" raise MediaWikiLoginError(msg) return False @@ -625,7 +625,7 @@ def test_lat_long(val): else: lat = test_lat_long(latitude) lon = test_lat_long(longitude) - params["gscoord"] = "{0}|{1}".format(lat, lon) + params["gscoord"] = f"{lat}|{lon}" raw_results = self.wiki_request(params) @@ -747,7 +747,7 @@ def categorymembers( "cmprop": "ids|title|type", "cmtype": ("page|subcat|file" if subcategories else "page|file"), "cmlimit": (min(results, max_pull) if results is not None else max_pull), - "cmtitle": "{0}:{1}".format(self.category_prefix, category), + "cmtitle": f"{self.category_prefix}:{category}", } pages = list() subcats = list() @@ -916,9 +916,9 @@ def _get_site_info(self) -> List[str]: if tmp.startswith("http://") or tmp.startswith("https://"): self._base_url = tmp elif gen["base"].startswith("https:"): - self._base_url = "https:{}".format(tmp) + self._base_url = f"https:{tmp}" else: - self._base_url = "http:{}".format(tmp) + self._base_url = f"http:{tmp}" self._extensions = [ext["name"] for ext in query["extensions"]] self._extensions = sorted(list(set(self._extensions))) @@ -978,13 +978,13 @@ def __cat_tree_rec(self, cat, depth, tree, level, categories, links): if tries > 10: raise MediaWikiCategoryTreeError(cat) try: - pag = self.page("{0}:{1}".format(self.category_prefix, cat)) + pag = self.page(f"{self.category_prefix}:{cat}") categories[cat] = pag parent_cats = categories[cat].categories links[cat] = self.categorymembers(cat, results=None, subcategories=True) break except PageError: - raise PageError("{0}:{1}".format(self.category_prefix, cat)) + raise PageError(f"{self.category_prefix}:{cat}") except KeyboardInterrupt: raise except Exception: diff --git a/mediawiki/mediawikipage.py b/mediawiki/mediawikipage.py index 3c63629..b21bdc1 100644 --- a/mediawiki/mediawikipage.py +++ b/mediawiki/mediawikipage.py @@ -22,7 +22,7 @@ from mediawiki.utilities import is_relative_url, str_or_unicode -class MediaWikiPage(object): +class MediaWikiPage: """MediaWiki Page Instance Args: @@ -135,7 +135,7 @@ def 
__repr__(self): def __unicode__(self): """python 2.7 unicode""" - return """<MediaWikiPage '{0}'>""".format(self.title) + return f"""<MediaWikiPage '{self.title}'>""" def __str__(self): """python > 3 unicode python 2.7 byte str""" @@ -516,7 +516,7 @@ def section(self, section_title: str) -> Optional[str]: except IndexError: pass else: - section = "== {0} ==".format(section_title) + section = f"== {section_title} ==" try: content = self.content index = content.index(section) + len(section) @@ -755,9 +755,9 @@ def __parse_link_info(self, link: str) -> Tuple[str, str]: txt = link.string or href is_rel = is_relative_url(href) if is_rel is True: - tmp = "{0}{1}".format(self.mediawiki.base_url, href) + tmp = f"{self.mediawiki.base_url}{href}" elif is_rel is None: - tmp = "{0}{1}".format(self.url, href) + tmp = f"{self.url}{href}" else: tmp = href return txt, tmp diff --git a/mediawiki/utilities.py b/mediawiki/utilities.py index 123a8f5..203ee71 100644 --- a/mediawiki/utilities.py +++ b/mediawiki/utilities.py @@ -49,7 +49,7 @@ def wrapper(*args, **kwargs): tmp = list() tmp.extend(args[1:]) for k in sorted(defaults.keys()): - tmp.append("({0}: {1})".format(k, defaults[k])) + tmp.append(f"({k}: {defaults[k]})") tmp = [str(x) for x in tmp] key = " - ".join(tmp) diff --git a/pyproject.toml b/pyproject.toml index b4a1d5a..b1289c9 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -4,7 +4,7 @@ requires = [ "wheel", "setuptools_scm>=6.2", "requests>=2.0.0,<3.0.0", - "beautifulsoup4" + "beautifulsoup4", ] build-backend = "setuptools.build_meta" @@ -13,5 +13,5 @@ profile = "black" [tool.black] line-length = 120 -target-version = ['py36'] +target-version = ['py38'] include = '\.pyi?$' diff --git a/setup.cfg b/setup.cfg index 4123931..08b9b32 100644 --- a/setup.cfg +++ b/setup.cfg @@ -25,11 +25,12 @@ classifiers = Operating System :: OS Independent Programming Language :: Python Programming Language :: Python :: 3 - Programming Language :: Python :: 3.6 Programming Language :: Python :: 3.7 Programming Language :: Python :: 3.8 Programming Language :: Python :: 3.9 Programming Language :: Python :: 3.10 + Programming Language :: Python :: 3.11 + Programming Language :: Python :: 3.12 [options] zip_safe = False @@ -54,3 +55,7 @@ max-line-length=120 [pycodestyle] max-line-length = 120 ignore = E203,W503 + +[flake8] +max-line-length = 120 +ignore = E203,W503 \ No newline at end of file From da5922d640b67b02f600c8aa746766fdaed58153 Mon Sep 17 00:00:00 2001 From: barrust Date: Thu, 4 Jan 2024 11:19:51 -0500 Subject: [PATCH 10/17] add test for python 3.12; --- .github/workflows/python-package.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/python-package.yml b/.github/workflows/python-package.yml index 87c31ca..bdea5e1 100644 --- a/.github/workflows/python-package.yml +++ b/.github/workflows/python-package.yml @@ -11,7 +11,7 @@ jobs: runs-on: ubuntu-latest strategy: matrix: - python-version: ['3.7', '3.8', '3.9', '3.10', '3.11'] + python-version: ['3.7', '3.8', '3.9', '3.10', '3.11', '3.12'] steps: - uses: actions/checkout@v3 From b882ffead82382764b42f3aec6e2e27067eb23d3 Mon Sep 17 00:00:00 2001 From: barrust Date: Thu, 4 Jan 2024 13:04:12 -0500 Subject: [PATCH 11/17] more pylint --- mediawiki/exceptions.py | 31 +++++++++--------- mediawiki/mediawiki.py | 36 +++++++++++---------- mediawiki/mediawikipage.py | 65 ++++++++++++++++++-------------------- mediawiki/utilities.py | 8 ++--- 4 files changed, 69 insertions(+), 71 deletions(-) diff --git a/mediawiki/exceptions.py b/mediawiki/exceptions.py index c6d476b..a957b45 100644
--- a/mediawiki/exceptions.py +++ b/mediawiki/exceptions.py @@ -42,7 +42,7 @@ class MediaWikiException(MediaWikiBaseException): def __init__(self, error: str): self._error = error - msg = ('An unknown error occurred: "{}". Please report it on GitHub!').format(self.error) + msg = f'An unknown error occurred: "{self.error}". Please report it on GitHub!' super().__init__(msg) @property @@ -61,13 +61,13 @@ class PageError(MediaWikiBaseException): def __init__(self, title: Optional[str] = None, pageid: Optional[int] = None): if title: self._title = title - msg = ('"{}" does not match any pages. Try another query!').format(self.title) + msg = f'"{self.title}" does not match any pages. Try another query!' elif pageid: self._pageid = pageid - msg = ('Page id "{}" does not match any pages. Try another id!').format(self.pageid) + msg = f'Page id "{self.pageid}" does not match any pages. Try another id!' else: self._title = "" - msg = ('"{}" does not match any pages. Try another query!').format(self.title) + msg = f'"{self.title}" does not match any pages. Try another query!' super().__init__(msg) @property @@ -94,8 +94,8 @@ class RedirectError(MediaWikiBaseException): def __init__(self, title: str): self._title = title msg = ( - '"{}" resulted in a redirect. Set the redirect property to True ' "to allow automatic redirects." - ).format(self.title) + f'"{self.title}" resulted in a redirect. Set the redirect property to True ' "to allow automatic redirects." + ) super().__init__(msg) @@ -124,7 +124,8 @@ def __init__(self, title: str, may_refer_to: List[str], url: str, details: Optio self._options = sorted(may_refer_to) self._details = details self._url = url - msg = ('\n"{}" may refer to: \n ' "{}").format(self.title, "\n ".join(self.options)) + options_str = "\n ".join(self.options) + msg = f'\n"{self.title}" may refer to: \n {options_str}' super().__init__(msg) @property @@ -162,10 +163,10 @@ class HTTPTimeoutError(MediaWikiBaseException): def __init__(self, query: str): self._query = query msg = ( - 'Searching for "{}" resulted in a timeout. ' + f'Searching for "{self.query}" resulted in a timeout. ' "Try again in a few seconds, and ensure you have rate limiting " "set to True." - ).format(self.query) + ) super().__init__(msg) @property @@ -201,9 +202,9 @@ class MediaWikiGeoCoordError(MediaWikiBaseException): def __init__(self, error: str): self._error = error msg = ( - "GeoData search resulted in the following error: {}" + f"GeoData search resulted in the following error: {self.error}" " - Please use valid coordinates or a proper page title." - ).format(self.error) + ) super().__init__(msg) @property @@ -222,11 +223,9 @@ class MediaWikiCategoryTreeError(MediaWikiBaseException): def __init__(self, category: str): self._category = category msg = ( - "Categorytree threw an exception for trying to get the " - "same category '{}' too many times. Please try again later " - "and perhaps use the rate limiting " - "option." - ).format(self._category) + f"Categorytree threw an exception for trying to get the same category '{self._category}' " + "too many times. Please try again later and perhaps use the rate limiting option." 
+ ) super().__init__(msg) @property diff --git a/mediawiki/mediawiki.py b/mediawiki/mediawiki.py index 96a13a3..983a754 100644 --- a/mediawiki/mediawiki.py +++ b/mediawiki/mediawiki.py @@ -96,7 +96,7 @@ def __init__( self.timeout = timeout # requests library parameters self._session = None - self._user_agent = ("python-mediawiki/VERSION-{}" "/({})/BOT").format(VERSION, URL) + self._user_agent = f"python-mediawiki/VERSION-{VERSION}/({URL})/BOT" self._proxies = None self._verify_ssl = None self.verify_ssl = verify_ssl @@ -117,7 +117,7 @@ def __init__( self.__available_languages = None # for memoized results - self._cache = dict() + self._cache = {} self._refresh_interval = None self._use_cache = True @@ -665,7 +665,7 @@ def opensearch(self, query: str, results: int = 10, redirect: bool = True) -> Li self._check_error_response(results, query) - res = list() + res = [] for i, item in enumerate(results[1]): res.append((item, results[2][i], results[3][i])) return res @@ -749,11 +749,11 @@ def categorymembers( "cmlimit": (min(results, max_pull) if results is not None else max_pull), "cmtitle": f"{self.category_prefix}:{category}", } - pages = list() - subcats = list() + pages = [] + subcats = [] returned_results = 0 finished = False - last_cont = dict() + last_cont = {} while not finished: params = search_params.copy() params.update(last_cont) @@ -823,9 +823,9 @@ def categorytree(self, category: str, depth: int = 5) -> Dict[str, Any]: self.__category_parameter_verification(cats, depth, category) - results = dict() - categories = dict() - links = dict() + results = {} + categories = {} + links = {} for cat in [x for x in cats if x]: self.__cat_tree_rec(cat, depth, results, 0, categories, links) @@ -955,22 +955,24 @@ def __category_parameter_verification(cats: str, depth: int, category: str): msg = ( "CategoryTree: Parameter 'category' must either " "be a list of one or more categories or a string; " - "provided: '{}'".format(category) + f"provided: '{category}'" ) raise ValueError(msg) if depth is not None and depth < 1: - msg = "CategoryTree: Parameter 'depth' must be either None " "(for the full tree) or be greater than 0" + msg = "CategoryTree: Parameter 'depth' must be either None (for the full tree) or be greater than 0" raise ValueError(msg) - def __cat_tree_rec(self, cat, depth, tree, level, categories, links): + def __cat_tree_rec( + self, cat: str, depth: int, tree: Dict[str, Any], level: int, categories: List[str], links: List[str] + ): """recursive function to build out the tree""" - tree[cat] = dict() + tree[cat] = {} tree[cat]["depth"] = level - tree[cat]["sub-categories"] = dict() - tree[cat]["links"] = list() - tree[cat]["parent-categories"] = list() - parent_cats = list() + tree[cat]["sub-categories"] = {} + tree[cat]["links"] = [] + tree[cat]["parent-categories"] = [] + parent_cats = [] if cat not in categories: tries = 0 diff --git a/mediawiki/mediawikipage.py b/mediawiki/mediawikipage.py index b21bdc1..b3a2e43 100644 --- a/mediawiki/mediawikipage.py +++ b/mediawiki/mediawikipage.py @@ -7,7 +7,7 @@ import re from collections import OrderedDict from decimal import Decimal -from typing import Any, Dict, Generator, List, Optional, Tuple +from typing import Any, Dict, Iterator, List, Optional, Tuple from bs4 import BeautifulSoup, Tag @@ -252,7 +252,7 @@ def images(self) -> List[str]: Note: Not settable""" if self._images is None: - self._images = list() + self._images = [] params = { "generator": "images", "gimlimit": "max", @@ -276,7 +276,7 @@ def logos(self) -> List[str]: Note: 
This is a parsing operation and not part of the standard API""" if self._logos is None: - self._logos = list() + self._logos = [] # Cache the results of parsing the html, so that multiple calls happen much faster if not self._soup: self._soup = BeautifulSoup(self.html, "html.parser") @@ -298,14 +298,14 @@ def hatnotes(self) -> List[str]: Note: This is a parsing operation and not part of the standard API""" if self._hatnotes is None: - self._hatnotes = list() + self._hatnotes = [] # Cache the results of parsing the html, so that multiple calls happen much faster if not self._soup: self._soup = BeautifulSoup(self.html, "html.parser") notes = self._soup.find_all("div", class_="hatnote") if notes is not None: for note in notes: - tmp = list() + tmp = [] for child in note.children: if hasattr(child, "text"): tmp.append(child.text) @@ -322,7 +322,7 @@ def references(self) -> List[str]: Note May include external links within page that are not technically cited anywhere""" if self._references is None: - self._references = list() + self._references = [] self.__pull_combined_properties() return self._references @@ -333,7 +333,7 @@ def categories(self) -> List[str]: Note: Not settable""" if self._categories is None: - self._categories = list() + self._categories = [] self.__pull_combined_properties() return self._categories @@ -357,7 +357,7 @@ def links(self) -> List[str]: Note: Not settable""" if self._links is None: - self._links = list() + self._links = [] self.__pull_combined_properties() return self._links @@ -368,7 +368,7 @@ def redirects(self) -> List[str]: Note: Not settable""" if self._redirects is None: - self._redirects = list() + self._redirects = [] self.__pull_combined_properties() return self._redirects @@ -379,7 +379,7 @@ def backlinks(self) -> List[str]: Note: Not settable""" if self._backlinks is None: - self._backlinks = list() + self._backlinks = [] params = { "action": "query", "list": "backlinks", @@ -406,7 +406,7 @@ def langlinks(self) -> Dict[str, str]: params = {"prop": "langlinks", "cllimit": "max"} query_result = self._continued_query(params) - langlinks = dict() + langlinks = {} for lang_info in query_result: langlinks[lang_info["lang"]] = lang_info["*"] self._langlinks = langlinks @@ -430,7 +430,7 @@ def preview(self) -> Dict[str, str]: "titles": self.title, } raw = self.mediawiki.wiki_request(params) - self._preview = raw.get("query", dict()).get("pages", list())[0] + self._preview = raw.get("query", {}).get("pages", [])[0] return self._preview @property @@ -626,13 +626,13 @@ def _raise_disambiguation_error(self, page: str, pageid: int): html = request["query"]["pages"][pageid]["revisions"][0]["*"] lis = BeautifulSoup(html, "html.parser").find_all("li") - filtered_lis = [li for li in lis if "tocsection" not in "".join(li.get("class", list()))] + filtered_lis = [li for li in lis if "tocsection" not in "".join(li.get("class", []))] may_refer_to = [li.a.get_text() for li in filtered_lis if li.a] - disambiguation = list() + disambiguation = [] for lis_item in filtered_lis: item = lis_item.find_all("a") - one_disambiguation = dict() + one_disambiguation = {} one_disambiguation["description"] = lis_item.text if item and item[0].has_attr("title"): one_disambiguation["title"] = item[0]["title"] @@ -675,12 +675,12 @@ def _handle_redirect(self, redirect: bool, preload: bool, query: str, page: Dict else: raise RedirectError(getattr(self, "title", page["title"])) - def _continued_query(self, query_params: Dict[str, Any], key: str = "pages"): + def _continued_query(self, 
query_params: Dict[str, Any], key: str = "pages") -> Iterator[Dict[Any, Any]]: """Based on https://www.mediawiki.org/wiki/API:Query#Continuing_queries""" query_params.update(self.__title_query_param()) - last_cont = dict() + last_cont = {} prop = query_params.get("prop") while True: @@ -694,23 +694,20 @@ def _continued_query(self, query_params: Dict[str, Any], key: str = "pages"): pages = request["query"][key] if "generator" in query_params: - for datum in pages.values(): - yield datum + yield from pages.values() elif isinstance(pages, list): - for datum in list(enumerate(pages)): - yield datum[1] + yield from [v for x, v in enumerate(pages)] else: - for datum in pages[self.pageid].get(prop, list()): - yield datum + yield from pages[self.pageid].get(prop, []) if "continue" not in request or request["continue"] == last_cont: break last_cont = request["continue"] - def _parse_section_links(self, id_tag: str) -> List[str]: + def _parse_section_links(self, id_tag: Optional[str]) -> List[str]: """given a section id, parse the links in the unordered list""" - all_links = list() + all_links = [] if id_tag is None: root = self._soup.find("div", {"class": "mw-parser-output"}) @@ -771,12 +768,12 @@ def _list_to_dict(_dict, path, sec): tmp = tmp[elm] tmp[sec] = OrderedDict() - self._sections = list() + self._sections = [] section_regexp = r"\n==* .* ==*\n" # '== {STUFF_NOT_\n} ==' found_obj = re.findall(section_regexp, self.content) res = OrderedDict() - path = list() + path = [] last_depth = 0 for obj in found_obj: depth = obj.count("=") / 2 # this gets us to the single side... @@ -821,7 +818,7 @@ def __pull_combined_properties(self): query_params = { "titles": self.title, "prop": "extracts|redirects|links|coordinates|categories|extlinks", - "continue": dict(), + "continue": {}, # summary "explaintext": "", "exintro": "", # full first section for the summary! 
@@ -840,8 +837,8 @@ def __pull_combined_properties(self): "ellimit": "max", } - last_cont = dict() - results = dict() + last_cont = {} + results = {} idx = 0 while True: params = query_params.copy() @@ -878,14 +875,14 @@ def __pull_combined_properties(self): last_cont = new_cont # redirects - tmp = [link["title"] for link in results.get("redirects", list())] + tmp = [link["title"] for link in results.get("redirects", [])] self._redirects = sorted(tmp) # summary self._summary = results.get("extract") # links - tmp = [link["title"] for link in results.get("links", list())] + tmp = [link["title"] for link in results.get("links", [])] self._links = sorted(tmp) # categories @@ -896,7 +893,7 @@ def _get_cat(val): return tmp[len(self.mediawiki.category_prefix) + 1 :] return tmp - tmp = [_get_cat(link) for link in results.get("categories", list())] + tmp = [_get_cat(link) for link in results.get("categories", [])] self._categories = sorted(tmp) # coordinates @@ -907,5 +904,5 @@ def _get_cat(val): ) # references - tmp = [link["*"] for link in results.get("extlinks", list())] + tmp = [link["*"] for link in results.get("extlinks", [])] self._references = sorted(tmp) diff --git a/mediawiki/utilities.py b/mediawiki/utilities.py index 203ee71..c39f6f1 100644 --- a/mediawiki/utilities.py +++ b/mediawiki/utilities.py @@ -10,7 +10,7 @@ def parse_all_arguments(func: Callable) -> Dict[str, Any]: """determine all positional and named arguments as a dict""" - args = dict() + args = {} func_args = inspect.signature(func) for itm in list(func_args.parameters)[1:]: @@ -38,15 +38,15 @@ def wrapper(*args, **kwargs): return func(*args, **kwargs) if func.__name__ not in cache: - cache[func.__name__] = dict() + cache[func.__name__] = {} if "defaults" not in cache: - cache["defaults"] = dict() + cache["defaults"] = {} cache["defaults"][func.__name__] = parse_all_arguments(func) # build a key; should also consist of the default values defaults = cache["defaults"][func.__name__].copy() for key, val in kwargs.items(): defaults[key] = val - tmp = list() + tmp = [] tmp.extend(args[1:]) for k in sorted(defaults.keys()): tmp.append(f"({k}: {defaults[k]})") From fa050861068d3da9939d35fc925eb871bb73b9e1 Mon Sep 17 00:00:00 2001 From: barrust Date: Thu, 4 Jan 2024 13:31:00 -0500 Subject: [PATCH 12/17] additional pylint --- .pylintrc | 3 +-- mediawiki/mediawiki.py | 20 ++++++++++---------- mediawiki/mediawikipage.py | 14 +++++++++----- 3 files changed, 20 insertions(+), 17 deletions(-) diff --git a/.pylintrc b/.pylintrc index 915243e..028fad1 100644 --- a/.pylintrc +++ b/.pylintrc @@ -598,8 +598,7 @@ variable-naming-style=snake_case [EXCEPTIONS] # Exceptions that will emit a warning when caught. 
-overgeneral-exceptions=BaseException, - Exception +overgeneral-exceptions=builtins.BaseException, builtins.Exception [LOGGING] diff --git a/mediawiki/mediawiki.py b/mediawiki/mediawiki.py index 983a754..966d783 100644 --- a/mediawiki/mediawiki.py +++ b/mediawiki/mediawiki.py @@ -128,8 +128,8 @@ def __init__( try: self._get_site_info() - except MediaWikiException: - raise MediaWikiAPIURLError(url) + except MediaWikiException as exc: + raise MediaWikiAPIURLError(url) from exc # non-settable properties @property @@ -406,11 +406,11 @@ def set_api_url( self._get_site_info() self.__supported_languages = None # reset this self.__available_languages = None # reset this - except (rex.ConnectTimeout, MediaWikiException): + except (rex.ConnectTimeout, MediaWikiException) as exc: # reset api url and lang in the event that the exception was caught self._api_url = old_api_url self._lang = old_lang - raise MediaWikiAPIURLError(api_url) + raise MediaWikiAPIURLError(api_url) from exc self.clear_memoized() def _reset_session(self): @@ -609,8 +609,8 @@ def test_lat_long(val): ) try: return Decimal(val) - except (DecimalException, TypeError): - raise ValueError(error) + except (DecimalException, TypeError) as exc: + raise ValueError(error) from exc return val # end local function @@ -985,10 +985,10 @@ def __cat_tree_rec( parent_cats = categories[cat].categories links[cat] = self.categorymembers(cat, results=None, subcategories=True) break - except PageError: - raise PageError(f"{self.category_prefix}:{cat}") - except KeyboardInterrupt: - raise + except PageError as exc: + raise PageError(f"{self.category_prefix}:{cat}") from exc + except KeyboardInterrupt as exc: + raise exc except Exception: tries = tries + 1 time.sleep(1) diff --git a/mediawiki/mediawikipage.py b/mediawiki/mediawikipage.py index b3a2e43..4fb64e4 100644 --- a/mediawiki/mediawikipage.py +++ b/mediawiki/mediawikipage.py @@ -37,7 +37,8 @@ class MediaWikiPage: Raises: :py:func:`mediawiki.exceptions.DisambiguationError`: if page provided is a disambiguation page Raises: - :py:func:`mediawiki.exceptions.RedirectError`: if redirect is **False** and the pageid or title provided redirects to another page + :py:func:`mediawiki.exceptions.RedirectError`: if redirect is **False** and the pageid or title \ + provided redirects to another page Warning: This should never need to be used directly! Please use :func:`mediawiki.MediaWiki.page`""" @@ -394,7 +395,8 @@ def backlinks(self) -> List[str]: @property def langlinks(self) -> Dict[str, str]: - """dict: Names of the page in other languages for which page is where the key is the language code and the page name is the name of the page in that language. + """dict: Names of the page in other languages for which page is where the key is the language code + and the page name is the name of the page in that language. 
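
The `raise ... from exc` conversions above deserve a note: chaining sets `__cause__` on the new exception, so the original traceback survives and the misleading "During handling of the above exception, another exception occurred" framing becomes an explicit "The above exception was the direct cause of the following exception". It also satisfies pylint's raise-missing-from (W0707) check. A small self-contained illustration; the class and URL here are stand-ins, not the library's real objects:

    class APIURLError(Exception):
        """Stand-in for the library's MediaWikiAPIURLError."""

    def connect(url: str):
        try:
            raise TimeoutError("no route to host")  # stand-in for a failed request
        except TimeoutError as exc:
            # `from exc` records the low-level failure as __cause__
            raise APIURLError(f"invalid API url: {url}") from exc

    try:
        connect("https://example.invalid/w/api.php")
    except APIURLError as err:
        assert isinstance(err.__cause__, TimeoutError)
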
Note: Not settable @@ -502,7 +504,8 @@ def section(self, section_title: str) -> Optional[str]: Note: Use **None** if the header section is desired Note: - Returns **None** if section title is not found; only text between title and next section or sub-section title is returned + Returns **None** if section title is not found; only text between title and next \ + section or sub-section title is returned Note: Side effect is to also pull the content which can be slow Note: @@ -543,7 +546,8 @@ def parse_section_links(self, section_title: str) -> List[Tuple[str, str]]: """Parse all links within a section Args: - section_title (str): Name of the section to pull or, if None is provided, the links between the main heading and the first section + section_title (str): Name of the section to pull or, if None is provided, \ + the links between the main heading and the first section Returns: list: List of (title, url) tuples Note: @@ -725,7 +729,7 @@ def _parse_section_links(self, id_tag: Optional[str]) -> List[str]: continue if node.get("role", "") == "navigation": continue - elif "infobox" in node.get("class", []): + if "infobox" in node.get("class", []): continue # If the classname contains "toc", the element is a table of contents. From 941c1d31a68b72d515e95833f1836a432a246618 Mon Sep 17 00:00:00 2001 From: barrust Date: Fri, 5 Jan 2024 19:36:10 -0500 Subject: [PATCH 13/17] additional typing work --- mediawiki/mediawiki.py | 78 +++++++++++++++++----------------- mediawiki/mediawikipage.py | 85 +++++++++++++++++++++++--------------- tests/mediawiki_test.py | 5 +-- 3 files changed, 94 insertions(+), 74 deletions(-) diff --git a/mediawiki/mediawiki.py b/mediawiki/mediawiki.py index 966d783..68473a9 100644 --- a/mediawiki/mediawiki.py +++ b/mediawiki/mediawiki.py @@ -84,41 +84,41 @@ def __init__( username: Optional[str] = None, password: Optional[str] = None, proxies: Optional[Dict] = None, - verify_ssl: bool = True, + verify_ssl: Union[bool, str] = True, ): """Init Function""" self._version = VERSION self._lang = lang.lower() self._api_url = url.format(lang=self._lang) - self._cat_prefix = None + self._cat_prefix = "" self.category_prefix = cat_prefix - self._timeout = None + self._timeout = 15.0 self.timeout = timeout # requests library parameters - self._session = None + self._session: Optional[requests.Session] = None self._user_agent = f"python-mediawiki/VERSION-{VERSION}/({URL})/BOT" - self._proxies = None - self._verify_ssl = None + self._proxies: Optional[Dict] = None + self._verify_ssl: Union[bool, str] = True self.verify_ssl = verify_ssl # set libary parameters if user_agent is not None: self.user_agent = user_agent self.proxies = proxies # this will call self._reset_session() - self._rate_limit = None + self._rate_limit = False self.rate_limit = bool(rate_limit) - self._rate_limit_last_call = None + self._rate_limit_last_call: Optional[datetime] = None self._min_wait = rate_limit_wait self._extensions = None self._api_version = None self._api_version_str = None self._base_url = None - self.__supported_languages = None - self.__available_languages = None + self.__supported_languages: Optional[Dict[str, str]] = None + self.__available_languages: Optional[Dict[str, bool]] = None # for memoized results - self._cache = {} - self._refresh_interval = None + self._cache: Dict = {} + self._refresh_interval: Optional[int] = None self._use_cache = True # for login information @@ -141,7 +141,7 @@ def version(self) -> str: return self._version @property - def api_version(self) -> str: + def 
api_version(self) -> Optional[str]: """str: API Version of the MediaWiki site Note: @@ -154,7 +154,7 @@ def base_url(self) -> str: Note: Not settable""" - return self._base_url + return self._base_url if self._base_url else "" @property def extensions(self) -> List[str]: @@ -162,7 +162,7 @@ def extensions(self) -> List[str]: Note: Not settable""" - return self._extensions + return self._extensions if self._extensions else [] # settable properties @property @@ -185,7 +185,7 @@ def proxies(self) -> Optional[Dict]: @proxies.setter def proxies(self, proxies: Optional[Dict]): """Turn on, off, or set proxy use through the Requests library""" - if proxies and isinstance(proxies, dict): + if isinstance(proxies, dict): self._proxies = proxies else: self._proxies = None @@ -317,7 +317,7 @@ def memoized(self) -> Dict[Any, Any]: return self._cache @property - def refresh_interval(self) -> int: + def refresh_interval(self) -> Optional[int]: """int: The interval at which the memoize cache is to be refresh""" return self._refresh_interval @@ -512,7 +512,7 @@ def allpages(self, query: str = "", results: int = 10) -> List[str]: @memoize def search( self, query: str, results: int = 10, suggestion: bool = False - ) -> Union[List[str], Tuple[List[str], str]]: + ) -> Union[List[str], Tuple[List[str], Optional[str]]]: """Search for similar titles Args: @@ -575,9 +575,9 @@ def geosearch( latitude: Union[Decimal, float, None] = None, longitude: Union[Decimal, float, None] = None, radius: int = 1000, - title: str = None, + title: Optional[str] = None, auto_suggest: bool = True, - results: str = 10, + results: int = 10, ) -> List[str]: """Search for pages that relate to the provided geocoords or near the page @@ -629,12 +629,12 @@ def test_lat_long(val): raw_results = self.wiki_request(params) - self._check_error_response(raw_results, title) + self._check_error_response(raw_results, title if title else "Page Title Not Provided") return [d["title"] for d in raw_results["query"]["geosearch"]] @memoize - def opensearch(self, query: str, results: int = 10, redirect: bool = True) -> List[str]: + def opensearch(self, query: str, results: int = 10, redirect: bool = True) -> List[Tuple[str, str, str]]: """Execute a MediaWiki opensearch request, similar to search box suggestions and conforming to the OpenSearch specification @@ -661,13 +661,13 @@ def opensearch(self, query: str, results: int = 10, redirect: bool = True) -> Li "namespace": "", } - results = self.wiki_request(query_params) + out = self.wiki_request(query_params) - self._check_error_response(results, query) + self._check_error_response(out, query) - res = [] - for i, item in enumerate(results[1]): - res.append((item, results[2][i], results[3][i])) + res: List[Tuple[str, str, str]] = [] + for i, item in enumerate(out[1]): + res.append((item, out[2][i], out[3][i])) return res @memoize @@ -753,7 +753,7 @@ def categorymembers( subcats = [] returned_results = 0 finished = False - last_cont = {} + last_cont: Dict = {} while not finished: params = search_params.copy() params.update(last_cont) @@ -823,9 +823,9 @@ def categorytree(self, category: str, depth: int = 5) -> Dict[str, Any]: self.__category_parameter_verification(cats, depth, category) - results = {} - categories = {} - links = {} + results: Dict = {} + categories: Dict = {} + links: Dict = {} for cat in [x for x in cats if x]: self.__cat_tree_rec(cat, depth, results, 0, categories, links) @@ -859,7 +859,7 @@ def page(self, title=None, pageid=None, auto_suggest=True, redirect=True, preloa return 
MediaWikiPage(self, title, redirect=redirect, preload=preload) return MediaWikiPage(self, pageid=pageid, preload=preload) - def wiki_request(self, params: Dict[str, Any]) -> Dict[str, Any]: + def wiki_request(self, params: Dict[str, Any]) -> Dict[Any, Any]: """ Make a request to the MediaWiki API using the given search parameters @@ -890,7 +890,7 @@ def wiki_request(self, params: Dict[str, Any]) -> Dict[str, Any]: return req # Protected functions - def _get_site_info(self) -> List[str]: + def _get_site_info(self): """Parse out the Wikimedia site information including API Version and Extensions""" response = self.wiki_request({"meta": "siteinfo", "siprop": "extensions|general"}) @@ -949,7 +949,7 @@ def _check_query(value, message: str): raise ValueError(message) @staticmethod - def __category_parameter_verification(cats: str, depth: int, category: str): + def __category_parameter_verification(cats: list[str], depth: int, category: str): # parameter verification if len(cats) == 1 and (cats[0] is None or cats[0] == ""): msg = ( @@ -964,7 +964,7 @@ def __category_parameter_verification(cats: str, depth: int, category: str): raise ValueError(msg) def __cat_tree_rec( - self, cat: str, depth: int, tree: Dict[str, Any], level: int, categories: List[str], links: List[str] + self, cat: str, depth: int, tree: Dict[str, Any], level: int, categories: Dict[str, Any], links: Dict[str, Any] ): """recursive function to build out the tree""" tree[cat] = {} @@ -1015,14 +1015,18 @@ def __cat_tree_rec( def _get_response(self, params: Dict[str, Any]) -> Dict[str, Any]: """wrap the call to the requests package""" try: - return self._session.get(self._api_url, params=params, timeout=self._timeout).json() + if self._session is not None: + return self._session.get(self._api_url, params=params, timeout=self._timeout).json() + return {} except JSONDecodeError: return {} def _post_response(self, params: Dict[str, Any]) -> Dict[str, Any]: """wrap a post call to the requests package""" try: - return self._session.post(self._api_url, data=params, timeout=self._timeout).json() + if self._session is not None: + return self._session.post(self._api_url, data=params, timeout=self._timeout).json() + return {} except JSONDecodeError: return {} diff --git a/mediawiki/mediawikipage.py b/mediawiki/mediawikipage.py index 4fb64e4..3aab732 100644 --- a/mediawiki/mediawikipage.py +++ b/mediawiki/mediawikipage.py @@ -7,9 +7,9 @@ import re from collections import OrderedDict from decimal import Decimal -from typing import Any, Dict, Iterator, List, Optional, Tuple +from typing import Any, Dict, Iterator, List, Optional, Tuple, Union -from bs4 import BeautifulSoup, Tag +from bs4 import BeautifulSoup, NavigableString, Tag from mediawiki.exceptions import ( ODD_ERROR_MESSAGE, @@ -89,26 +89,26 @@ def __init__( else: raise ValueError("Either a title or a pageid must be specified") - self._content = None - self._revision_id = None - self._parent_id = None - self._html = False # None signifies nothing returned... - self._images = None - self._references = None - self._categories = None - self._coordinates = False # None signifies nothing returned... 
- self._links = None - self._redirects = None - self._backlinks = None - self._langlinks = None - self._summary = None - self._sections = None - self._table_of_contents = None - self._logos = None - self._hatnotes = None - self._soup = None - self._wikitext = None - self._preview = None + self._content: Optional[str] = None + self._revision_id: Optional[int] = None + self._parent_id: Optional[int] = None + self._html: Union[bool, str, None] = False # None signifies nothing returned... + self._images: Optional[List[str]] = None + self._references: Optional[List[str]] = None + self._categories: Optional[List[str]] = None + self._coordinates: Union[bool, None, Tuple[Decimal, Decimal]] = False # None signifies nothing returned... + self._links: Optional[List[str]] = None + self._redirects: Optional[List[str]] = None + self._backlinks: Optional[List[str]] = None + self._langlinks: Optional[Dict[str, str]] = None + self._summary: Optional[str] = None + self._sections: Optional[List[str]] = None + self._table_of_contents: Optional[Dict[str, Any]] = None + self._logos: Optional[List[str]] = None + self._hatnotes: Optional[List[str]] = None + self._soup: Optional[BeautifulSoup] = None + self._wikitext: Optional[str] = None + self._preview: Optional[Dict[str, str]] = None self.__load(redirect=redirect, preload=preload) @@ -150,7 +150,7 @@ def __eq__(self, other): return False # Properties - def _pull_content_revision_parent(self) -> Tuple[str, int, int]: + def _pull_content_revision_parent(self) -> Tuple[Optional[str], Optional[int], Optional[int]]: """combine the pulling of these three properties""" if self._revision_id is None: @@ -181,7 +181,7 @@ def content(self) -> str: Side effect is to also get revision_id and parent_id""" if self._content is None: self._pull_content_revision_parent() - return self._content + return self._content # type: ignore @property def revision_id(self) -> int: @@ -193,7 +193,7 @@ def revision_id(self) -> int: Side effect is to also get content and parent_id""" if self._revision_id is None: self._pull_content_revision_parent() - return self._revision_id + return self._revision_id # type: ignore @property def parent_id(self) -> int: @@ -205,10 +205,10 @@ def parent_id(self) -> int: Side effect is to also get content and revision_id""" if self._parent_id is None: self._pull_content_revision_parent() - return self._parent_id + return self._parent_id # type: ignore @property - def html(self) -> str: + def html(self) -> Optional[str]: """str: HTML representation of the page Note: @@ -227,7 +227,7 @@ def html(self) -> str: request = self.mediawiki.wiki_request(query_params) page = request["query"]["pages"][self.pageid] self._html = page["revisions"][0]["*"] - return self._html + return self._html # type: ignore @property def wikitext(self) -> str: @@ -278,6 +278,8 @@ def logos(self) -> List[str]: This is a parsing operation and not part of the standard API""" if self._logos is None: self._logos = [] + if not self.html: + return self._logos # Cache the results of parsing the html, so that multiple calls happen much faster if not self._soup: self._soup = BeautifulSoup(self.html, "html.parser") @@ -300,6 +302,8 @@ def hatnotes(self) -> List[str]: This is a parsing operation and not part of the standard API""" if self._hatnotes is None: self._hatnotes = [] + if not self.html: + return self._hatnotes # Cache the results of parsing the html, so that multiple calls happen much faster if not self._soup: self._soup = BeautifulSoup(self.html, "html.parser") @@ -436,13 +440,15 @@ def 
preview(self) -> Dict[str, str]: return self._preview @property - def summary(self) -> str: + def summary(self) -> Optional[str]: """str: Default page summary Note: Not settable""" if self._summary is None: self.__pull_combined_properties() + if self._summary is None: + self._summary = "" return self._summary def summarize(self, sentences: int = 0, chars: int = 0) -> str: @@ -456,7 +462,7 @@ def summarize(self, sentences: int = 0, chars: int = 0) -> str: str: The summary of the MediaWiki page Note: Precedence for parameters: sentences then chars; if both are 0 then the entire first section is returned""" - query_params = {"prop": "extracts", "explaintext": "", "titles": self.title} + query_params: Dict[str, Any] = {"prop": "extracts", "explaintext": "", "titles": self.title} if sentences: query_params["exsentences"] = 10 if sentences > 10 else sentences elif chars: @@ -479,6 +485,8 @@ def sections(self) -> List[str]: # `non-decorated` name instead of using the query api! if self._sections is None: self._parse_sections() + if self._sections is None: + self._sections = [] return self._sections @property @@ -492,6 +500,8 @@ def table_of_contents(self) -> Dict[str, Any]: if self._table_of_contents is None: self._parse_sections() + if self._table_of_contents is None: + self._table_of_contents = {} return self._table_of_contents def section(self, section_title: str) -> Optional[str]: @@ -542,7 +552,7 @@ def section(self, section_title: str) -> Optional[str]: return self.content[index:next_index].lstrip("=").strip() - def parse_section_links(self, section_title: str) -> List[Tuple[str, str]]: + def parse_section_links(self, section_title: str) -> Optional[List[Tuple[str, str]]]: """Parse all links within a section Args: @@ -559,6 +569,8 @@ def parse_section_links(self, section_title: str) -> List[Tuple[str, str]]: Note: This is a parsing operation and not part of the standard API""" # Cache the results of parsing the html, so that multiple calls happen much faster + if not self.html: + return None if not self._soup: self._soup = BeautifulSoup(self.html, "html.parser") @@ -709,13 +721,18 @@ def _continued_query(self, query_params: Dict[str, Any], key: str = "pages") -> last_cont = request["continue"] - def _parse_section_links(self, id_tag: Optional[str]) -> List[str]: + def _parse_section_links(self, id_tag: Optional[str]) -> List[Tuple[str, str]]: """given a section id, parse the links in the unordered list""" - all_links = [] + all_links: List[Tuple[str, str]] = [] + + if not self.html: + return all_links + if not self._soup: + self._soup = BeautifulSoup(self.html, "html.parser") if id_tag is None: root = self._soup.find("div", {"class": "mw-parser-output"}) - if root is None: + if root is None or isinstance(root, NavigableString): return all_links candidates = root.children else: diff --git a/tests/mediawiki_test.py b/tests/mediawiki_test.py index 46f79aa..a0b9bad 100644 --- a/tests/mediawiki_test.py +++ b/tests/mediawiki_test.py @@ -1177,10 +1177,9 @@ def test_page_section(self): """test a page returning a section""" self.assertEqual(self.pag.section("A Game of Thrones"), self.response["arya"]["section_a_game_of_thrones"]) - def test_page_section_header(self): - """test a page returning the section header""" + def test_page_top_section_header(self): + """test a page returning the top section header""" res = self.pag.section(None) - print(res) self.assertEqual(self.pag.section(None), self.response["arya"]["section_a_game_of_thrones"]) def test_page_last_section(self): From 
dfad82c44b09a019ed3165b3e938d504fe409c42 Mon Sep 17 00:00:00 2001 From: barrust Date: Fri, 5 Jan 2024 20:06:20 -0500 Subject: [PATCH 14/17] fix broken test --- mediawiki/mediawiki.py | 2 +- mediawiki/mediawikipage.py | 11 ++++++++--- mediawiki/utilities.py | 4 ++-- 3 files changed, 11 insertions(+), 6 deletions(-) diff --git a/mediawiki/mediawiki.py b/mediawiki/mediawiki.py index 68473a9..5d7a1ae 100644 --- a/mediawiki/mediawiki.py +++ b/mediawiki/mediawiki.py @@ -949,7 +949,7 @@ def _check_query(value, message: str): raise ValueError(message) @staticmethod - def __category_parameter_verification(cats: list[str], depth: int, category: str): + def __category_parameter_verification(cats, depth, category): # parameter verification if len(cats) == 1 and (cats[0] is None or cats[0] == ""): msg = ( diff --git a/mediawiki/mediawikipage.py b/mediawiki/mediawikipage.py index 3aab732..df01583 100644 --- a/mediawiki/mediawikipage.py +++ b/mediawiki/mediawikipage.py @@ -504,7 +504,7 @@ def table_of_contents(self) -> Dict[str, Any]: self._table_of_contents = {} return self._table_of_contents - def section(self, section_title: str) -> Optional[str]: + def section(self, section_title: Optional[str]) -> Optional[str]: """Plain text section content Args: @@ -550,7 +550,10 @@ def section(self, section_title: str) -> Optional[str]: except ValueError: next_index = len(self.content) - return self.content[index:next_index].lstrip("=").strip() + val = self.content[index:next_index].lstrip("=").strip() + if val == "": + return None + return val def parse_section_links(self, section_title: str) -> Optional[List[Tuple[str, str]]]: """Parse all links within a section @@ -767,9 +770,11 @@ def _parse_section_links(self, id_tag: Optional[str]) -> List[Tuple[str, str]]: all_links.append(self.__parse_link_info(link)) return all_links - def __parse_link_info(self, link: str) -> Tuple[str, str]: + def __parse_link_info(self, link: Tag) -> Tuple[str, str]: """parse the tag for the link""" href = link.get("href", "") + if isinstance(href, list): + href = href[0] txt = link.string or href is_rel = is_relative_url(href) if is_rel is True: diff --git a/mediawiki/utilities.py b/mediawiki/utilities.py index c39f6f1..ba6ae4f 100644 --- a/mediawiki/utilities.py +++ b/mediawiki/utilities.py @@ -5,7 +5,7 @@ import inspect import sys import time -from typing import Any, Callable, Dict +from typing import Any, Callable, Dict, Optional def parse_all_arguments(func: Callable) -> Dict[str, Any]: @@ -73,7 +73,7 @@ def str_or_unicode(text: str) -> str: return text.encode(encoding).decode(encoding) -def is_relative_url(url: str) -> bool: +def is_relative_url(url: str) -> Optional[bool]: """simple method to determine if a url is relative or absolute""" if url.startswith("#"): return None From 13af9c8f51de258f1140a21ca519c664ca891245 Mon Sep 17 00:00:00 2001 From: barrust Date: Fri, 5 Jan 2024 20:34:40 -0500 Subject: [PATCH 15/17] finalize typing; add py.typed file to signal typing is present --- mediawiki/exceptions.py | 6 +++--- mediawiki/mediawikipage.py | 22 +++++++++++++--------- mediawiki/py.typed | 0 3 files changed, 16 insertions(+), 12 deletions(-) create mode 100644 mediawiki/py.typed diff --git a/mediawiki/exceptions.py b/mediawiki/exceptions.py index a957b45..44a036a 100644 --- a/mediawiki/exceptions.py +++ b/mediawiki/exceptions.py @@ -1,7 +1,7 @@ """ MediaWiki Exceptions """ -from typing import List, Optional +from typing import Dict, List, Optional from mediawiki.utilities import str_or_unicode @@ -118,7 +118,7 @@ class 
DisambiguationError(MediaWikiBaseException): `options` only includes titles that link to valid \ MediaWiki pages """ - def __init__(self, title: str, may_refer_to: List[str], url: str, details: Optional[List[str]] = None): + def __init__(self, title: str, may_refer_to: List[str], url: str, details: Optional[List[Dict]] = None): self._title = title self._unordered_options = may_refer_to self._options = sorted(may_refer_to) @@ -149,7 +149,7 @@ def unordered_options(self) -> List[str]: return self._unordered_options @property - def details(self) -> Optional[List[str]]: + def details(self) -> Optional[List[Dict]]: """list: The details of the proposed non-disambigous pages""" return self._details diff --git a/mediawiki/mediawikipage.py b/mediawiki/mediawikipage.py index df01583..06897ca 100644 --- a/mediawiki/mediawikipage.py +++ b/mediawiki/mediawikipage.py @@ -284,7 +284,7 @@ def logos(self) -> List[str]: if not self._soup: self._soup = BeautifulSoup(self.html, "html.parser") info = self._soup.find("table", {"class": "infobox"}) - if info is not None: + if info is not None and isinstance(info, Tag): children = info.find_all("a", class_="image") for child in children: self._logos.append("https:" + child.img["src"]) @@ -353,7 +353,7 @@ def coordinates(self) -> Optional[Tuple[Decimal, Decimal]]: if self._coordinates is False: self._coordinates = None self.__pull_combined_properties() - return self._coordinates + return self._coordinates # type: ignore @property def links(self) -> List[str]: @@ -632,7 +632,7 @@ def _raise_page_error(self): raise PageError(title=self.title) raise PageError(pageid=self.pageid) - def _raise_disambiguation_error(self, page: str, pageid: int): + def _raise_disambiguation_error(self, page: Dict, pageid: int): """parse and throw a disambiguation error""" query_params = { "prop": "revisions", @@ -666,7 +666,7 @@ def _raise_disambiguation_error(self, page: str, pageid: int): disambiguation, ) - def _handle_redirect(self, redirect: bool, preload: bool, query: str, page: Dict[str, Any]): + def _handle_redirect(self, redirect: bool, preload: bool, query: Dict, page: Dict[str, Any]): """handle redirect""" if redirect: redirects = query["redirects"][0] @@ -685,7 +685,7 @@ def _handle_redirect(self, redirect: bool, preload: bool, query: str, page: Dict raise MediaWikiException(ODD_ERROR_MESSAGE) # change the title and reload the whole object - self.__init__( + self.__init__( # type: ignore self.mediawiki, title=redirects["to"], redirect=redirect, @@ -699,7 +699,7 @@ def _continued_query(self, query_params: Dict[str, Any], key: str = "pages") -> https://www.mediawiki.org/wiki/API:Query#Continuing_queries""" query_params.update(self.__title_query_param()) - last_cont = {} + last_cont: Dict = {} prop = query_params.get("prop") while True: @@ -742,20 +742,23 @@ def _parse_section_links(self, id_tag: Optional[str]) -> List[Tuple[str, str]]: root = self._soup.find("span", {"id": id_tag}) if root is None: return all_links - candidates = self._soup.find(id=id_tag).parent.next_siblings + candidates = self._soup.find(id=id_tag).parent.next_siblings # type: ignore for node in candidates: if not isinstance(node, Tag): continue if node.get("role", "") == "navigation": continue - if "infobox" in node.get("class", []): + classes = node.get("class", []) + if not isinstance(classes, list): + classes = [classes if classes else ""] + if "infobox" in classes: continue # If the classname contains "toc", the element is a table of contents. 
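
On the empty `mediawiki/py.typed` file this patch creates: per PEP 561, type checkers ignore inline annotations in installed third-party packages unless the distribution ships this marker file. For the marker to reach end users it must also be included in the package data; below is a sketch of the relevant `setup.py` fragment, assuming a setuptools-based build (the project's actual packaging configuration may differ):

    from setuptools import setup

    setup(
        name="pymediawiki",
        packages=["mediawiki"],
        # ship the PEP 561 marker so mypy/pyright trust the inline annotations
        package_data={"mediawiki": ["py.typed"]},
        zip_safe=False,  # marker files are not reliably readable from zip installs
    )
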
# The comprehension is necessary because there are several possible # types of tocs: "toclevel", "toc", ... - toc_classnames = [cname for cname in node.get("class", []) if "toc" in cname] + toc_classnames = [cname for cname in classes if "toc" in cname] if toc_classnames: continue @@ -775,6 +778,7 @@ def __parse_link_info(self, link: Tag) -> Tuple[str, str]: href = link.get("href", "") if isinstance(href, list): href = href[0] + href = "" if href is None else href txt = link.string or href is_rel = is_relative_url(href) if is_rel is True: diff --git a/mediawiki/py.typed b/mediawiki/py.typed new file mode 100644 index 0000000..e69de29 From 19587aa8d9d3313bb706d0289d9d967ebba1cc40 Mon Sep 17 00:00:00 2001 From: barrust Date: Fri, 5 Jan 2024 20:50:13 -0500 Subject: [PATCH 16/17] remove unnecessary self.html tests --- mediawiki/mediawikipage.py | 12 +++--------- 1 file changed, 3 insertions(+), 9 deletions(-) diff --git a/mediawiki/mediawikipage.py b/mediawiki/mediawikipage.py index 06897ca..431dc4c 100644 --- a/mediawiki/mediawikipage.py +++ b/mediawiki/mediawikipage.py @@ -92,7 +92,7 @@ def __init__( self._content: Optional[str] = None self._revision_id: Optional[int] = None self._parent_id: Optional[int] = None - self._html: Union[bool, str, None] = False # None signifies nothing returned... + self._html: Union[bool, str] = False # None signifies nothing returned... self._images: Optional[List[str]] = None self._references: Optional[List[str]] = None self._categories: Optional[List[str]] = None @@ -208,7 +208,7 @@ def parent_id(self) -> int: return self._parent_id # type: ignore @property - def html(self) -> Optional[str]: + def html(self) -> str: """str: HTML representation of the page Note: @@ -216,7 +216,7 @@ def html(self) -> Optional[str]: Warning: This can be slow for very large pages""" if self._html is False: - self._html = None + self._html = "" query_params = { "prop": "revisions", "rvprop": "content", @@ -278,8 +278,6 @@ def logos(self) -> List[str]: This is a parsing operation and not part of the standard API""" if self._logos is None: self._logos = [] - if not self.html: - return self._logos # Cache the results of parsing the html, so that multiple calls happen much faster if not self._soup: self._soup = BeautifulSoup(self.html, "html.parser") @@ -302,8 +300,6 @@ def hatnotes(self) -> List[str]: This is a parsing operation and not part of the standard API""" if self._hatnotes is None: self._hatnotes = [] - if not self.html: - return self._hatnotes # Cache the results of parsing the html, so that multiple calls happen much faster if not self._soup: self._soup = BeautifulSoup(self.html, "html.parser") @@ -728,8 +724,6 @@ def _parse_section_links(self, id_tag: Optional[str]) -> List[Tuple[str, str]]: """given a section id, parse the links in the unordered list""" all_links: List[Tuple[str, str]] = [] - if not self.html: - return all_links if not self._soup: self._soup = BeautifulSoup(self.html, "html.parser") From 8c383d57f1aa95ca179dbdb8ebc5db8636047acf Mon Sep 17 00:00:00 2001 From: barrust Date: Fri, 5 Jan 2024 21:03:06 -0500 Subject: [PATCH 17/17] update .pylintrc for codacy --- .pylintrc | 10 +++++----- mediawiki/mediawikipage.py | 4 +--- 2 files changed, 6 insertions(+), 8 deletions(-) diff --git a/.pylintrc b/.pylintrc index 028fad1..ca08a93 100644 --- a/.pylintrc +++ b/.pylintrc @@ -175,22 +175,22 @@ ignored-parents= max-args=12 # Maximum number of attributes for a class (see R0902). 
-max-attributes=7 +max-attributes=35 # Maximum number of boolean expressions in an if statement (see R0916). max-bool-expr=5 -# Maximum number of branch for function / method body. -max-branches=12 +# Maximum number of branch for function / method body (see R0912) +max-branches=15 # Maximum number of locals for function / method body. -max-locals=15 +max-locals=20 # Maximum number of parents for a class (see R0901). max-parents=7 # Maximum number of public methods for a class (see R0904). -max-public-methods=20 +max-public-methods=40 # Maximum number of return / yield for function / method body. max-returns=6 diff --git a/mediawiki/mediawikipage.py b/mediawiki/mediawikipage.py index 431dc4c..4cd01cc 100644 --- a/mediawiki/mediawikipage.py +++ b/mediawiki/mediawikipage.py @@ -739,9 +739,7 @@ def _parse_section_links(self, id_tag: Optional[str]) -> List[Tuple[str, str]]: candidates = self._soup.find(id=id_tag).parent.next_siblings # type: ignore for node in candidates: - if not isinstance(node, Tag): - continue - if node.get("role", "") == "navigation": + if not isinstance(node, Tag) or node.get("role", "") == "navigation": continue classes = node.get("class", []) if not isinstance(classes, list):
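
The final hunk's guards reflect two realities of BeautifulSoup's data model that much of patches 13-17 accommodates: iterating siblings or children yields `NavigableString` nodes as well as `Tag`s, and `Tag.get()` for a multi-valued attribute like `class` may return a list, a string, or `None`. A compact sketch of both guards in isolation, assuming `beautifulsoup4` is installed; the sample HTML is illustrative:

    from bs4 import BeautifulSoup, Tag

    html = '<div>text between tags<p class="infobox">skip</p><p>keep</p></div>'
    soup = BeautifulSoup(html, "html.parser")

    kept = []
    for node in soup.div.children:
        # plain text between tags arrives as NavigableString, which has no .get()
        if not isinstance(node, Tag) or node.get("role", "") == "navigation":
            continue
        classes = node.get("class", [])
        if not isinstance(classes, list):  # multi-valued attrs usually, not always, come back as lists
            classes = [classes if classes else ""]
        if "infobox" in classes:
            continue
        kept.append(node)

    assert [n.text for n in kept] == ["keep"]
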