From e0533c2025876fc940a8b0ec95a76fa4297781cf Mon Sep 17 00:00:00 2001
From: tihacker
Date: Thu, 16 Jul 2015 13:04:17 +0200
Subject: [PATCH] Added python 3 compatibility

Make the module importable and runnable on both Python 2 and Python 3:

- alias ``string`` to ``basestring``/``str`` for isinstance checks;
- import ``quote``/``unquote`` from ``urllib`` (Python 2) or from
  ``urllib.parse`` (Python 3); ``import urllib`` alone does not load the
  ``urllib.parse`` submodule, so attribute access on it only works when
  some other module happens to have imported it already;
- wrap ``dict.values()`` in ``list()`` before indexing and use
  ``items()`` instead of ``iteritems()``;
- call ``print`` as a function in the ``__main__`` demo, using
  ``print("")`` for the blank separator lines: a bare ``print`` is a
  no-op expression on Python 3, and ``print()`` prints ``()`` on
  Python 2.
---
Note: this patch was re-flowed from a whitespace-collapsed copy, so the
indentation of context lines is reconstructed; use
`git apply --ignore-whitespace` if a hunk is rejected.  The post-image
blob id on the "index" line predates the edits described above.

 wikipedia_template_parser/__init__.py | 66 +++++++++++++++------------
 1 file changed, 38 insertions(+), 28 deletions(-)

diff --git a/wikipedia_template_parser/__init__.py b/wikipedia_template_parser/__init__.py
index 9a6cc50..4917ec5 100644
--- a/wikipedia_template_parser/__init__.py
+++ b/wikipedia_template_parser/__init__.py
@@ -9,9 +9,19 @@
 import urllib
 from pyquery import PyQuery as pq
 import mwparserfromhell
+import sys
 
 logger = logging.getLogger(__name__)
 
+# Python 2/3 compatibility aliases used throughout this module.
+if sys.version_info[0] < 3:
+    string = basestring
+    from urllib import quote, unquote
+else:
+    string = str
+    # "import urllib" alone does not load the urllib.parse submodule.
+    from urllib.parse import quote, unquote
+
 
 def clean_wiki_links(s):
     """
@@ -33,7 +43,7 @@ def clean_ref(s):
     text = pq(s)
     res = []
     for el in text.contents():
-        if isinstance(el, basestring):
+        if isinstance(el, string):
             res.append(el.strip())
         elif el.tag != "ref":
             res.append(clean_ref(el))
@@ -49,7 +59,7 @@ def get_wikitext_from_api(page, lang='en'):
     params = {
         'action': 'query',
         'prop': 'revisions',
-        'titles': urllib.unquote(page.replace(' ', '_')),
+        'titles': unquote(page.replace(' ', '_')),
         'rvprop': 'content',
         'rvlimit': '1',
         'format': 'json',
@@ -61,7 +71,7 @@ def get_wikitext_from_api(page, lang='en'):
     json_pages = res.json()['query']['pages']
 
     try:
-        result = json_pages.values()[0]['revisions'][0]['*']
+        result = list(json_pages.values())[0]['revisions'][0]['*']
     except:
         raise ValueError('Page {page} does not exist on '
                          '{lang}.wikipedia'.format(page=page, lang=lang))
@@ -81,7 +91,7 @@ def extract_data_from_coord(template):
     ]
 
     todel = set()
-    for k, v in template.iteritems():
+    for k, v in template.items():
         for op in optionalpars:
             if (op in v) or (op in k):
                 todel.add(k)
@@ -333,30 +343,30 @@ def pages_in_category(catname, lang='en', maxdepth=0,
     return result
 
 if __name__ == "__main__":
-    print pages_with_template("Template:Edificio_religioso", "it")
-    print
-    print pages_in_category("Categoria:Architetture_religiose_d'Italia",
+    print(pages_with_template("Template:Edificio_religioso", "it"))
+    print("")
+    print(pages_in_category("Categoria:Architetture_religiose_d'Italia",
                             "it",
                             maxdepth=20
-                            )
-    print
-    print pages_in_category("Categoria:Chiese_di_Prato", "it")
-    print
-    print data_from_templates("Chiesa di San Pantaleo (Zoagli)", "it")
-    print
-    print data_from_templates(urllib.quote("Chiesa di San Pantaleo (Zoagli)"),
+                            ))
+    print("")
+    print(pages_in_category("Categoria:Chiese_di_Prato", "it"))
+    print("")
+    print(data_from_templates("Chiesa di San Pantaleo (Zoagli)", "it"))
+    print("")
+    print(data_from_templates(quote("Chiesa di San Pantaleo (Zoagli)"),
                               "it"
-                              )
-    print
-    print get_wikitext_from_api("Chiesa di San Petronio", "it")
-    print
-    print data_from_templates("Volano_(Italia)", "it")
-    print
-    print data_from_templates("Cattedrale di San Vigilio", "it")
-    print
-    print data_from_templates("Telenorba", "it")
-    print
-    print data_from_templates("Pallavolo Falchi Ugento", "it")
-    print
+                              ))
+    print("")
+    print(get_wikitext_from_api("Chiesa di San Petronio", "it"))
+    print("")
+    print(data_from_templates("Volano_(Italia)", "it"))
+    print("")
+    print(data_from_templates("Cattedrale di San Vigilio", "it"))
+    print("")
+    print(data_from_templates("Telenorba", "it"))
+    print("")
+    print(data_from_templates("Pallavolo Falchi Ugento", "it"))
+    print("")
     pisa_text = get_wikitext_from_api("Torre pendente di Pisa", "it")
     tmpl_from_text = data_from_templates("Torre pendente di Pisa",
@@ -365,6 +375,6 @@ def pages_in_category(catname, lang='en', maxdepth=0,
                                          )
     tmpl_from_api = data_from_templates("Torre pendente di Pisa", "it")
     if tmpl_from_text == tmpl_from_api:
-        print "Templates from text and from API match"
+        print("Templates from text and from API match")
     else:
-        print "W00t?!"
+        print("W00t?!")