Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Added Python 3 compatibility. #2

Open
wants to merge 1 commit into
base: master
Choose a base branch
from
Open
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
46 changes: 28 additions & 18 deletions wikipedia_template_parser/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,9 +9,19 @@
import urllib
from pyquery import PyQuery as pq
import mwparserfromhell
import sys

logger = logging.getLogger(__name__)

# Python 2/3 compatibility aliases. The rest of the module uses `string`
# for isinstance() checks on text and `quote` / `unquote` for URL
# percent-encoding, instead of naming the version-specific objects directly.
if sys.version_info[0] < 3:
    # Python 2: `basestring` is the common base of `str` and `unicode`,
    # and the quoting helpers live directly on the `urllib` module.
    string = basestring  # noqa: F821 -- name exists only on Python 2
    unquote = urllib.unquote
    quote = urllib.quote
else:
    # On Python 3 a bare `import urllib` does not load the `urllib.parse`
    # submodule, so import it explicitly here rather than relying on some
    # other package having imported it first.
    import urllib.parse

    string = str
    unquote = urllib.parse.unquote
    quote = urllib.parse.quote


def clean_wiki_links(s):
"""
Expand All @@ -33,7 +43,7 @@ def clean_ref(s):
text = pq(s)
res = []
for el in text.contents():
if isinstance(el, basestring):
if isinstance(el, string):
res.append(el.strip())
elif el.tag != "ref":
res.append(clean_ref(el))
Expand All @@ -49,7 +59,7 @@ def get_wikitext_from_api(page, lang='en'):
params = {
'action': 'query',
'prop': 'revisions',
'titles': urllib.unquote(page.replace(' ', '_')),
'titles': unquote(page.replace(' ', '_')),
'rvprop': 'content',
'rvlimit': '1',
'format': 'json',
Expand All @@ -61,7 +71,7 @@ def get_wikitext_from_api(page, lang='en'):
json_pages = res.json()['query']['pages']

try:
result = json_pages.values()[0]['revisions'][0]['*']
result = list(json_pages.values())[0]['revisions'][0]['*']
except:
raise ValueError('Page {page} does not exist on '
'{lang}.wikipedia'.format(page=page, lang=lang))
Expand All @@ -81,7 +91,7 @@ def extract_data_from_coord(template):
]

todel = set()
for k, v in template.iteritems():
for k, v in template.items():
for op in optionalpars:
if (op in v) or (op in k):
todel.add(k)
Expand Down Expand Up @@ -333,30 +343,30 @@ def pages_in_category(catname, lang='en', maxdepth=0,
return result

if __name__ == "__main__":
print pages_with_template("Template:Edificio_religioso", "it")
print(pages_with_template("Template:Edificio_religioso", "it"))
print
print pages_in_category("Categoria:Architetture_religiose_d'Italia",
print(pages_in_category("Categoria:Architetture_religiose_d'Italia",
"it",
maxdepth=20
)
))
print
print pages_in_category("Categoria:Chiese_di_Prato", "it")
print(pages_in_category("Categoria:Chiese_di_Prato", "it"))
print
print data_from_templates("Chiesa di San Pantaleo (Zoagli)", "it")
print(data_from_templates("Chiesa di San Pantaleo (Zoagli)", "it"))
print
print data_from_templates(urllib.quote("Chiesa di San Pantaleo (Zoagli)"),
print(data_from_templates(quote("Chiesa di San Pantaleo (Zoagli)"),
"it"
)
))
print
print get_wikitext_from_api("Chiesa di San Petronio", "it")
print(get_wikitext_from_api("Chiesa di San Petronio", "it"))
print
print data_from_templates("Volano_(Italia)", "it")
print(data_from_templates("Volano_(Italia)", "it"))
print
print data_from_templates("Cattedrale di San Vigilio", "it")
print(data_from_templates("Cattedrale di San Vigilio", "it"))
print
print data_from_templates("Telenorba", "it")
print(data_from_templates("Telenorba", "it"))
print
print data_from_templates("Pallavolo Falchi Ugento", "it")
print(data_from_templates("Pallavolo Falchi Ugento", "it"))
print
pisa_text = get_wikitext_from_api("Torre pendente di Pisa", "it")
tmpl_from_text = data_from_templates("Torre pendente di Pisa",
Expand All @@ -365,6 +375,6 @@ def pages_in_category(catname, lang='en', maxdepth=0,
)
tmpl_from_api = data_from_templates("Torre pendente di Pisa", "it")
if tmpl_from_text == tmpl_from_api:
print "Templates from text and from API match"
print("Templates from text and from API match")
else:
print "W00t?!"
print("W00t?!")