Skip to content

Commit

Permalink
Add Wd query, more refactoring
Browse files Browse the repository at this point in the history
  • Loading branch information
katjabercic committed Nov 23, 2023
1 parent dd90fe0 commit d57d1ff
Show file tree
Hide file tree
Showing 6 changed files with 92 additions and 92 deletions.
6 changes: 1 addition & 5 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -61,11 +61,7 @@ To update the database with the new model, run:
git pull
source venv/bin/activate
cd web
./manage.py clear_wikidata
./manage.py migrate
./manage.py import_wikidata
./manage.py link_same
./manage.py compute_concepts
./manage.py rebuild_db
sudo systemctl start mathswitch

## WD query examples
Expand Down
1 change: 1 addition & 0 deletions web/concepts/urls.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,5 +3,6 @@
from . import views

urlpatterns = [
path("<slug:source>/<slug:identifier>", views.redirect_item_to_concept),
path("<str:name>/", views.concept),
]
6 changes: 5 additions & 1 deletion web/concepts/views.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,6 @@ def concept(request, name):
}
return render(request, "detail.html", context)


def home(request):
autocomplete_concepts = [c.name for c in Concept.objects.all()]
context = {"concepts": autocomplete_concepts}
Expand All @@ -25,3 +24,8 @@ def home(request):
def search(request):
    """Redirect a search request to the concept page named by its ``q`` parameter.

    A request that carries no ``q`` parameter is treated as an empty search
    term, so the view still answers with a redirect instead of raising
    ``TypeError`` on ``"/concept/" + None``.
    """
    # QueryDict.get returns None for a missing key unless a default is supplied.
    search_value = request.GET.get("q", "")
    return redirect("/concept/" + search_value)

def redirect_item_to_concept(request, source, identifier):
    """Forward a request for a source item to the page of its concept.

    The Item is looked up by ``source`` and ``identifier`` through
    ``get_object_or_404``; the response is a redirect to
    ``/concept/<name of the item's concept>``.
    """
    # Open question from the author: should this be a permanent redirect?
    matched_item = get_object_or_404(Item, source=source, identifier=identifier)
    concept_url = "/concept/" + matched_item.concept.name
    return redirect(concept_url)
6 changes: 4 additions & 2 deletions web/slurper/management/commands/import_wikidata.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,9 +5,11 @@
class Command(BaseCommand):
def handle(self, *args, **options):
print("importing wikidata data")
source_wikidata.WD_SLURPER.save_items()
source_wikidata.WD_SLURPER_1.save_items()
source_wikidata.WD_SLURPER_2.save_items()
source_wikidata.WD_NLAB_SLURPER.save_items()
source_wikidata.WD_MATHWORLD_SLURPER.save_items()
source_wikidata.WD_SLURPER.save_links()
source_wikidata.WD_SLURPER_1.save_links()
source_wikidata.WD_SLURPER_2.save_links()
source_wikidata.WD_NLAB_SLURPER.save_links()
source_wikidata.WD_MATHWORLD_SLURPER.save_links()
164 changes: 80 additions & 84 deletions web/slurper/source_wikidata.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,9 +9,41 @@
class WikidataSlurper:
SPARQL_URL = "https://query.wikidata.org/sparql"

SPARQL_QUERY_SELECT = """
SELECT
DISTINCT ?item ?itemLabel ?itemDescription ?image
?mwID ?emID ?nlabID ?pwID
?art
WHERE {
"""

SPARQL_QUERY_OPTIONS = """
OPTIONAL
{ ?item wdt:P18 ?image . }
OPTIONAL
{ ?item wdt:P2812 ?mwID . } # MathWorld
OPTIONAL
{ ?item wdt:P7554 ?emID . } # Encyclopedia of Mathematics
OPTIONAL
{ ?item wdt:P4215 ?nlabID . } # nLab
OPTIONAL
{ ?item wdt:P6781 ?pwID . } # ProofWiki
OPTIONAL
{
?art rdf:type schema:Article;
schema:isPartOf <https://en.wikipedia.org/>;
schema:about ?item .
}
# except for humans
FILTER NOT EXISTS{ ?item wdt:P31 wd:Q5 . }
# collect the label and description
SERVICE wikibase:label { bd:serviceParam wikibase:language "en". }
}
"""

def __init__(self, source, query, id_map, url_map, name_map, desc_map):
self.source = source
self.query = query
self.query = self.SPARQL_QUERY_SELECT + query + self.SPARQL_QUERY_OPTIONS
self.id_map = id_map
self.url_map = url_map
self.name_map = name_map
Expand Down Expand Up @@ -46,86 +78,63 @@ def save_items(self):
logging.log(logging.WARNING, f" Link from {item.identifier} repeated.")

def save_links(self):

def source_to_key(source):
"""Map source to WD json key for that source"""
if source == Item.Source.NLAB: return "nlabID"
elif source == Item.Source.MATHWORLD: return "mwID"
else: return None

def save_link(current_item, source, source_id):
try:
destinationItem = Item.objects.get(
source=source, identifier=source_id
)
Link.save_new(current_item, destinationItem, Link.Label.WIKIDATA)
except Item.DoesNotExist:
logging.log(
logging.WARNING,
f" {source} item {source_id} does not exist in the database.",
)

for json_item in self.raw_data:
currentItem = Item.objects.get(
current_item = Item.objects.get(
source=self.source, identifier=self.id_map(json_item)
)
if self.source == Item.Source.WIKIDATA:
# nLab, MathWorld
if "nlabID" in json_item:
nlab_id = json_item["nlabID"]["value"]
try:
destinationItem = Item.objects.get(
source=Item.Source.NLAB, identifier=nlab_id
)
Link.save_new(currentItem, destinationItem, Link.Label.WIKIDATA)
except Item.DoesNotExist:
logging.log(
logging.WARNING,
f" NLab item {nlab_id} does not exist in the database.",
)
elif "mwID" in json_item:
mw_id = json_item["mwID"]["value"]
try:
print("looking for ", mw_id)
destinationItem = Item.objects.get(
source=Item.Source.MATHWORLD, identifier=mw_id
)
Link.save_new(currentItem, destinationItem, Link.Label.WIKIDATA)
except Item.DoesNotExist:
logging.log(
logging.WARNING,
f" MathWorld item {mw_id} does not exist in the database.",
)
for source in [Item.Source.NLAB, Item.Source.MATHWORLD]:
if source_to_key(source) in json_item:
source_id = json_item[source_to_key(source)]["value"]
save_link(current_item, source, source_id)
else: # link back to WD items
wd_id = json_item["item"]["value"].split("/")[-1]
try:
destinationItem = Item.objects.get(
source=Item.Source.WIKIDATA, identifier=wd_id
)
Link.save_new(currentItem, destinationItem, Link.Label.WIKIDATA)
except Item.DoesNotExist:
logging.log(
logging.WARNING,
f" Wikidata item {wd_id} does not exist in the database.",
)


WD_SLURPER = WikidataSlurper(
save_link(current_item, Item.Source.WIKIDATA, wd_id)


WD_SLURPER_1 = WikidataSlurper(
Item.Source.WIKIDATA,
"""
SELECT
DISTINCT ?item ?itemLabel ?itemDescription ?image
?mwID ?emID ?nlabID ?pwID
?art
WHERE {
# anything part of a topic that is studied by mathmatics
?item wdt:P31 ?topic .
?topic wdt:P2579 wd:Q395 .
OPTIONAL
{ ?item wdt:P18 ?image . }
OPTIONAL
{ ?item wdt:P2812 ?mwID . } # MathWorld
OPTIONAL
{ ?item wdt:P7554 ?emID . } # Encyclopedia of Mathematics
OPTIONAL
{ ?item wdt:P4215 ?nlabID . } # nLab
OPTIONAL
{ ?item wdt:P6781 ?pwID . } # ProofWiki
OPTIONAL
{
?art rdf:type schema:Article;
schema:isPartOf <https://en.wikipedia.org/>;
schema:about ?item .
}
# except for natural numbers
filter(?topic != wd:Q21199) .
# except for humans
FILTER NOT EXISTS{ ?item wdt:P31 wd:Q5 . }
# collect the label and description
SERVICE wikibase:label { bd:serviceParam wikibase:language "en". }
}
""",
id_map=lambda item: item["item"]["value"].split("/")[-1],
url_map=lambda item: item["item"]["value"],
name_map=lambda item: item["itemLabel"]["value"] if ("itemLabel" in item) else None,
desc_map=lambda item: item["itemDescription"]["value"]
if ("itemDescription" in item)
else None,
)

WD_SLURPER_2 = WikidataSlurper(
Item.Source.WIKIDATA,
"""
# concepts of areas of mathematics
?item p:P31 ?of.
?of ps:P31 wd:Q151885.
?of pq:P642/p:P31/ps:P31 wd:Q1936384
""",
id_map=lambda item: item["item"]["value"].split("/")[-1],
url_map=lambda item: item["item"]["value"],
Expand All @@ -138,16 +147,8 @@ def save_links(self):
WD_NLAB_SLURPER = WikidataSlurper(
Item.Source.NLAB,
"""
SELECT
DISTINCT ?item ?nlabID
WHERE {
# anything that has the nLab identifier property
?item wdt:P4215 ?nlabID .
# except for humans
FILTER NOT EXISTS{ ?item wdt:P31 wd:Q5 . }
# collect the label and description
SERVICE wikibase:label { bd:serviceParam wikibase:language "en". }
}
""",
id_map=lambda item: item["nlabID"]["value"],
url_map=lambda item: "https://ncatlab.org/nlab/show/" + item["nlabID"]["value"],
Expand All @@ -158,16 +159,8 @@ def save_links(self):
WD_MATHWORLD_SLURPER = WikidataSlurper(
Item.Source.MATHWORLD,
"""
SELECT
DISTINCT ?item ?mwID
WHERE {
# anything that has the MathWorld identifier property
?item wdt:P4215 ?mwID .
# except for humans
FILTER NOT EXISTS{ ?item wdt:P31 wd:Q5 . }
# collect the label and description
SERVICE wikibase:label { bd:serviceParam wikibase:language "en". }
}
""",
id_map=lambda item: item["mwID"]["value"],
url_map=lambda item: "https://mathworld.wolfram.com/"
Expand All @@ -176,3 +169,6 @@ def save_links(self):
name_map=lambda item: item["mwID"]["value"],
desc_map=lambda _: None,
)

# ?concept wdt:P642 ?area .
# ?area wdt:P31 wd:Q1936384 .
1 change: 1 addition & 0 deletions web/web/management/commands/rebuild_db.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@ class Command(BaseCommand):
def handle(self, *args, **options):
call_command("clear_agda_unimath")
call_command("clear_wikidata")
call_command("migrate")
call_command("import_wikidata")
call_command("import_agda_unimath")
call_command("link_same")
Expand Down

0 comments on commit d57d1ff

Please sign in to comment.