diff --git a/README.md b/README.md
index a96355a..0517efd 100644
--- a/README.md
+++ b/README.md
@@ -64,6 +64,21 @@ To update the database with the new model, run:
./manage.py rebuild_db
sudo systemctl start mathswitch
+## WD item JSON example
+
+```
+{
+ 'item': {'type': 'uri', 'value': 'http://www.wikidata.org/entity/Q192276'},
+ 'wp_en': {'type': 'uri', 'value': 'https://en.wikipedia.org/wiki/Measure_(mathematics)'},
+ 'image': {'type': 'uri', 'value': 'http://commons.wikimedia.org/wiki/Special:FilePath/Measure%20illustration%20%28Vector%29.svg'},
+ 'mwID': {'type': 'literal', 'value': 'Measure'},
+ 'itemLabel': {'xml:lang': 'en', 'type': 'literal', 'value': 'measure'},
+ 'itemDescription': {'xml:lang': 'en', 'type': 'literal', 'value': 'function assigning numbers to some subsets of a set, which could be seen as a generalization of length, area, volume and integral'},
+ 'eomID': {'type': 'literal', 'value': 'measure'},
+ 'pwID': {'type': 'literal', 'value': 'Definition:Measure_(Measure_Theory)'}
+}
+```
+
## WD query examples
```
diff --git a/web/concepts/templates/index.html b/web/concepts/templates/index.html
index e099b4c..fb5ee60 100644
--- a/web/concepts/templates/index.html
+++ b/web/concepts/templates/index.html
@@ -13,7 +13,8 @@
Examples:
circuit,
mathematical group,
- Stone duality
+ Stone duality,
+ normal
diff --git a/web/concepts/templates/results.html b/web/concepts/templates/results.html
index e8c40e1..33589cb 100644
--- a/web/concepts/templates/results.html
+++ b/web/concepts/templates/results.html
@@ -4,6 +4,8 @@
Search results
+Query: "{{ query }}"
+
{% for concept in results %}
- {{ concept }}
diff --git a/web/concepts/views.py b/web/concepts/views.py
index ac937c1..0cb1953 100644
--- a/web/concepts/views.py
+++ b/web/concepts/views.py
@@ -52,5 +52,8 @@ def redirect_item_to_concept(request, source, identifier):
def results(request, query):
concepts = Concept.objects.filter(name__contains=query)
- context = {"results": [concept.name for concept in concepts]}
+ context = {
+ "query": query,
+ "results": [concept.name for concept in concepts]
+ }
return render(request, "results.html", context)
diff --git a/web/slurper/management/commands/import_wikidata.py b/web/slurper/management/commands/import_wikidata.py
index f3fc650..c4a449f 100644
--- a/web/slurper/management/commands/import_wikidata.py
+++ b/web/slurper/management/commands/import_wikidata.py
@@ -4,10 +4,10 @@
class Command(BaseCommand):
def handle(self, *args, **options):
- print("", end="")
+ print("\r waiting for Wikidata", end="")
n = len(source_wikidata.SLURPERS)
for i, slurper in enumerate(source_wikidata.SLURPERS):
- print(f"\r query {i}/{n}: {slurper.source.label}".ljust(50), end="")
+ print(f"\r items {i}/{n}: {slurper.source.label}".ljust(50), end="")
slurper.save_items()
for i, slurper in enumerate(source_wikidata.SLURPERS):
print(f"\r links {i}/{n}: {slurper.source.label}".ljust(50), end="")
diff --git a/web/slurper/source_wikidata.py b/web/slurper/source_wikidata.py
index 2a780c8..d477444 100644
--- a/web/slurper/source_wikidata.py
+++ b/web/slurper/source_wikidata.py
@@ -1,143 +1,8 @@
import logging
-from typing import Optional
-
import requests
-from concepts.models import Item, Link
+from concepts.models import Item
from django.db.utils import IntegrityError
-
-WD_OTHER_SOURCE = {
- Item.Source.NLAB: {
- "wd_property": "wdt:P4215",
- "json_key": "nlabID",
- },
- Item.Source.MATHWORLD: {
- "wd_property": "wdt:P2812",
- "json_key": "mwID",
- },
- Item.Source.PROOF_WIKI: {
- "wd_property": "wdt:P6781",
- "json_key": "pwID",
- },
- Item.Source.ENCYCLOPEDIA_OF_MATHEMATICS: {
- "wd_property": "wdt:P7554",
- "json_key": "eomID",
- },
-}
-
-
-class BaseWikidataRawItem:
- def __init__(self, source, json_item):
- self.source = source
- self.raw = json_item
-
- def identifier(self):
- pass
-
- def url(self):
- pass
-
- def name(self):
- pass
-
- def description(self):
- pass
-
- def to_item(self) -> Optional[Item]:
- return Item(
- source=self.source,
- identifier=self.identifier(),
- url=self.url(),
- name=self.name(),
- description=self.description(),
- )
-
- @staticmethod
- def get_raw_item(source, json_item):
- match source:
- case Item.Source.WIKIDATA:
- return WdRawItem(json_item)
- case Item.Source.NLAB:
- return nLabRawItem(json_item)
- case Item.Source.MATHWORLD:
- return MWRawItem(json_item)
- case Item.Source.PROOF_WIKI:
- return PWRawItem(json_item)
- case Item.Source.ENCYCLOPEDIA_OF_MATHEMATICS:
- return EoMRawItem(json_item)
-
-
-class WdRawItem(BaseWikidataRawItem):
- def __init__(self, json_item):
- super().__init__(Item.Source.WIKIDATA, json_item)
-
- def identifier(self):
- id = self.raw["item"]["value"].split("/")[-1]
- if id is None:
- print("raw:\n", self.raw)
- return id
-
- def url(self):
- return self.raw["item"]["value"]
-
- def name(self):
- if "itemLabel" in self.raw:
- return self.raw["itemLabel"]["value"]
- else:
- return None
-
- def description(self):
- if "itemDescription" in self.raw:
- return self.raw["itemDescription"]["value"]
- else:
- None
-
-
-class OtherWdRawItem(BaseWikidataRawItem):
- def __init__(self, source, json_item):
- super().__init__(source, json_item)
- self.json_key = WD_OTHER_SOURCE[self.source]["json_key"]
-
- def identifier(self):
- return self.raw[self.json_key]["value"]
-
- def name(self):
- return self.identifier()
-
- def description(self):
- return None
-
-
-class nLabRawItem(OtherWdRawItem):
- def __init__(self, json_item):
- super().__init__(Item.Source.NLAB, json_item)
-
- def url(self):
- return "https://ncatlab.org/nlab/show/" + self.identifier()
-
-
-class MWRawItem(OtherWdRawItem):
- def __init__(self, json_item):
- super().__init__(Item.Source.MATHWORLD, json_item)
-
- def url(self):
- return "https://mathworld.wolfram.com/" + self.identifier() + ".html"
-
-
-class PWRawItem(OtherWdRawItem):
- def __init__(self, json_item):
- super().__init__(Item.Source.PROOF_WIKI, json_item)
-
- def url(self):
- return "https://proofwiki.org/wiki/" + self.identifier()
-
-
-class EoMRawItem(OtherWdRawItem):
- def __init__(self, json_item):
- super().__init__(Item.Source.ENCYCLOPEDIA_OF_MATHEMATICS, json_item)
-
- def url(self):
- return "https://encyclopediaofmath.org/wiki/" + self.identifier()
-
+from slurper.wd_raw_item import WD_OTHER_SOURCES, BaseWdRawItem
class WikidataSlurper:
SPARQL_URL = "https://query.wikidata.org/sparql"
@@ -147,7 +12,7 @@ class WikidataSlurper:
{ ?item wdt:P18 ?image . }
OPTIONAL
{
- ?art rdf:type schema:Article;
+ ?wp_en rdf:type schema:Article;
schema:isPartOf ;
schema:about ?item .
}
@@ -162,12 +27,12 @@ class WikidataSlurper:
}
"""
- def __init__(self, source, query):
+ def __init__(self, source, query, limit=None):
self.source = source
self.query = (
"""
SELECT
- DISTINCT ?item ?itemLabel ?itemDescription ?image ?art
+ DISTINCT ?item ?itemLabel ?itemDescription ?image ?wp_en
"""
+ self._sparql_source_vars_select()
+ """
@@ -176,6 +41,7 @@ def __init__(self, source, query):
+ query
+ self._sparql_source_vars_triples()
+ self.SPARQL_QUERY_OPTIONS
+ + (f"LIMIT {limit}" if limit is not None else "")
)
self.raw_data = self.fetch_json()
@@ -183,7 +49,7 @@ def _sparql_source_vars_select(self):
def to_var(source_dict):
return " ?" + source_dict["json_key"]
- return " ".join(map(to_var, WD_OTHER_SOURCE.values()))
+ return " ".join(map(to_var, WD_OTHER_SOURCES.values()))
def _sparql_source_vars_triples(self):
def to_triple(source_dict):
@@ -193,7 +59,7 @@ def to_triple(source_dict):
source_var_triple += " . }"
return source_var_triple
- return "\n".join(map(to_triple, WD_OTHER_SOURCE.values()))
+ return "\n".join(map(to_triple, WD_OTHER_SOURCES.values()))
def fetch_json(self):
response = requests.get(
@@ -204,46 +70,28 @@ def fetch_json(self):
def get_items(self):
for json_item in self.raw_data:
- yield BaseWikidataRawItem.get_raw_item(self.source, json_item).to_item()
+ raw_item = BaseWdRawItem.raw_item(self.source, json_item)
+ yield raw_item.to_item()
if self.source != Item.Source.WIKIDATA:
- wd_json_item = WdRawItem(json_item)
- if not Item.objects.filter(
- source=Item.Source.WIKIDATA, identifier=wd_json_item.identifier()
- ).exists():
- yield wd_json_item.to_item()
+                # yield_item_if_not_exists() is a generator: it must be delegated
+                # to with `yield from`, otherwise the generator object is created
+                # and discarded without ever running, and no item is produced.
+                yield from raw_item.switch_source_to(
+                    Item.Source.WIKIDATA
+                ).yield_item_if_not_exists()
+            # NOTE(review): assumed to sit at loop level (applies to every
+            # source, Wikidata included) - confirm against the intended nesting.
+            if raw_item.has_source(Item.Source.WIKIPEDIA_EN):
+                yield from raw_item.switch_source_to(
+                    Item.Source.WIKIPEDIA_EN
+                ).yield_item_if_not_exists()
def save_items(self):
for item in self.get_items():
try:
item.save()
except IntegrityError:
- logging.log(logging.INFO, f" Link from {item.identifier} repeated.")
-
- def save_links(self):
- def save_link(current_item, source, source_id):
- try:
- destinationItem = Item.objects.get(source=source, identifier=source_id)
- Link.save_new(current_item, destinationItem, Link.Label.WIKIDATA)
- except Item.DoesNotExist:
logging.log(
- logging.WARNING,
- f" Item {source_id} {source} does not exist in the database.",
+ logging.INFO,
+ f"Item {item.source} {item.identifier} is already in the database.",
)
+ def save_links(self):
for json_item in self.raw_data:
- identifier = BaseWikidataRawItem.get_raw_item(
+ BaseWdRawItem.raw_item(
self.source, json_item
- ).identifier()
- current_item = Item.objects.get(source=self.source, identifier=identifier)
- if self.source == Item.Source.WIKIDATA:
- for source in [Item.Source.NLAB, Item.Source.MATHWORLD]:
- source_key = WD_OTHER_SOURCE[source]["json_key"]
- if source_key in json_item:
- source_id = json_item[source_key]["value"]
- save_link(current_item, source, source_id)
- else: # link back to WD items
- wd_id = WdRawItem(json_item).identifier()
- save_link(current_item, Item.Source.WIKIDATA, wd_id)
+ ).save_links()
SLURPERS = [
@@ -278,5 +126,5 @@ def save_link(current_item, source, source_id):
WikidataSlurper(
source, f"?item {source_property['wd_property']} ?{source_property['json_key']}"
)
- for source, source_property in WD_OTHER_SOURCE.items()
+ for source, source_property in WD_OTHER_SOURCES.items()
]
diff --git a/web/slurper/wd_raw_item.py b/web/slurper/wd_raw_item.py
new file mode 100644
index 0000000..6ac2408
--- /dev/null
+++ b/web/slurper/wd_raw_item.py
@@ -0,0 +1,191 @@
+from typing import Optional
+from concepts.models import Item, Link
+
+# Sources that a Wikidata item points to through an identifier property.
+# For each source:
+#   "wd_property" - the Wikidata property placed in the SPARQL triple pattern
+#   "json_key"    - the SPARQL variable name, i.e. the key under which the
+#                   identifier shows up in each JSON result row
+WD_OTHER_SOURCES = {
+    Item.Source.NLAB: {
+        "wd_property": "wdt:P4215",
+        "json_key": "nlabID",
+    },
+    Item.Source.MATHWORLD: {
+        "wd_property": "wdt:P2812",
+        "json_key": "mwID",
+    },
+    Item.Source.PROOF_WIKI: {
+        "wd_property": "wdt:P6781",
+        "json_key": "pwID",
+    },
+    Item.Source.ENCYCLOPEDIA_OF_MATHEMATICS: {
+        "wd_property": "wdt:P7554",
+        "json_key": "eomID",
+    }
+}
+# English Wikipedia is not listed here: it is not reached through an identifier
+# property. BaseWdRawItem.has_source() checks for the "wp_en" key instead.
+
+class BaseWdRawItem:
+    """One row of a Wikidata SPARQL JSON result, read as an item of one source.
+
+    A single row can describe several items at once: the Wikidata entity, its
+    English Wikipedia article and its entries in the sources listed in
+    WD_OTHER_SOURCES. Each subclass reads the row as one of those items and
+    supplies identifier(), url() and name() for it.
+    """
+
+    def __init__(self, source, json_item):
+        """Wrap ``json_item`` (one result row) as an item of ``source``.
+
+        Raises KeyError if the row has no "item" entry, or if the subclass
+        identifier() needs a key the row does not carry.
+        """
+        self.source = source
+        self.raw = json_item
+        # Full Wikidata entity URI of the row.
+        self.wd_id = self.raw["item"]["value"]
+        # Stored database row for this item, or None if there is none. Looked
+        # up once, here, so it reflects the database at construction time.
+        self.item = self.get_item()
+
+    def identifier(self):
+        """Return the item's identifier within its source (subclass hook)."""
+        return None
+
+    def url(self):
+        """Return the item's URL (subclass hook)."""
+        return None
+
+    def name(self):
+        """Return the item's display name (subclass hook)."""
+        return None
+
+    def description(self):
+        """Return the item's description; None unless a subclass has one."""
+        return None
+
+    def has_source(self, source):
+        """Return True if the row carries the data needed for ``source``.
+
+        Check this before switch_source_to(): constructing the target raw item
+        reads that data and raises KeyError when it is missing.
+        """
+        if source == Item.Source.WIKIDATA:
+            # Not in WD_OTHER_SOURCES; the entity itself lives under "item".
+            return "item" in self.raw
+        if source == Item.Source.WIKIPEDIA_EN:
+            return "wp_en" in self.raw
+        return WD_OTHER_SOURCES[source]["json_key"] in self.raw
+
+    def switch_source_to(self, source):
+        """Return the same row wrapped as an item of ``source``."""
+        return BaseWdRawItem.raw_item(source, self.raw)
+
+    def to_item(self) -> Optional[Item]:
+        """Build a new, unsaved Item from the row."""
+        return Item(
+            source=self.source,
+            identifier=self.identifier(),
+            url=self.url(),
+            name=self.name(),
+            description=self.description(),
+        )
+
+    def _get_item_queryset(self):
+        return Item.objects.filter(source=self.source, identifier=self.identifier())
+
+    def item_exists(self):
+        """Return True if this item is already stored in the database."""
+        # exists() has to be *called*: the bare bound method is always truthy,
+        # which made every item look as if it were already stored.
+        return self._get_item_queryset().exists()
+
+    def get_item(self) -> Optional[Item]:
+        """Return the stored Item for this row, or None if there is none."""
+        return self._get_item_queryset().first()
+
+    def yield_item_if_not_exists(self):
+        """Yield a new Item for this row unless one is already stored.
+
+        This is a generator: callers must iterate it (e.g. ``yield from``).
+        """
+        if not self.item_exists():
+            yield self.to_item()
+
+    def save_link_to(self, source):
+        """Save a link from this item to the same row's item in ``source``.
+
+        Nothing is saved when ``source`` is not a known source or when either
+        end of the link is missing from the database.
+        """
+        target = self.switch_source_to(source)
+        if target is None:
+            return
+        destination_item = target.get_item()
+        if self.item is not None and destination_item is not None:
+            Link.save_new(self.item, destination_item, Link.Label.WIKIDATA)
+
+    def save_links(self):
+        """Save the links every kind of item has; subclasses add their own."""
+        # always save a link to the Wikipedia item
+        if self.has_source(Item.Source.WIKIPEDIA_EN):
+            self.save_link_to(Item.Source.WIKIPEDIA_EN)
+
+    @staticmethod
+    def raw_item(source, json_item):
+        """Return the raw-item subclass instance for ``source``.
+
+        Returns None when ``source`` is not one of the handled sources.
+        """
+        match source:
+            case Item.Source.WIKIDATA:
+                return WdRawItem(json_item)
+            case Item.Source.NLAB:
+                return nLabRawItem(json_item)
+            case Item.Source.MATHWORLD:
+                return MWRawItem(json_item)
+            case Item.Source.PROOF_WIKI:
+                return PWRawItem(json_item)
+            case Item.Source.ENCYCLOPEDIA_OF_MATHEMATICS:
+                return EoMRawItem(json_item)
+            case Item.Source.WIKIPEDIA_EN:
+                return WpENRawItem(json_item)
+        return None
+
+
+class WdRawItem(BaseWdRawItem):
+    """Result row read as the Wikidata entity itself."""
+
+    def __init__(self, json_item):
+        super().__init__(Item.Source.WIKIDATA, json_item)
+
+    def identifier(self):
+        """Return the last path segment of the entity URI."""
+        return self.wd_id.split("/")[-1]
+
+    def url(self):
+        """Return the full entity URI."""
+        return self.wd_id
+
+    def name(self):
+        """Return the entity's label, or None when the row has no label."""
+        if "itemLabel" in self.raw:
+            # The label itself, not the entity URI: returning self.wd_id here
+            # stored the URL as the item's name.
+            return self.raw["itemLabel"]["value"]
+        return None
+
+    def description(self):
+        """Return the entity's description, or None when the row has none."""
+        if "itemDescription" in self.raw:
+            return self.raw["itemDescription"]["value"]
+        return None
+
+    def save_links(self):
+        """Save the Wikipedia link plus one link per other source in the row."""
+        super().save_links()
+        for source in WD_OTHER_SOURCES:
+            if self.has_source(source):
+                self.save_link_to(source)
+
+class WpENRawItem(BaseWdRawItem):
+    """Result row read as its English Wikipedia article (the "wp_en" entry)."""
+
+    def __init__(self, json_item):
+        super().__init__(Item.Source.WIKIPEDIA_EN, json_item)
+
+    def identifier(self):
+        """Return the last path segment of the article URL."""
+        *_, last_segment = self.url().split("/")
+        return last_segment
+
+    def url(self):
+        """Return the article URL exactly as given in the row."""
+        article = self.raw["wp_en"]
+        return article["value"]
+
+    def name(self):
+        """Use the identifier as the display name."""
+        return self.identifier()
+
+
+class OtherWdRawItem(BaseWdRawItem):
+    """Result row read as an item of one of the WD_OTHER_SOURCES sources."""
+
+    def __init__(self, source, json_item):
+        super().__init__(source, json_item)
+
+    def identifier(self):
+        """Return the identifier stored under the source's json_key.
+
+        Raises KeyError when the row has no entry for this source.
+        """
+        json_key = WD_OTHER_SOURCES[self.source]["json_key"]
+        return self.raw[json_key]["value"]
+
+    def name(self):
+        """Use the identifier as the display name."""
+        return self.identifier()
+
+    def save_links(self):
+        """Save the Wikipedia link plus the link back to the Wikidata item."""
+        super().save_links()
+        # Link back to the Wikidata item. save_link_to() takes a *source*, not a
+        # raw item: handed a WdRawItem it matched no source and saved nothing.
+        self.save_link_to(Item.Source.WIKIDATA)
+
+class nLabRawItem(OtherWdRawItem):
+    """Result row read as an nLab item."""
+
+    def __init__(self, json_item):
+        super().__init__(Item.Source.NLAB, json_item)
+
+    def url(self):
+        """Return the nLab URL built from the identifier."""
+        prefix = "https://ncatlab.org/nlab/show/"
+        return prefix + self.identifier()
+
+
+class MWRawItem(OtherWdRawItem):
+    """Result row read as a MathWorld item."""
+
+    def __init__(self, json_item):
+        super().__init__(Item.Source.MATHWORLD, json_item)
+
+    def url(self):
+        """Return the MathWorld URL built from the identifier."""
+        pieces = ("https://mathworld.wolfram.com/", self.identifier(), ".html")
+        return "".join(pieces)
+
+
+class PWRawItem(OtherWdRawItem):
+    """Result row read as a ProofWiki item."""
+
+    def __init__(self, json_item):
+        super().__init__(Item.Source.PROOF_WIKI, json_item)
+
+    def url(self):
+        """Return the ProofWiki URL built from the identifier."""
+        prefix = "https://proofwiki.org/wiki/"
+        return prefix + self.identifier()
+
+
+class EoMRawItem(OtherWdRawItem):
+    """Result row read as an Encyclopedia of Mathematics item."""
+
+    def __init__(self, json_item):
+        super().__init__(Item.Source.ENCYCLOPEDIA_OF_MATHEMATICS, json_item)
+
+    def url(self):
+        """Return the Encyclopedia of Mathematics URL built from the identifier."""
+        prefix = "https://encyclopediaofmath.org/wiki/"
+        return prefix + self.identifier()
\ No newline at end of file