Skip to content

Commit

Permalink
Merge branch 'main' into deploy
Browse files Browse the repository at this point in the history
  • Loading branch information
katjabercic committed Nov 23, 2023
2 parents 5c9a033 + d57d1ff commit d622b05
Show file tree
Hide file tree
Showing 14 changed files with 321 additions and 187 deletions.
10 changes: 10 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -54,6 +54,16 @@ To update the database with the new model, run:

python manage.py migrate

## Instructions for Katja to update the live version

sudo systemctl stop mathswitch
cd mathswitch
git pull
source venv/bin/activate
cd web
./manage.py rebuild_db
sudo systemctl start mathswitch

## WD query examples

```
Expand Down
95 changes: 6 additions & 89 deletions web/concepts/management/commands/compute_concepts.py
Original file line number Diff line number Diff line change
@@ -1,103 +1,20 @@
import logging
from typing import Dict, List

from concepts.models import Concept, Item, Link
from concepts.models import Item
from django.core.management.base import BaseCommand
from django.db.models import Q
from django.db.utils import IntegrityError


class UnionFind:
    """Disjoint-set forest over the integers ``0 .. size - 1``.

    Uses union by rank together with path compression.
    """

    def __init__(self, size):
        """Start with ``size`` elements, each alone in its own set."""
        self.parent = list(range(size))
        self.rank = [0 for _ in range(size)]

    def find(self, x):
        """Return the root of the set containing ``x``, compressing the path."""
        above = self.parent[x]
        if above == x:
            return x
        # Re-point x directly at the root so later lookups are shorter.
        self.parent[x] = self.find(above)
        return self.parent[x]

    def union(self, x, y):
        """Merge the sets that contain ``x`` and ``y``."""
        root_x, root_y = self.find(x), self.find(y)
        if root_x == root_y:
            return

        rank_x, rank_y = self.rank[root_x], self.rank[root_y]
        if rank_x < rank_y:
            # The shallower tree is attached below the deeper one.
            self.parent[root_x] = root_y
            return

        self.parent[root_y] = root_x
        if rank_x == rank_y:
            # Equal ranks: the surviving root becomes one level deeper.
            self.rank[root_x] += 1

    def get_components(self):
        """Return a dict mapping each root to the list of its elements."""
        groups = {}
        for element in range(len(self.parent)):
            groups.setdefault(self.find(element), []).append(element)
        return groups


class Command(BaseCommand):
num_to_id: List[str] = []
id_to_num: Dict[str, int] = {}
id_to_item: Dict[str, Item] = {}

def union(self, uf, link: Link):
uf.union(self.id_to_num[link.source.id], self.id_to_num[link.destination.id])

def handle(self, *args, **options):
print("compute singletons")
# all items that do not appear in an edge are components
singletons = Item.objects.filter(
incoming_items__isnull=True, outgoing_items__isnull=True
)
count_duplicates = 0
for i in singletons:
new_concept = Concept(name=i.name, description=i.description)
try:
new_concept.save()
except IntegrityError:
count_duplicates += 1
logging.log(
logging.WARNING,
f" A concept named '{new_concept.name}' already exists.",
)
i.concept = new_concept
singletons.create_singleton_concepts()

print("compute non-singletons")
# now deal with those that do not have a concept yet
print(
Item.objects.filter(
Q(incoming_items__isnull=False) | Q(outgoing_items__isnull=False)
).query
)
for i in Item.objects.filter(
nonsingletons = Item.objects.filter(
Q(incoming_items__isnull=False) | Q(outgoing_items__isnull=False)
):
self.num_to_id.append(i.id)
self.id_to_item[i.id] = i
for i, id in enumerate(self.num_to_id):
self.id_to_num[id] = i
uf = UnionFind(len(self.num_to_id))
for link in Link.objects.all():
self.union(uf, link)
for v in uf.get_components().values():
# first check if WD is one of the items
items = list(map(lambda i: self.id_to_item[self.num_to_id[i]], v))
items.sort(key=Item.source_key())
new_concept = Concept(name=items[0].name, description=items[0].description)
try:
new_concept.save()
except IntegrityError:
logging.log(
logging.WARNING,
f" A concept named '{new_concept.name}' already exists.",
)
new_concept = Concept.objects.get(name=items[0].name)
print(f"linking {new_concept.id}")
for item in items:
item.concept = new_concept
item.save()
)
nonsingletons.create_concepts()
69 changes: 58 additions & 11 deletions web/concepts/models.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
import logging

from concepts.utils import UnionFind
from django.db import models
from django.db.utils import IntegrityError

Expand All @@ -12,6 +13,50 @@ class Meta:
ordering = ["name", "description"]


class LinkQuerySet(models.QuerySet):
    """Queryset of links with helpers for graph computations."""

    def to_tuples(self):
        """Yield a ``(source, destination)`` pair of items for every link.

        Both endpoints are loaded together with the links via
        ``select_related``; otherwise each ``link.source`` and
        ``link.destination`` access would issue its own database query.
        """
        for link in self.select_related("source", "destination"):
            yield (link.source, link.destination)


class ItemQuerySet(models.QuerySet):
    """Queryset of items that can group its items into concepts."""

    @staticmethod
    def _get_or_create_concept(item):
        """Return a saved concept carrying the name and description of ``item``.

        A new concept is created first. If saving it raises
        ``IntegrityError`` (presumably a uniqueness constraint on the concept
        name, as the warning text suggests), a warning is logged and the
        stored concept with that name is returned instead, so the caller
        always gets a concept that can be assigned to an item and saved.
        """
        concept = Concept(name=item.name, description=item.description)
        try:
            concept.save()
        except IntegrityError:
            logging.log(
                logging.WARNING,
                f" A concept named '{concept.name}' already exists.",
            )
            concept = Concept.objects.get(name=item.name)
        return concept

    def create_singleton_concepts(self):
        """Give every item of this queryset a concept of its own.

        Each item is linked to the concept named after it and saved, so the
        assignment is actually persisted.
        """
        for item in self:
            item.concept = self._get_or_create_concept(item)
            item.save()

    def create_concepts(self):
        """Create one concept per connected component of the link graph.

        The components are computed from the items of this queryset and all
        links. Within a component the items are sorted with
        ``Item.Source.key()``, and the first item after sorting supplies the
        name and description of the concept shared by the whole component.
        """
        uf = UnionFind(self.all(), Link.objects.all().to_tuples())
        for concept_items in uf.get_item_components(sort_key=Item.Source.key()):
            concept = self._get_or_create_concept(concept_items[0])
            for item in concept_items:
                item.concept = concept
                item.save()


class Item(models.Model):
class Source(models.TextChoices):
WIKIDATA = "Wd", "Wikidata"
Expand All @@ -20,6 +65,17 @@ class Source(models.TextChoices):
WIKIPEDIA_EN = "WpEN", "Wikipedia (English)"
AGDA_UNIMATH = "AUm", "Agda Unimath"

@staticmethod
def key():
SOURCES = [
Item.Source.WIKIDATA,
Item.Source.NLAB,
Item.Source.MATHWORLD,
Item.Source.WIKIPEDIA_EN,
Item.Source.AGDA_UNIMATH,
]
return lambda item: SOURCES.index(item.source)

source = models.CharField(max_length=4, choices=Source.choices)
identifier = models.CharField(max_length=200)
url = models.URLField(max_length=200)
Expand All @@ -31,22 +87,12 @@ class Source(models.TextChoices):
blank=True,
null=True,
)
objects = ItemQuerySet.as_manager()

class Meta:
ordering = ["name", "source", "identifier"]
unique_together = ["source", "identifier"]

@staticmethod
def source_key():
SOURCES = [
Item.Source.WIKIDATA,
Item.Source.NLAB,
Item.Source.MATHWORLD,
Item.Source.WIKIPEDIA_EN,
Item.Source.AGDA_UNIMATH,
]
return lambda item: SOURCES.index(item.source)

def to_dict(self):
return {"name": self.name, "source": self.get_source_display(), "url": self.url}

Expand Down Expand Up @@ -82,6 +128,7 @@ class Label(models.TextChoices):
Item, on_delete=models.CASCADE, related_name="incoming_items"
)
label = models.CharField(max_length=4, choices=Label.choices)
objects = LinkQuerySet.as_manager()

class Meta:
ordering = ["source", "destination", "label"]
Expand Down
1 change: 1 addition & 0 deletions web/concepts/urls.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,5 +3,6 @@
from . import views

urlpatterns = [
path("<slug:source>/<slug:identifier>", views.redirect_item_to_concept),
path("<str:name>/", views.concept),
]
59 changes: 59 additions & 0 deletions web/concepts/utils.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,59 @@
from typing import Any, Dict, List, Tuple


class UnionFind:
    """Connected components of a graph given as items plus links between them.

    Items may be any hashable objects. Internally each distinct item is
    numbered with an integer "element" and a disjoint-set forest (union by
    rank, path compression) is maintained over those numbers. The components
    are computed once, at construction time.
    """

    # Annotations only: the containers are created per instance in __init__.
    # Sharing them on the class would leak items, links and components from
    # one UnionFind into every other one.
    item_to_element: Dict[Any, int]
    components: Dict[int, List[int]]
    element_links: List[Tuple[int, int]]

    def __init__(self, items, links):
        """Initialize union-find with items and links given as pairs of items.

        Args:
            items: iterable of hashable items; it is consumed exactly once,
                so generators and lazy query results are accepted. An item
                that occurs more than once is kept only once, so it ends up
                in exactly one component.
            links: iterable of ``(item, item)`` pairs; both endpoints must be
                among ``items``.

        Raises:
            KeyError: if a link refers to an item that is not in ``items``.
        """
        self.item_to_element = {}
        self.item_list = []
        for item in items:
            if item not in self.item_to_element:
                self.item_to_element[item] = len(self.item_list)
                self.item_list.append(item)
        self.size = len(self.item_list)
        self.parent = list(range(self.size))
        self.rank = [0] * self.size
        self.components = {}
        # Stored as concrete tuples (not lazy iterators) so the links can be
        # inspected or traversed more than once.
        self.element_links = [
            (self._element_of(source), self._element_of(destination))
            for source, destination in links
        ]
        self._compute_components()

    def _element_of(self, item):
        """Return the element number of ``item`` or raise a clear KeyError."""
        try:
            return self.item_to_element[item]
        except KeyError:
            raise KeyError(
                f"link endpoint {item!r} is not among the items"
            ) from None

    # find the root element for given element
    def _find_representative(self, x):
        if self.parent[x] != x:
            # Path compression: point x straight at its root.
            self.parent[x] = self._find_representative(self.parent[x])
        return self.parent[x]

    def _union(self, root_x, root_y):
        """Merge the two trees whose roots are ``root_x`` and ``root_y``."""
        if root_x == root_y:
            return
        if self.rank[root_x] < self.rank[root_y]:
            self.parent[root_x] = root_y
        elif self.rank[root_x] > self.rank[root_y]:
            self.parent[root_y] = root_x
        else:
            # Equal ranks: the surviving root becomes one level deeper.
            self.parent[root_y] = root_x
            self.rank[root_x] += 1

    def _connect_elements(self):
        """Apply every link by merging the sets of its two endpoints."""
        for first, second in self.element_links:
            self._union(
                self._find_representative(first),
                self._find_representative(second),
            )

    def _compute_components(self):
        """Rebuild ``self.components``: root element -> list of its elements."""
        self._connect_elements()
        components: Dict[int, List[int]] = {}
        for element in range(self.size):
            root = self._find_representative(element)
            components.setdefault(root, []).append(element)
        self.components = components

    def get_item_components(self, sort_key):
        """Return the components as lists of items.

        Args:
            sort_key: key function used to sort the items inside each
                component (stable sort).

        Returns:
            A list with one sorted list of items per component.
        """
        return [
            sorted((self.item_list[e] for e in elements), key=sort_key)
            for elements in self.components.values()
        ]
6 changes: 5 additions & 1 deletion web/concepts/views.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,6 @@ def concept(request, name):
}
return render(request, "detail.html", context)


def home(request):
autocomplete_concepts = [c.name for c in Concept.objects.all()]
context = {"concepts": autocomplete_concepts}
Expand All @@ -25,3 +24,8 @@ def home(request):
def search(request):
    """Redirect a search request to the page of the concept named in ``q``.

    Responds with HTTP 400 when the ``q`` query parameter is missing;
    concatenating ``None`` to the URL would otherwise raise ``TypeError``
    and surface as a server error.
    """
    from django.http import HttpResponseBadRequest

    search_value = request.GET.get("q")
    if search_value is None:
        return HttpResponseBadRequest("Missing search parameter 'q'.")
    return redirect("/concept/" + search_value)

def redirect_item_to_concept(request, source, identifier):
    """Redirect from an item, addressed by source and identifier, to its concept.

    Responds with 404 when no such item exists, and also when the item has no
    concept assigned, since ``item.concept.name`` would otherwise raise
    ``AttributeError`` and surface as a server error.
    """
    from django.http import Http404

    # TODO: should this be a permanent redirect?
    item = get_object_or_404(Item, source=source, identifier=identifier)
    # NOTE(review): assumes Item.concept can be unset — confirm against the model.
    if item.concept is None:
        raise Http404("This item is not linked to a concept.")
    return redirect("/concept/" + item.concept.name)
8 changes: 8 additions & 0 deletions web/slurper/management/commands/clear_agda_unimath.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
from concepts.models import Item
from django.core.management.base import BaseCommand


class Command(BaseCommand):
    """Management command that deletes all items whose source is Agda Unimath."""

    help = "Delete all items whose source is Agda Unimath."

    def handle(self, *args, **options):
        """Announce the operation, then delete the matching items."""
        # Write through self.stdout rather than print() so the output can be
        # captured or redirected when the command is invoked programmatically.
        self.stdout.write("clearing agda-unimath data")
        Item.objects.filter(source=Item.Source.AGDA_UNIMATH).delete()
1 change: 1 addition & 0 deletions web/slurper/management/commands/clear_wikidata.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@

class Command(BaseCommand):
def handle(self, *args, **options):
print("clearing wikidata data")
Item.objects.filter(source=Item.Source.WIKIDATA).delete()
Item.objects.filter(source=Item.Source.NLAB).delete()
Item.objects.filter(source=Item.Source.MATHWORLD).delete()
8 changes: 8 additions & 0 deletions web/slurper/management/commands/import_agda_unimath.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
from django.core.management.base import BaseCommand
from slurper import source_agda_unimath


class Command(BaseCommand):
    """Management command that runs the Agda Unimath slurper's ``save_items``."""

    help = "Import items from Agda Unimath."

    def handle(self, *args, **options):
        """Announce the operation, then call ``AU_SLURPER.save_items()``."""
        # Write through self.stdout rather than print() so the output can be
        # captured or redirected when the command is invoked programmatically.
        self.stdout.write("importing agda-unimath data")
        source_agda_unimath.AU_SLURPER.save_items()
7 changes: 5 additions & 2 deletions web/slurper/management/commands/import_wikidata.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,9 +4,12 @@

class Command(BaseCommand):
def handle(self, *args, **options):
source_wikidata.WD_SLURPER.save_items()
print("importing wikidata data")
source_wikidata.WD_SLURPER_1.save_items()
source_wikidata.WD_SLURPER_2.save_items()
source_wikidata.WD_NLAB_SLURPER.save_items()
source_wikidata.WD_MATHWORLD_SLURPER.save_items()
source_wikidata.WD_SLURPER.save_links()
source_wikidata.WD_SLURPER_1.save_links()
source_wikidata.WD_SLURPER_2.save_links()
source_wikidata.WD_NLAB_SLURPER.save_links()
source_wikidata.WD_MATHWORLD_SLURPER.save_links()
Loading

0 comments on commit d622b05

Please sign in to comment.