From 85490df51b148be8c9867316ea0a0c5a5d02f529 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Katja=20Ber=C4=8Di=C4=8D?= Date: Tue, 21 Nov 2023 14:31:42 +0100 Subject: [PATCH] Implement naive concepts, basic autocomplete --- README.md | 2 +- web/concepts/management/commands/__init__.py | 0 .../management/commands/compute_concepts.py | 103 ++++++++++++++++++ web/concepts/management/commands/link_same.py | 18 +++ .../migrations/0006_remove_item_links_link.py | 62 +++++++++++ .../migrations/0007_concept_item_concept.py | 42 +++++++ web/concepts/models.py | 81 +++++++++++++- web/concepts/templates/base.html | 30 +++++ web/concepts/templates/detail.html | 30 ++--- web/concepts/templates/index.html | 52 +++------ web/concepts/urls.py | 2 +- web/concepts/views.py | 29 +++-- web/slurper/source_wikidata.py | 23 ++-- web/web/settings.py | 7 +- web/web/urls.py | 5 +- 15 files changed, 401 insertions(+), 85 deletions(-) create mode 100644 web/concepts/management/commands/__init__.py create mode 100644 web/concepts/management/commands/compute_concepts.py create mode 100644 web/concepts/management/commands/link_same.py create mode 100644 web/concepts/migrations/0006_remove_item_links_link.py create mode 100644 web/concepts/migrations/0007_concept_item_concept.py create mode 100644 web/concepts/templates/base.html diff --git a/README.md b/README.md index b15e458..10b6b7e 100644 --- a/README.md +++ b/README.md @@ -2,7 +2,7 @@ Infrastructure for relaying and exchanging mathematical concepts. -For a demonstration of a page with at least one link, see for example `{baseurl}/concept/Q1369621/`. +For a demonstration of a page with at least one link, see for example `{baseurl}/concept/Schwartz%20space/`. 
import logging
from typing import Dict, List

from concepts.models import Concept, Item, Link
from django.core.management.base import BaseCommand
from django.db import transaction
from django.db.models import Q
from django.db.utils import IntegrityError


class UnionFind:
    """Disjoint-set forest over the integers ``0 .. size - 1``.

    Sets are merged by rank and paths are compressed on lookup.
    """

    def __init__(self, size):
        # Every element starts as the root of its own one-element set.
        self.parent = list(range(size))
        self.rank = [0] * size

    def find(self, x):
        """Return the root of the set containing ``x``, compressing the path."""
        root = x
        while self.parent[root] != root:
            root = self.parent[root]
        # Second pass: point every node on the walked path straight at the root.
        while self.parent[x] != root:
            next_x = self.parent[x]
            self.parent[x] = root
            x = next_x
        return root

    def union(self, x, y):
        """Merge the sets containing ``x`` and ``y`` (no-op if already merged)."""
        root_x = self.find(x)
        root_y = self.find(y)
        if root_x == root_y:
            return
        if self.rank[root_x] < self.rank[root_y]:
            self.parent[root_x] = root_y
        elif self.rank[root_x] > self.rank[root_y]:
            self.parent[root_y] = root_x
        else:
            # Equal ranks: pick root_x as the new root and bump its rank.
            self.parent[root_y] = root_x
            self.rank[root_x] += 1

    def get_components(self):
        """Return a dict mapping each root to the list of elements in its set."""
        components = {}
        for i in range(len(self.parent)):
            components.setdefault(self.find(i), []).append(i)
        return components


class Command(BaseCommand):
    """Assign every Item to a Concept, one Concept per connected component of links."""

    help = "Group linked items into concepts (one concept per connected component)."

    # Lookup tables between union-find indices and item ids; rebuilt on each run.
    num_to_id: List[str] = []
    id_to_num: Dict[str, int] = {}
    id_to_item: Dict[str, Item] = {}

    def union(self, uf, link: Link):
        """Merge the components of the two items joined by ``link``."""
        # ``source_id`` / ``destination_id`` read the stored foreign keys
        # directly, so the related Items are not fetched once per link.
        uf.union(self.id_to_num[link.source_id], self.id_to_num[link.destination_id])

    @staticmethod
    def _get_or_create_concept(name, description):
        """Return a saved Concept called ``name``, reusing an existing one on clash."""
        concept = Concept(name=name, description=description)
        try:
            # The savepoint keeps an enclosing transaction usable after the
            # unique-constraint failure.
            with transaction.atomic():
                concept.save()
        except IntegrityError:
            logging.log(
                logging.WARNING,
                f" A concept named '{name}' already exists.",
            )
            concept = Concept.objects.get(name=name)
        return concept

    def handle(self, *args, **options):
        """Create concepts for unlinked items, then for each group of linked items."""
        # Fresh per-run tables: the class-level defaults are shared objects and
        # would otherwise accumulate entries across invocations in one process.
        self.num_to_id = []
        self.id_to_num = {}
        self.id_to_item = {}

        # All items that do not appear in any link are components on their own.
        singletons = Item.objects.filter(
            incoming_items__isnull=True, outgoing_items__isnull=True
        )
        for item in singletons:
            item.concept = self._get_or_create_concept(item.name, item.description)
            # Persist the assignment; without this the concept is never stored.
            item.save()

        print("compute non-singletons")
        # The filter joins across links, so an item with several links would be
        # returned several times; distinct() keeps one union-find node per item.
        linked_items = Item.objects.filter(
            Q(incoming_items__isnull=False) | Q(outgoing_items__isnull=False)
        ).distinct()
        for item in linked_items:
            self.num_to_id.append(item.id)
            self.id_to_item[item.id] = item
        self.id_to_num = {
            item_id: num for num, item_id in enumerate(self.num_to_id)
        }

        uf = UnionFind(len(self.num_to_id))
        for link in Link.objects.all():
            self.union(uf, link)

        for component in uf.get_components().values():
            # Order by source priority so the preferred source names the concept.
            items = sorted(
                (self.id_to_item[self.num_to_id[num]] for num in component),
                key=Item.source_key(),
            )
            concept = self._get_or_create_concept(
                items[0].name, items[0].description
            )
            print(f"linking {concept.id}")
            for item in items:
                item.concept = concept
                item.save()
+ Link.save_new(items[i], items[j], Link.Label.NAME_EQ) diff --git a/web/concepts/migrations/0006_remove_item_links_link.py b/web/concepts/migrations/0006_remove_item_links_link.py new file mode 100644 index 0000000..40ef5c5 --- /dev/null +++ b/web/concepts/migrations/0006_remove_item_links_link.py @@ -0,0 +1,62 @@ +# Generated by Django 4.2.6 on 2023-11-17 18:33 + +from django.db import migrations, models +import django.db.models.deletion + + +class Migration(migrations.Migration): + dependencies = [ + ("concepts", "0005_alter_item_options_alter_item_source"), + ] + + operations = [ + migrations.RemoveField( + model_name="item", + name="links", + ), + migrations.CreateModel( + name="Link", + fields=[ + ( + "id", + models.BigAutoField( + auto_created=True, + primary_key=True, + serialize=False, + verbose_name="ID", + ), + ), + ( + "label", + models.CharField( + choices=[ + ("Wd", "Wikidata"), + ("AUm", "Agda Unimath"), + ("eq", "same name"), + ], + max_length=4, + ), + ), + ( + "destination", + models.ForeignKey( + on_delete=django.db.models.deletion.CASCADE, + related_name="incoming_items", + to="concepts.item", + ), + ), + ( + "source", + models.ForeignKey( + on_delete=django.db.models.deletion.CASCADE, + related_name="outgoing_items", + to="concepts.item", + ), + ), + ], + options={ + "ordering": ["source", "destination", "label"], + "unique_together": {("source", "destination", "label")}, + }, + ), + ] diff --git a/web/concepts/migrations/0007_concept_item_concept.py b/web/concepts/migrations/0007_concept_item_concept.py new file mode 100644 index 0000000..78ab044 --- /dev/null +++ b/web/concepts/migrations/0007_concept_item_concept.py @@ -0,0 +1,42 @@ +# Generated by Django 4.2.6 on 2023-11-18 11:48 + +from django.db import migrations, models +import django.db.models.deletion + + +class Migration(migrations.Migration): + dependencies = [ + ("concepts", "0006_remove_item_links_link"), + ] + + operations = [ + migrations.CreateModel( + name="Concept", + fields=[ 
+ ( + "id", + models.BigAutoField( + auto_created=True, + primary_key=True, + serialize=False, + verbose_name="ID", + ), + ), + ("name", models.CharField(max_length=200, null=True, unique=True)), + ("description", models.TextField(null=True)), + ], + options={ + "ordering": ["name", "description"], + }, + ), + migrations.AddField( + model_name="item", + name="concept", + field=models.ForeignKey( + blank=True, + null=True, + on_delete=django.db.models.deletion.SET_NULL, + to="concepts.concept", + ), + ), + ] diff --git a/web/concepts/models.py b/web/concepts/models.py index 45cd004..e798ae2 100644 --- a/web/concepts/models.py +++ b/web/concepts/models.py @@ -1,4 +1,15 @@ +import logging + from django.db import models +from django.db.utils import IntegrityError + + +class Concept(models.Model): + name = models.CharField(max_length=200, null=True, unique=True) + description = models.TextField(null=True) + + class Meta: + ordering = ["name", "description"] class Item(models.Model): @@ -14,20 +25,80 @@ class Source(models.TextChoices): url = models.URLField(max_length=200) name = models.CharField(max_length=200, null=True) description = models.TextField(null=True) - links = models.ManyToManyField("self", blank=True) + concept = models.ForeignKey( + Concept, + models.SET_NULL, + blank=True, + null=True, + ) class Meta: ordering = ["name", "source", "identifier"] unique_together = ["source", "identifier"] - def get_link(self): - return {"name": self.name, "source": self.source, "url": self.url} + @staticmethod + def source_key(): + SOURCES = [ + Item.Source.WIKIDATA, + Item.Source.NLAB, + Item.Source.MATHWORLD, + Item.Source.WIKIPEDIA_EN, + Item.Source.AGDA_UNIMATH, + ] + return lambda item: SOURCES.index(item.source) + + def to_dict(self): + return {"name": self.name, "source": self.get_source_display(), "url": self.url} - def get_links(self): - return [linked_item.get_link() for linked_item in self.links.all()] + def get_linked_items(self): + linked_destinations = 
Link.objects.filter(source=self.id).map( + lambda link: link.destination + ) + linked_sources = Link.objects.filter(destination=self.id).map( + lambda link: link.source + ) + return set(linked_sources + linked_destinations) + + def get_linked_item_urls(self): + return [i.get_url() for i in self.get_linked_items()] def __str__(self): if self.name: return f"{self.get_source_display()}: {self.identifier} ({self.name})" else: return f"{self.get_source_display()}: {self.identifier}" + + +class Link(models.Model): + class Label(models.TextChoices): + WIKIDATA = "Wd", "Wikidata" + AGDA_UNIMATH = "AUm", "Agda Unimath" + NAME_EQ = "eq", "same name" + + source = models.ForeignKey( + Item, on_delete=models.CASCADE, related_name="outgoing_items" + ) + destination = models.ForeignKey( + Item, on_delete=models.CASCADE, related_name="incoming_items" + ) + label = models.CharField(max_length=4, choices=Label.choices) + + class Meta: + ordering = ["source", "destination", "label"] + unique_together = ["source", "destination", "label"] + + @staticmethod + def save_new(source: Item, destination: Item, label: Label): + try: + new_link = Link.objects.create( + source=source, destination=destination, label=label + ) + new_link.save() + except IntegrityError: + logging.log( + logging.WARNING, + f" Link from {source} to {destination} repeated in {label}.", + ) + + def __str__(self): + return f"{self.source} -[{self.get_label_display()}]-> {self.destination}" diff --git a/web/concepts/templates/base.html b/web/concepts/templates/base.html new file mode 100644 index 0000000..2e0e89a --- /dev/null +++ b/web/concepts/templates/base.html @@ -0,0 +1,30 @@ + +{% load static %} + + + + + + + mathswitch + + + +
+
+

mathswitch

+

Infrastructure for relaying and exchanging mathematical concepts.

+
+
+
+ {% block content %} + {% endblock %} +
+ + + \ No newline at end of file diff --git a/web/concepts/templates/detail.html b/web/concepts/templates/detail.html index f55faa4..88c6fe8 100644 --- a/web/concepts/templates/detail.html +++ b/web/concepts/templates/detail.html @@ -1,19 +1,11 @@ - - - - - - - mathswitch - - -
- {{ item.identifier }}
- {{ item.name }}
- {{ item.description }}
- {% for link in item.links %} - {{ link.source }}: {{ link.name }}
- {% endfor %} -
- - \ No newline at end of file +{% extends 'base.html' %} + +{% block content %} +

{{ concept.name }}

+ {% if concept.description != None %} +

{{ concept.description }}

+ {% endif %} + {% for item in concept.items %} + {{ item.source }}: {{ item.name }}
+ {% endfor %} +{% endblock %} diff --git a/web/concepts/templates/index.html b/web/concepts/templates/index.html index 9dfaa45..69ead73 100644 --- a/web/concepts/templates/index.html +++ b/web/concepts/templates/index.html @@ -1,34 +1,18 @@ - -{% load static %} - - - - - - - mathswitch - - - -
-
-

mathswitch

-

Infrastructure for relaying and exchanging mathematical concepts.

-
-
-
-
- - -
-

Examples

-

By Wikidata identifier: Q1369621

-
- - - \ No newline at end of file +{% extends 'base.html' %} + +{% block content %} + +
+ + + + {% for c in concepts %} + + +
+

Examples

+

Schwartz space

from urllib.parse import quote

from concepts.models import Concept, Item
from django.shortcuts import get_object_or_404, redirect, render


def concept(request, name):
    """Render the detail page of the concept called ``name``; 404 if none exists."""
    found = get_object_or_404(Concept, name=name)
    context = {
        "concept": {
            "name": found.name,
            "description": found.description,
            "items": [
                item.to_dict() for item in Item.objects.filter(concept=found.id)
            ],
        }
    }
    return render(request, "detail.html", context)


def home(request):
    """Render the landing page with all concept names for the autocomplete list."""
    # Fetch only the name column and skip unnamed concepts, which would
    # otherwise reach the template as ``None``.
    names = Concept.objects.exclude(name__isnull=True).values_list(
        "name", flat=True
    )
    context = {"concepts": list(names)}
    return render(request, "index.html", context)


def search(request):
    """Redirect ``?q=<name>`` to that concept's detail page.

    A missing or blank ``q`` redirects to the home page instead of failing.
    """
    search_value = (request.GET.get("q") or "").strip()
    if not search_value:
        return redirect("/")
    # Percent-encode the name so characters such as ``#``, ``?`` or ``%`` stay
    # part of the path; the trailing slash matches the concept route.
    return redirect("/concept/" + quote(search_value, safe="") + "/")
Item {item.identifier} repeated.") + logging.log(logging.WARNING, f" Link from {item.identifier} repeated.") def save_links(self): for json_item in self.raw_data: @@ -55,11 +55,11 @@ def save_links(self): if "nlabID" in json_item: nlab_id = json_item["nlabID"]["value"] try: - linkToItem = Item.objects.get( + destinationItem = Item.objects.get( source=Item.Source.NLAB, identifier=nlab_id ) - currentItem.links.add(linkToItem) - except IntegrityError: + Link.save_new(currentItem, destinationItem, Link.Label.WIKIDATA) + except Item.DoesNotExist: logging.log( logging.WARNING, f" NLab item {nlab_id} does not exist in the database.", @@ -67,11 +67,12 @@ def save_links(self): elif "mwID" in json_item: mw_id = json_item["mwID"]["value"] try: - linkToItem = Item.objects.get( + print("looking for ", mw_id) + destinationItem = Item.objects.get( source=Item.Source.MATHWORLD, identifier=mw_id ) - currentItem.links.add(linkToItem) - except IntegrityError: + Link.save_new(currentItem, destinationItem, Link.Label.WIKIDATA) + except Item.DoesNotExist: logging.log( logging.WARNING, f" MathWorld item {mw_id} does not exist in the database.", @@ -79,11 +80,11 @@ def save_links(self): else: # link back to WD items wd_id = json_item["item"]["value"].split("/")[-1] try: - linkToItem = Item.objects.get( + destinationItem = Item.objects.get( source=Item.Source.WIKIDATA, identifier=wd_id ) - currentItem.links.add(linkToItem) - except IntegrityError: + Link.save_new(currentItem, destinationItem, Link.Label.WIKIDATA) + except Item.DoesNotExist: logging.log( logging.WARNING, f" Wikidata item {wd_id} does not exist in the database.", diff --git a/web/web/settings.py b/web/web/settings.py index 94d6398..1a65e20 100644 --- a/web/web/settings.py +++ b/web/web/settings.py @@ -10,6 +10,7 @@ https://docs.djangoproject.com/en/4.2/ref/settings/ """ +from os import path from pathlib import Path # Build paths inside the project like this: BASE_DIR / 'subdir'. 
@@ -23,9 +24,9 @@ SECRET_KEY = "django-insecure-9wy9w#vf^tde0262doyy_j19=64c()_qub!1)f+fh-b^=7ndw*" # SECURITY WARNING: don't run with debug turned on in production! -DEBUG = False +DEBUG = True -ALLOWED_HOSTS = ["0.0.0.0"] +ALLOWED_HOSTS = ["*"] # Application definition @@ -119,6 +120,8 @@ STATIC_URL = "static/" +STATIC_ROOT = "/var/www/mathswitch/static" + # Default primary key field type # https://docs.djangoproject.com/en/4.2/ref/settings/#default-auto-field diff --git a/web/web/urls.py b/web/web/urls.py index 4e15fc0..c0405dd 100644 --- a/web/web/urls.py +++ b/web/web/urls.py @@ -15,11 +15,14 @@ 2. Add a URL to urlpatterns: path('blog/', include('blog.urls')) """ from concepts import views +from django.conf import settings +from django.conf.urls.static import static from django.contrib import admin from django.urls import include, path urlpatterns = [ path("", views.home), path("concept/", include("concepts.urls")), + path("search/", views.search), path("admin/", admin.site.urls), -] +] + static(settings.STATIC_URL, document_root=settings.STATIC_ROOT)