Skip to content

Commit

Permalink
Implement naive concepts, basic autocomplete
Browse files Browse the repository at this point in the history
  • Loading branch information
katjabercic committed Nov 21, 2023
1 parent 0fc46c5 commit 85490df
Show file tree
Hide file tree
Showing 15 changed files with 401 additions and 85 deletions.
2 changes: 1 addition & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@

Infrastructure for relaying and exchanging mathematical concepts.

For a demonstration of a page with at least one link, see for example `{baseurl}/concept/Q1369621/`.
For a demonstration of a page with at least one link, see for example `{baseurl}/concept/Schwartz%20space/`.

## Notes on installation and usage

Expand Down
Empty file.
103 changes: 103 additions & 0 deletions web/concepts/management/commands/compute_concepts.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,103 @@
import logging
from typing import Dict, List

from concepts.models import Concept, Item, Link
from django.core.management.base import BaseCommand
from django.db.models import Q
from django.db.utils import IntegrityError


class UnionFind:
    """Disjoint-set forest over the integers ``0 .. size - 1``.

    Sets are merged with union by rank, and ``find`` compresses the path it
    walks so later lookups are shorter.
    """

    def __init__(self, size):
        """Create ``size`` one-element sets, one per integer in ``range(size)``."""
        self.parent = list(range(size))
        self.rank = [0] * size

    def find(self, x):
        """Return the root (representative) of the set containing ``x``.

        Implemented iteratively so that a long parent chain cannot exhaust
        the interpreter's recursion limit.
        """
        # First pass: walk up to the root.
        root = x
        while self.parent[root] != root:
            root = self.parent[root]
        # Second pass: point every node on the walked path straight at the root.
        while self.parent[x] != root:
            above = self.parent[x]
            self.parent[x] = root
            x = above
        return root

    def union(self, x, y):
        """Merge the sets containing ``x`` and ``y`` (no-op if already merged)."""
        root_x = self.find(x)
        root_y = self.find(y)

        if root_x != root_y:
            if self.rank[root_x] < self.rank[root_y]:
                self.parent[root_x] = root_y
            elif self.rank[root_x] > self.rank[root_y]:
                self.parent[root_y] = root_x
            else:
                # Equal ranks: keep root_x as the root and record the growth.
                self.parent[root_y] = root_x
                self.rank[root_x] += 1

    def get_components(self):
        """Return a dict mapping each root to the list of members of its set.

        Members are listed in increasing order.
        """
        components = {}
        for i in range(len(self.parent)):
            components.setdefault(self.find(i), []).append(i)
        return components


class Command(BaseCommand):
    """Assign every item to a concept.

    Items that appear in no link each get a concept of their own. The
    remaining items are grouped into the connected components of the link
    graph, and each component shares one concept.
    """

    # Bookkeeping for the linked items, rebuilt on every ``handle`` run:
    # position in the union-find -> item id, item id -> position, and
    # item id -> item.
    num_to_id: List[str]
    id_to_num: Dict[str, int]
    id_to_item: Dict[str, Item]

    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)
        # Per-instance containers; class-level mutables would be shared.
        self.num_to_id = []
        self.id_to_num = {}
        self.id_to_item = {}

    def union(self, uf, link: Link):
        """Merge the components of the two items joined by ``link``."""
        # The raw ``*_id`` columns avoid loading both related items per link.
        uf.union(
            self.id_to_num[link.source_id], self.id_to_num[link.destination_id]
        )

    def _save_concept(self, item: Item) -> Concept:
        """Return a saved concept named after ``item``.

        If the unique name is already taken, a warning is logged and the
        existing concept with that name is returned instead.
        """
        concept = Concept(name=item.name, description=item.description)
        try:
            concept.save()
        except IntegrityError:
            logging.warning(f" A concept named '{concept.name}' already exists.")
            concept = Concept.objects.get(name=item.name)
        return concept

    def handle(self, *args, **options):
        """Create the concepts and attach every item to one."""
        # all items that do not appear in an edge are components
        singletons = Item.objects.filter(
            incoming_items__isnull=True, outgoing_items__isnull=True
        )
        for item in singletons:
            item.concept = self._save_concept(item)
            # Persist the assignment; without this the link to the concept
            # is lost when the command exits.
            item.save()

        print("compute non-singletons")
        # now deal with those that do not have a concept yet
        linked_items = Item.objects.filter(
            Q(incoming_items__isnull=False) | Q(outgoing_items__isnull=False)
        )
        print(linked_items.query)

        # The joins behind this filter return an item once per matching link.
        # Keying by id keeps one entry per item, so no duplicate shows up as
        # a spurious one-element component.
        self.id_to_item = {item.id: item for item in linked_items}
        self.num_to_id = list(self.id_to_item)
        self.id_to_num = {
            item_id: num for num, item_id in enumerate(self.num_to_id)
        }

        uf = UnionFind(len(self.num_to_id))
        for link in Link.objects.all():
            self.union(uf, link)

        for component in uf.get_components().values():
            items = [self.id_to_item[self.num_to_id[num]] for num in component]
            # Name the concept after the item whose source ranks first in
            # Item.source_key (Wikidata comes first there).
            items.sort(key=Item.source_key())
            concept = self._save_concept(items[0])
            print(f"linking {concept.id}")
            for item in items:
                item.concept = concept
                item.save()
18 changes: 18 additions & 0 deletions web/concepts/management/commands/link_same.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
from concepts.models import Item, Link
from django.core.management.base import BaseCommand
from django.db.models import Count


class Command(BaseCommand):
    """Link every pair of items that share the same name."""

    def handle(self, *args, **options):
        """Create a ``NAME_EQ`` link for each pair of equally named items."""
        # Imported here so the block carries its own dependency.
        from itertools import combinations

        print("link same")
        # Names carried by at least two items.
        repeated_names = (
            Item.objects.all()
            .values("name")
            .annotate(total=Count("name"))
            .filter(total__gte=2)
        )
        for name_count in repeated_names:
            items = Item.objects.filter(name=name_count["name"])
            # Each unordered pair once, earlier item first.
            for first, second in combinations(items, 2):
                Link.save_new(first, second, Link.Label.NAME_EQ)
62 changes: 62 additions & 0 deletions web/concepts/migrations/0006_remove_item_links_link.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,62 @@
# Generated by Django 4.2.6 on 2023-11-17 18:33

from django.db import migrations, models
import django.db.models.deletion


class Migration(migrations.Migration):
    # Drops the ``links`` field from ``Item`` and creates the ``Link`` model.

    dependencies = [("concepts", "0005_alter_item_options_alter_item_source")]

    operations = [
        migrations.RemoveField(model_name="item", name="links"),
        migrations.CreateModel(
            name="Link",
            fields=[
                (
                    "id",
                    models.BigAutoField(
                        verbose_name="ID",
                        serialize=False,
                        primary_key=True,
                        auto_created=True,
                    ),
                ),
                (
                    "label",
                    models.CharField(
                        max_length=4,
                        choices=[
                            ("Wd", "Wikidata"),
                            ("AUm", "Agda Unimath"),
                            ("eq", "same name"),
                        ],
                    ),
                ),
                (
                    "destination",
                    models.ForeignKey(
                        to="concepts.item",
                        related_name="incoming_items",
                        # models.CASCADE is django.db.models.deletion.CASCADE
                        on_delete=models.CASCADE,
                    ),
                ),
                (
                    "source",
                    models.ForeignKey(
                        to="concepts.item",
                        related_name="outgoing_items",
                        on_delete=models.CASCADE,
                    ),
                ),
            ],
            options={
                "ordering": ["source", "destination", "label"],
                "unique_together": {("source", "destination", "label")},
            },
        ),
    ]
42 changes: 42 additions & 0 deletions web/concepts/migrations/0007_concept_item_concept.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,42 @@
# Generated by Django 4.2.6 on 2023-11-18 11:48

from django.db import migrations, models
import django.db.models.deletion


class Migration(migrations.Migration):
    # Creates the ``Concept`` model and adds a nullable ``concept`` foreign
    # key to ``Item``.

    dependencies = [("concepts", "0006_remove_item_links_link")]

    operations = [
        migrations.CreateModel(
            name="Concept",
            fields=[
                (
                    "id",
                    models.BigAutoField(
                        verbose_name="ID",
                        serialize=False,
                        primary_key=True,
                        auto_created=True,
                    ),
                ),
                ("name", models.CharField(max_length=200, null=True, unique=True)),
                ("description", models.TextField(null=True)),
            ],
            options={"ordering": ["name", "description"]},
        ),
        migrations.AddField(
            model_name="item",
            name="concept",
            field=models.ForeignKey(
                to="concepts.concept",
                # models.SET_NULL is django.db.models.deletion.SET_NULL
                on_delete=models.SET_NULL,
                null=True,
                blank=True,
            ),
        ),
    ]
81 changes: 76 additions & 5 deletions web/concepts/models.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,15 @@
import logging

from django.db import models
from django.db.utils import IntegrityError


class Concept(models.Model):
    """A named concept that items can be attached to via ``Item.concept``."""

    # Unique but nullable: a concept may exist without a name.
    name = models.CharField(max_length=200, null=True, unique=True)
    description = models.TextField(null=True)

    class Meta:
        ordering = ["name", "description"]

    def __str__(self):
        # ``name`` may be NULL, and __str__ must always return a string.
        if self.name:
            return self.name
        return f"Concept {self.pk}"


class Item(models.Model):
Expand All @@ -14,20 +25,80 @@ class Source(models.TextChoices):
url = models.URLField(max_length=200)
name = models.CharField(max_length=200, null=True)
description = models.TextField(null=True)
links = models.ManyToManyField("self", blank=True)
concept = models.ForeignKey(
Concept,
models.SET_NULL,
blank=True,
null=True,
)

class Meta:
ordering = ["name", "source", "identifier"]
unique_together = ["source", "identifier"]

def get_link(self):
    """Return this item's name, raw source value and URL as a dict."""
    link = dict(name=self.name, source=self.source, url=self.url)
    return link
@staticmethod
def source_key():
    """Return a sort key that ranks items by their source.

    The key is the position of ``item.source`` in the priority list below,
    so Wikidata items sort first. A source missing from the list makes the
    key raise ``ValueError``.
    """
    priority = [
        Item.Source.WIKIDATA,
        Item.Source.NLAB,
        Item.Source.MATHWORLD,
        Item.Source.WIKIPEDIA_EN,
        Item.Source.AGDA_UNIMATH,
    ]

    def rank(item):
        return priority.index(item.source)

    return rank

def to_dict(self):
    """Return this item's name, source display label and URL as a dict."""
    source_label = self.get_source_display()
    return dict(name=self.name, source=source_label, url=self.url)

def get_links(self):
    """Return ``get_link()`` for every item reachable through ``self.links``."""
    # NOTE(review): migration 0006 removes the ``links`` field this relies
    # on; confirm whether this method is still meant to exist.
    collected = []
    for linked_item in self.links.all():
        collected.append(linked_item.get_link())
    return collected
def get_linked_items(self):
    """Return the set of items joined to this one by a link, in either direction."""
    # QuerySet has no ``map`` method and two querysets cannot be joined
    # with ``+``, so the items are collected in Python instead.
    # select_related loads the item at the other end in the same query.
    outgoing = Link.objects.filter(source=self.id).select_related("destination")
    incoming = Link.objects.filter(destination=self.id).select_related("source")
    linked = {link.destination for link in outgoing}
    linked.update(link.source for link in incoming)
    return linked

def get_linked_item_urls(self):
    """Return the URL of every item linked to this one."""
    # The address is stored in the ``url`` field. No ``get_url`` method is
    # defined on the model in the visible code, so read the field directly.
    return [item.url for item in self.get_linked_items()]

def __str__(self):
    """Render as ``<source label>: <identifier>``, plus `` (<name>)`` when named."""
    text = f"{self.get_source_display()}: {self.identifier}"
    if not self.name:
        return text
    return f"{text} ({self.name})"


class Link(models.Model):
    """A labelled, directed link from one item to another.

    A given (source, destination, label) triple can be stored only once.
    """

    class Label(models.TextChoices):
        WIKIDATA = "Wd", "Wikidata"
        AGDA_UNIMATH = "AUm", "Agda Unimath"
        NAME_EQ = "eq", "same name"

    # Reverse accessors on Item: ``outgoing_items`` are the links that start
    # at an item, ``incoming_items`` the links that end at it.
    source = models.ForeignKey(
        Item, on_delete=models.CASCADE, related_name="outgoing_items"
    )
    destination = models.ForeignKey(
        Item, on_delete=models.CASCADE, related_name="incoming_items"
    )
    label = models.CharField(max_length=4, choices=Label.choices)

    class Meta:
        ordering = ["source", "destination", "label"]
        unique_together = ["source", "destination", "label"]

    @staticmethod
    def save_new(source: Item, destination: Item, label: Label):
        """Store a new link, logging a warning if the same link already exists.

        A duplicate (source, destination, label) triple raises
        ``IntegrityError``, which is caught and logged rather than propagated.
        """
        try:
            # ``objects.create`` already writes the row; a following
            # ``save()`` would only issue a redundant second query.
            Link.objects.create(source=source, destination=destination, label=label)
        except IntegrityError:
            logging.warning(
                f" Link from {source} to {destination} repeated in {label}."
            )

    def __str__(self):
        return f"{self.source} -[{self.get_label_display()}]-> {self.destination}"
30 changes: 30 additions & 0 deletions web/concepts/templates/base.html
Original file line number Diff line number Diff line change
@@ -0,0 +1,30 @@
<!doctype html>
{% load static %}
<!-- Base page layout: shared head, site header, a "content" block and a footer. -->
<html lang="en">
<head>
<meta charset="utf-8">
<meta name="viewport" content="width=device-width, initial-scale=1">
<!-- Pico CSS v1 from the jsDelivr CDN, followed by the app's own stylesheet. -->
<link rel="stylesheet" href="https://cdn.jsdelivr.net/npm/@picocss/pico@1/css/pico.min.css">
<link rel="stylesheet" href="{% static 'concepts/mathswitch.css' %}">
<title>mathswitch</title>
<meta
name="description"
content="Infrastructure for relaying and exchanging mathematical concepts."
/>
</head>
<body>
<header class="container">
<hgroup>
<h1>mathswitch</h1>
<p>Infrastructure for relaying and exchanging mathematical concepts.</p>
</hgroup>
</header>
<main class="container">
<!-- Empty by default; presumably overridden by templates that extend this one. -->
{% block content %}
{% endblock %}
</main>
<footer class="container">
<!-- NOTE(review): no src attribute; presumably the image comes from the powered-by-wikidata CSS class. Confirm. -->
<img class="powered-by-wikidata" alt="Powered by Wikidata" />
</footer>
</body>
</html>
Loading

0 comments on commit 85490df

Please sign in to comment.