Skip to content

Commit

Permalink
City-of-Helsinki#41 Alternative data model and import for ontologies
Browse files Browse the repository at this point in the history
  • Loading branch information
jukvalim committed Mar 24, 2017
1 parent 9680815 commit a2b77bd
Show file tree
Hide file tree
Showing 4 changed files with 117 additions and 42 deletions.
87 changes: 55 additions & 32 deletions services/management/commands/services_import_v4.py
Original file line number Diff line number Diff line change
Expand Up @@ -103,21 +103,56 @@ def _set_field(self, obj, field_name, val):
setattr(obj, field_name, val)
obj._changed = True

def _save_searchwords(self, obj, info, language):
    """Add the keyword ids for one language to ``obj.new_keywords``.

    Reads the comma-separated ``extra_searchwords_<language>`` entry of
    ``info``, strips each word and drops empty ones.  Words that are not
    yet cached in ``self.keywords[language]`` are created as ``Keyword``
    rows, saved, and registered in both ``self.keywords`` and
    ``self.keywords_by_id``.

    Args:
        obj: object whose ``new_keywords`` set is extended in place with
            the primary keys of the matching keywords.
        info: mapping holding the imported data for ``obj``.
        language: language code used both in the field name and as the
            ``Keyword.language`` value.

    A missing, ``None`` or empty field contributes no keywords.
    """
    field_name = 'extra_searchwords_%s' % language
    # ``or ''`` treats an explicit None the same way as a missing key
    # instead of failing on ``None.split``.
    raw_words = info.get(field_name) or ''

    known_keywords = self.keywords[language]
    new_kw_set = set()
    for kw in (part.strip() for part in raw_words.split(',')):
        if not kw:
            continue
        if kw not in known_keywords:
            kw_obj = Keyword(name=kw, language=language)
            kw_obj.save()
            # Cache the new keyword under both lookup keys so later
            # objects reuse it instead of creating a duplicate row.
            known_keywords[kw] = kw_obj
            self.keywords_by_id[kw_obj.pk] = kw_obj
        else:
            kw_obj = known_keywords[kw]
        new_kw_set.add(kw_obj.pk)

    obj.new_keywords |= new_kw_set

def _sync_searchwords(self, obj, info):
    """Bring ``obj.keywords`` in line with the search words in ``info``.

    Collects the keyword ids for every language in
    ``self.supported_languages`` (via ``_save_searchwords``) into
    ``obj.new_keywords`` and compares them with the ids currently linked
    through ``obj.keywords``.  When the two sets differ, the relation is
    replaced and ``obj._changed`` is set to True; otherwise ``obj`` is
    left untouched apart from the ``new_keywords`` scratch attribute.

    Args:
        obj: object with a ``keywords`` relation.
        info: mapping holding the imported data for ``obj``.
    """
    obj.new_keywords = set()
    for lang in self.supported_languages:
        self._save_searchwords(obj, info, lang)

    old_kw_set = set(obj.keywords.all().values_list('pk', flat=True))
    if old_kw_set == obj.new_keywords:
        return

    if self.verbosity:
        def describe(pks):
            # Sorted so the log line is stable between runs; an id that is
            # missing from the cache is shown as-is rather than letting a
            # purely diagnostic message abort the import.
            names = []
            for pk in pks:
                cached = self.keywords_by_id.get(pk)
                names.append(cached.name if cached is not None else str(pk))
            return ', '.join(sorted(names))

        print("%s keyword set changed: %s -> %s" % (
            obj, describe(old_kw_set), describe(obj.new_keywords)))

    # NOTE(review): direct assignment to a many-to-many relation is
    # deprecated in newer Django releases in favour of ``.set()`` --
    # confirm the project's Django version before changing it.
    obj.keywords = list(obj.new_keywords)
    obj._changed = True

@db.transaction.atomic
def import_services(self):
ontologytrees = self.pk_get('ontologytree')
ontologywords = self.pk_get('ontologyword')
tree = self._build_servicetree(ontologytrees, ontologywords)
#print('top lever ' + str(len(tree)))
#print(str(tree[0]))
nodesyncher = ModelSyncher(ServiceNode.objects.all(), lambda obj: obj.id)
leafsyncher = ModelSyncher(ServiceLeaf.objects.all(), lambda obj: obj.id)
nodesyncher = ModelSyncher(ServiceTreeNode.objects.all(), lambda obj: obj.id)
servicesyncher = ModelSyncher(ServiceType.objects.all(), lambda obj: obj.id)


def handle_servicenode(d):
obj = nodesyncher.get(d['id'])
if not obj:
obj = ServiceNode(id=d['id'])
obj = ServiceTreeNode(id=d['id'])
obj._changed = True
self._save_translated_field(obj, 'name', d, 'name')

Expand All @@ -130,7 +165,7 @@ def handle_servicenode(d):
obj.parent = parent
obj._changed = True

#self._sync_searchwords(obj, d)
self._sync_searchwords(obj, d)

if obj._changed:
#obj.unit_count = obj.get_unit_count()
Expand All @@ -142,24 +177,17 @@ def handle_servicenode(d):
for child_node in d['children']:
handle_servicenode(child_node)

leaf_objs = []
for leaf_node in d.get('leaves', []):
leaf_obj = handle_serviceleaf(leaf_node)
leaf_objs.append(leaf_obj)
if set(obj.leaves.all().values_list('id', flat=True)) != set([l.id for l in leaf_objs]):
obj.leaves.clear()
for l in leaf_objs:
obj.leaves.add(l)


def handle_serviceleaf(d):
obj = leafsyncher.get(d['id'])
def handle_servicetype(d):
obj = servicesyncher.get(d['id'])
if not obj:
obj = ServiceLeaf(id=d['id'])
obj = ServiceType(id=d['id'])
obj._changed = True

self._save_translated_field(obj, 'name', d, 'ontologyword')

self._sync_searchwords(obj, d)

if obj._changed:
#obj.unit_count = obj.get_unit_count()
obj.last_modified_time = datetime.now(UTC_TIMEZONE)
Expand All @@ -169,35 +197,30 @@ def handle_serviceleaf(d):
return obj


tree = self._build_servicetree(ontologytrees)
for d in tree:
handle_servicenode(d)

nodesyncher.finish()

def _build_servicetree(self, ontologytrees, ontologywords):
ontologywords_dict = {ow['id']: ow for ow in ontologywords}
for d in ontologywords:
handle_servicetype(d)

servicesyncher.finish()

def _build_servicetree(self, ontologytrees):
tree = [ot for ot in ontologytrees if not ot.get('parent_id')]
for parent_ot in tree:
self._add_ot_children(parent_ot, ontologytrees, ontologywords_dict)

if parent_ot.get('ontologyword_reference'):
parent_ot['leaves'] = []
for ow_id in parent_ot.get('ontologyword_reference').replace('*', '+').split('+'):
parent_ot['leaves'].append(ontologywords_dict.get(int(ow_id)))
self._add_ot_children(parent_ot, ontologytrees)

return tree

def _add_ot_children(self, parent_ot, ontologytrees, ontologywords_dict):
def _add_ot_children(self, parent_ot, ontologytrees):
parent_ot['children'] = [ot for ot in ontologytrees if
ot.get('parent_id') == parent_ot['id']]

for child_ot in parent_ot['children']:
self._add_ot_children(child_ot, ontologytrees, ontologywords_dict)

if parent_ot.get('ontologyword_reference'):
parent_ot['leaves'] = []
for ow_id in parent_ot.get('ontologyword_reference').replace('*', '+').split('+'):
parent_ot['leaves'].append(ontologywords_dict.get(int(ow_id)))
self._add_ot_children(child_ot, ontologytrees)


def handle(self, **options):
Expand Down
55 changes: 55 additions & 0 deletions services/migrations/0015_auto_20170324_1427.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,55 @@
# -*- coding: utf-8 -*-
from __future__ import unicode_literals

from django.db import migrations, models
import mptt.fields


# Schema migration for the alternative ontology data model: creates
# ServiceTreeNode, renames ServiceLeaf to ServiceType, and removes the
# ServiceNode model together with its relational fields.
class Migration(migrations.Migration):

# Must be applied on top of migration 0014 of the ``services`` app.
dependencies = [
('services', '0014_auto_20170323_1354'),
]

operations = [
# New tree model keyed by an explicit integer id, with a
# self-referencing ``parent`` TreeForeignKey (reverse name ``children``)
# and a many-to-many link to services.Keyword.
migrations.CreateModel(
name='ServiceTreeNode',
fields=[
('id', models.IntegerField(primary_key=True, serialize=False)),
('name', models.CharField(max_length=200, db_index=True)),
# Nullable per-language variants of ``name`` (fi, sv, en).
('name_fi', models.CharField(null=True, max_length=200, db_index=True)),
('name_sv', models.CharField(null=True, max_length=200, db_index=True)),
('name_en', models.CharField(null=True, max_length=200, db_index=True)),
('unit_count', models.PositiveIntegerField(null=True)),
('last_modified_time', models.DateTimeField(help_text='Time of last modification', db_index=True)),
# Non-editable indexed integers -- presumably django-mptt's tree
# bookkeeping columns (TODO confirm against the mptt documentation).
('lft', models.PositiveIntegerField(editable=False, db_index=True)),
('rght', models.PositiveIntegerField(editable=False, db_index=True)),
('tree_id', models.PositiveIntegerField(editable=False, db_index=True)),
('level', models.PositiveIntegerField(editable=False, db_index=True)),
('keywords', models.ManyToManyField(to='services.Keyword')),
('parent', mptt.fields.TreeForeignKey(to='services.ServiceTreeNode', null=True, related_name='children')),
],
options={
'abstract': False,
},
),
# ServiceLeaf is renamed to ServiceType.
migrations.RenameModel(
old_name='ServiceLeaf',
new_name='ServiceType',
),
# Remove servicenode's ``keywords``, ``leaves`` and ``parent`` fields
# before the model itself is deleted below.
migrations.RemoveField(
model_name='servicenode',
name='keywords',
),
migrations.RemoveField(
model_name='servicenode',
name='leaves',
),
migrations.RemoveField(
model_name='servicenode',
name='parent',
),
# Finally delete the ServiceNode model.
migrations.DeleteModel(
name='ServiceNode',
),
]
9 changes: 3 additions & 6 deletions services/models_v2.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,9 +25,7 @@
from .models import *


# TODO: When we get rid of the old model, this can be renamed as Service.
# This is named ServiceNode just so it won't conflict with Service.
class ServiceNode(MPTTModel):
class ServiceTreeNode(MPTTModel):
id = models.IntegerField(primary_key=True) # id of ontologytree
name = models.CharField(max_length=200, db_index=True)
parent = TreeForeignKey('self', null=True, related_name='children')
Expand All @@ -36,15 +34,14 @@ class ServiceNode(MPTTModel):

last_modified_time = models.DateTimeField(db_index=True, help_text='Time of last modification')

leaves = models.ManyToManyField("ServiceLeaf")

objects = ServiceManager()

def __str__(self):
return "%s (%s)" % (get_translated(self, 'name'), self.id)


class ServiceLeaf(models.Model):
# TODO: When we get rid of the old model, this can be renamed as Service.
class ServiceType(models.Model):
id = models.IntegerField(primary_key=True) # id of ontologyword
name = models.CharField(max_length=200, db_index=True)

Expand Down
8 changes: 4 additions & 4 deletions services/translation.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,10 +22,10 @@ class UnitConnectionTranslationOptions(TranslationOptions):
translator.register(UnitConnection, UnitConnectionTranslationOptions)


class ServiceNodeTranslationOptions(TranslationOptions):
class ServiceTreeNodeTranslationOptions(TranslationOptions):
fields = ('name',)
translator.register(ServiceNode, ServiceNodeTranslationOptions)
translator.register(ServiceTreeNode, ServiceTreeNodeTranslationOptions)

class ServiceLeafTranslationOptions(TranslationOptions):
class ServiceTypeTranslationOptions(TranslationOptions):
fields = ('name',)
translator.register(ServiceLeaf, ServiceLeafTranslationOptions)
translator.register(ServiceType, ServiceTypeTranslationOptions)

0 comments on commit a2b77bd

Please sign in to comment.