Skip to content

Commit

Permalink
City-of-Helsinki#41 First shot at modeling and fetching ontologies fr…
Browse files Browse the repository at this point in the history
…om v4 API (the logical structure isn't right yet)
  • Loading branch information
jukvalim committed Mar 23, 2017
1 parent 2249485 commit 9680815
Show file tree
Hide file tree
Showing 5 changed files with 366 additions and 0 deletions.
247 changes: 247 additions & 0 deletions services/management/commands/services_import_v4.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,247 @@
# -*- coding: utf-8 -*-
import sys
import re
import os
import json
from collections import defaultdict
import csv
from datetime import datetime
from optparse import make_option
import logging
import hashlib
from pprint import pprint

import requests
import requests_cache
import pytz
from django.core.management.base import BaseCommand
from django import db
from django.conf import settings
from django.db import transaction
from django.contrib.gis.geos import Point, Polygon
from django.contrib.gis.gdal import SpatialReference, CoordTransform
from django.utils.translation import activate, get_language

from munigeo.models import Municipality
from munigeo.importer.sync import ModelSyncher
from services.models import *

URL_BASE = 'http://www.hel.fi/palvelukarttaws/rest/v4/'
GK25_SRID = 3879

UTC_TIMEZONE = pytz.timezone('UTC')


class Command(BaseCommand):
help = "Import services from Palvelukartta REST API"
option_list = list(BaseCommand.option_list)
#option_list = list(BaseCommand.option_list + (
# make_option('--cached', dest='cached', action='store_true', help='cache HTTP requests'),
# make_option('--single', dest='single', action='store', metavar='ID', type='string', help='import only single entity'),
#))

importer_types = ['services']
supported_languages = ['fi', 'sv', 'en']

def __init__(self):
super(Command, self).__init__()
for imp in self.importer_types:
method = "import_%s" % imp
assert getattr(self, method, False), "No importer defined for %s" % method
opt = make_option('--%s' % imp, dest=imp, action='store_true', help='import %s' % imp)
self.option_list.append(opt)
self.services = {}
self.existing_service_ids = None


def clean_text(self, text):
#text = text.replace('\n', ' ')
#text = text.replace(u'\u00a0', ' ')
# remove consecutive whitespaces
text = re.sub(r'\s\s+', ' ', text, re.U)
# remove nil bytes
text = text.replace('\u0000', ' ')
text = text.strip()
return text

def pk_get(self, resource_name, res_id=None):
url = "%s%s/" % (URL_BASE, resource_name)
if res_id != None:
url = "%s%s/" % (url, res_id)
resp = requests.get(url)
assert resp.status_code == 200
return resp.json()

def _save_translated_field(self, obj, obj_field_name, info, info_field_name, max_length=None):
args = {}
for lang in ('fi', 'sv', 'en'):
key = '%s_%s' % (info_field_name, lang)
if key in info:
val = self.clean_text(info[key])
else:
val = None
if max_length and val and len(val) > max_length:
if self.verbosity:
self.logger.warning("%s: field '%s' too long" % (obj, obj_field_name))
val = None
obj_key = '%s_%s' % (obj_field_name, lang)
obj_val = getattr(obj, obj_key, None)
if obj_val == val:
continue

setattr(obj, obj_key, val)
if lang == 'fi':
setattr(obj, obj_field_name, val)
obj._changed = True

def _set_field(self, obj, field_name, val):
if not hasattr(obj, field_name):
print(vars(obj))
obj_val = getattr(obj, field_name)
if obj_val == val:
return
setattr(obj, field_name, val)
obj._changed = True

@db.transaction.atomic
def import_services(self):
ontologytrees = self.pk_get('ontologytree')
ontologywords = self.pk_get('ontologyword')
tree = self._build_servicetree(ontologytrees, ontologywords)
#print('top lever ' + str(len(tree)))
#print(str(tree[0]))
nodesyncher = ModelSyncher(ServiceNode.objects.all(), lambda obj: obj.id)
leafsyncher = ModelSyncher(ServiceLeaf.objects.all(), lambda obj: obj.id)


def handle_servicenode(d):
obj = nodesyncher.get(d['id'])
if not obj:
obj = ServiceNode(id=d['id'])
obj._changed = True
self._save_translated_field(obj, 'name', d, 'name')

if 'parent_id' in d:
parent = nodesyncher.get(d['parent_id'])
assert parent
else:
parent = None
if obj.parent != parent:
obj.parent = parent
obj._changed = True

#self._sync_searchwords(obj, d)

if obj._changed:
#obj.unit_count = obj.get_unit_count()
obj.last_modified_time = datetime.now(UTC_TIMEZONE)
obj.save()
self.services_changed = True
nodesyncher.mark(obj)

for child_node in d['children']:
handle_servicenode(child_node)

leaf_objs = []
for leaf_node in d.get('leaves', []):
leaf_obj = handle_serviceleaf(leaf_node)
leaf_objs.append(leaf_obj)
if set(obj.leaves.all().values_list('id', flat=True)) != set([l.id for l in leaf_objs]):
obj.leaves.clear()
for l in leaf_objs:
obj.leaves.add(l)


def handle_serviceleaf(d):
obj = leafsyncher.get(d['id'])
if not obj:
obj = ServiceLeaf(id=d['id'])
obj._changed = True

self._save_translated_field(obj, 'name', d, 'ontologyword')

if obj._changed:
#obj.unit_count = obj.get_unit_count()
obj.last_modified_time = datetime.now(UTC_TIMEZONE)
obj.save()
self.services_changed = True

return obj


for d in tree:
handle_servicenode(d)

nodesyncher.finish()

def _build_servicetree(self, ontologytrees, ontologywords):
ontologywords_dict = {ow['id']: ow for ow in ontologywords}
tree = [ot for ot in ontologytrees if not ot.get('parent_id')]
for parent_ot in tree:
self._add_ot_children(parent_ot, ontologytrees, ontologywords_dict)

if parent_ot.get('ontologyword_reference'):
parent_ot['leaves'] = []
for ow_id in parent_ot.get('ontologyword_reference').replace('*', '+').split('+'):
parent_ot['leaves'].append(ontologywords_dict.get(int(ow_id)))

return tree

def _add_ot_children(self, parent_ot, ontologytrees, ontologywords_dict):
parent_ot['children'] = [ot for ot in ontologytrees if
ot.get('parent_id') == parent_ot['id']]

for child_ot in parent_ot['children']:
self._add_ot_children(child_ot, ontologytrees, ontologywords_dict)

if parent_ot.get('ontologyword_reference'):
parent_ot['leaves'] = []
for ow_id in parent_ot.get('ontologyword_reference').replace('*', '+').split('+'):
parent_ot['leaves'].append(ontologywords_dict.get(int(ow_id)))


def handle(self, **options):
self.options = options
self.verbosity = int(options.get('verbosity', 1))
self.org_syncher = None
self.dept_syncher = None
self.logger = logging.getLogger(__name__)
self.services_changed = False
self.count_services = set()
self.keywords = {}
for lang in self.supported_languages:
kw_list = Keyword.objects.filter(language=lang)
kw_dict = {kw.name: kw for kw in kw_list}
self.keywords[lang] = kw_dict
self.keywords_by_id = {kw.pk: kw for kw in Keyword.objects.all()}

#if options['cached']:
# requests_cache.install_cache('services_import')

# Activate the default language for the duration of the import
# to make sure translated fields are populated correctly.
old_lang = get_language()
activate(settings.LANGUAGES[0][0])

import_count = 0
for imp in self.importer_types:
if not self.options[imp]:
continue
method = getattr(self, "import_%s" % imp)
if self.verbosity:
print("Importing %s..." % imp)
method()
import_count += 1

#if self.services_changed:
# self.update_root_services()
#if self.count_services:
# self.update_unit_counts()
#self.update_division_units()

if not import_count:
sys.stderr.write("Nothing to import.\n")
activate(old_lang)



54 changes: 54 additions & 0 deletions services/migrations/0014_auto_20170323_1354.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,54 @@
# -*- coding: utf-8 -*-
from __future__ import unicode_literals

from django.db import migrations, models
import mptt.fields


class Migration(migrations.Migration):

dependencies = [
('services', '0013_unitconnection_order'),
]

operations = [
migrations.CreateModel(
name='ServiceLeaf',
fields=[
('id', models.IntegerField(primary_key=True, serialize=False)),
('name', models.CharField(max_length=200, db_index=True)),
('name_fi', models.CharField(max_length=200, null=True, db_index=True)),
('name_sv', models.CharField(max_length=200, null=True, db_index=True)),
('name_en', models.CharField(max_length=200, null=True, db_index=True)),
('unit_count', models.PositiveIntegerField(null=True)),
('last_modified_time', models.DateTimeField(help_text='Time of last modification', db_index=True)),
('keywords', models.ManyToManyField(to='services.Keyword')),
],
),
migrations.CreateModel(
name='ServiceNode',
fields=[
('id', models.IntegerField(primary_key=True, serialize=False)),
('name', models.CharField(max_length=200, db_index=True)),
('name_fi', models.CharField(max_length=200, null=True, db_index=True)),
('name_sv', models.CharField(max_length=200, null=True, db_index=True)),
('name_en', models.CharField(max_length=200, null=True, db_index=True)),
('unit_count', models.PositiveIntegerField(null=True)),
('last_modified_time', models.DateTimeField(help_text='Time of last modification', db_index=True)),
('lft', models.PositiveIntegerField(db_index=True, editable=False)),
('rght', models.PositiveIntegerField(db_index=True, editable=False)),
('tree_id', models.PositiveIntegerField(db_index=True, editable=False)),
('level', models.PositiveIntegerField(db_index=True, editable=False)),
('keywords', models.ManyToManyField(to='services.Keyword')),
('leaves', models.ManyToManyField(to='services.ServiceLeaf')),
('parent', mptt.fields.TreeForeignKey(related_name='children', null=True, to='services.ServiceNode')),
],
options={
'abstract': False,
},
),
migrations.AlterModelOptions(
name='unitconnection',
options={'ordering': ['order']},
),
]
2 changes: 2 additions & 0 deletions services/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -231,3 +231,5 @@ class UnitAlias(models.Model):
# Not a foreign key, might need
# to reference nonexistent models
second = models.IntegerField(db_index=True, unique=True)

from .models_v2 import *
54 changes: 54 additions & 0 deletions services/models_v2.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,58 @@
"""
Models added only to support the v4 interface of the TPR API. Extends the previous models.py
"""
from django.contrib.gis.db import models
from django.db.models.query import QuerySet

from django.utils.encoding import python_2_unicode_compatible
from mptt.models import MPTTModel, TreeForeignKey, TreeManager
from django.conf import settings
from django.db.models import Q

from django.contrib.postgres.fields import HStoreField

from munigeo.models import AdministrativeDivision, Municipality
from munigeo.utils import get_default_srid

DEFAULT_LANG = settings.LANGUAGES[0][0]
PROJECTION_SRID = get_default_srid()

import django.db
from django.db.models.signals import pre_migrate
from django.dispatch import receiver
import sys

from .models import *


# TODO: When we get rid of the old model, this can be renamed as Service.
# This is named ServiceNode just so it won't conflict with Service.
class ServiceNode(MPTTModel):
id = models.IntegerField(primary_key=True) # id of ontologytree
name = models.CharField(max_length=200, db_index=True)
parent = TreeForeignKey('self', null=True, related_name='children')
unit_count = models.PositiveIntegerField(null=True)
keywords = models.ManyToManyField(Keyword)

last_modified_time = models.DateTimeField(db_index=True, help_text='Time of last modification')

leaves = models.ManyToManyField("ServiceLeaf")

objects = ServiceManager()

def __str__(self):
return "%s (%s)" % (get_translated(self, 'name'), self.id)


class ServiceLeaf(models.Model):
id = models.IntegerField(primary_key=True) # id of ontologyword
name = models.CharField(max_length=200, db_index=True)

unit_count = models.PositiveIntegerField(null=True)
keywords = models.ManyToManyField(Keyword)

last_modified_time = models.DateTimeField(db_index=True, help_text='Time of last modification')

def __str__(self):
return "%s (%s)" % (get_translated(self, 'name'), self.id)

9 changes: 9 additions & 0 deletions services/translation.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,3 +20,12 @@ class UnitTranslationOptions(TranslationOptions):
class UnitConnectionTranslationOptions(TranslationOptions):
fields = ('name', 'www_url')
translator.register(UnitConnection, UnitConnectionTranslationOptions)


class ServiceNodeTranslationOptions(TranslationOptions):
fields = ('name',)
translator.register(ServiceNode, ServiceNodeTranslationOptions)

class ServiceLeafTranslationOptions(TranslationOptions):
fields = ('name',)
translator.register(ServiceLeaf, ServiceLeafTranslationOptions)

0 comments on commit 9680815

Please sign in to comment.