Skip to content

Commit

Permalink
Merge pull request #118 from ride90/v0.6
Browse files Browse the repository at this point in the history
V0.6
  • Loading branch information
ride90 authored Jan 3, 2020
2 parents bc8c3b2 + 21c7390 commit 6bd834d
Show file tree
Hide file tree
Showing 25 changed files with 278 additions and 53 deletions.
2 changes: 1 addition & 1 deletion client/package.json
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@
},
"dependencies": {
"superdesk-analytics": "github:tomaskikutis/superdesk-analytics#next",
"superdesk-core": "github:superdesk/superdesk-client-core#bba50fe92",
"superdesk-core": "github:superdesk/superdesk-client-core#f33b275cb",
"superdesk-planning-extension": "1.0.1",
"superdesk-planning": "github:tomaskikutis/superdesk-planning#move-planning-extension-from-core"
}
Expand Down
2 changes: 1 addition & 1 deletion client/superdesk.config.js
Original file line number Diff line number Diff line change
Expand Up @@ -46,7 +46,7 @@ module.exports = function(grunt) {
editorHighlights: true,
planning: true,
searchShortcut: true,
editFeaturedImage: false,
editFeaturedImage: true,
hideCreatePackage: true,
customAuthoringTopbar: {
publish: true,
Expand Down
3 changes: 3 additions & 0 deletions server/belga/io/feed_parsers/base_belga_newsml_1_2.py
Original file line number Diff line number Diff line change
Expand Up @@ -82,6 +82,9 @@ def parse(self, xml, provider=None):
item.setdefault('subject', []).extend([
{"name": 'default', "qcode": 'default', "scheme": "distribution"},
])
# Slugline and keywords are empty
item['slugline'] = None
item['keywords'] = []
# remove duplicated subjects
item['subject'] = [dict(t) for t in {tuple(d.items()) for d in item['subject']}]
item = self.populate_fields(item)
Expand Down
3 changes: 3 additions & 0 deletions server/belga/io/feed_parsers/belga_anpa.py
Original file line number Diff line number Diff line change
Expand Up @@ -122,6 +122,9 @@ def parse(self, file_path, provider=None):
item['body_html'] = '<p>' + line + '</p>'

self._parse_ednote(item['headline'], item)
# Slugline and keywords are empty
item['slugline'] = None
item['keywords'] = []
return item
except Exception as ex:
raise ParserError.anpaParseFileError(file_path, ex)
Expand Down
3 changes: 3 additions & 0 deletions server/belga/io/feed_parsers/belga_dpa_newsml_2_0.py
Original file line number Diff line number Diff line change
Expand Up @@ -93,6 +93,9 @@ def parse(self, xml, provider=None):
# Distribution is default
dist = {"name": 'default', "qcode": 'default', "scheme": "distribution"}
item.setdefault('subject', []).append(dist)
# Slugline and keywords are empty
item['slugline'] = None
item['keywords'] = []
items.append(item)
return items
except Exception as ex:
Expand Down
3 changes: 3 additions & 0 deletions server/belga/io/feed_parsers/belga_iptc7901.py
Original file line number Diff line number Diff line change
Expand Up @@ -73,6 +73,9 @@ def parse(self, file_path, provider=None):
item = self.parse_content_dpa(file_path, provider)
if _type == 'ats':
item = self.parse_content_ats(file_path, provider)
# Slugline and keywords are empty
item['slugline'] = None
item['keywords'] = []
item = self.dpa_derive_dateline(item)
# Markup the text and set the content type
item['body_html'] = '<p>' + item['body_html'].replace('\r\n', ' ').replace('\n', '</p><p>') + '</p>'
Expand Down
87 changes: 71 additions & 16 deletions server/belga/publish/belga_newsml_1_2.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@
from datetime import datetime
from urllib.parse import urljoin

from eve.utils import ParsedRequest
from flask import current_app as app

import superdesk
Expand Down Expand Up @@ -40,6 +41,9 @@ class BelgaNewsML12Formatter(NewsML12Formatter):
XML_ROOT = '<?xml version="1.0" encoding="{}"?>'.format(ENCODING)
DATETIME_FORMAT = '%Y%m%dT%H%M%S'
BELGA_TEXT_PROFILE = 'belga_text'
CP_NAME_ROLE_MAP = {
'belga_text': 'Text'
}

def format(self, article, subscriber, codes=None):
"""
Expand Down Expand Up @@ -175,7 +179,7 @@ def _format_newscomponent_1_level(self, newsitem):
descriptivemetadata = SubElement(newscomponent_1_level, 'DescriptiveMetadata')
genre_formalname = ''
for subject in self._article.get('subject', []):
if subject['scheme'] == 'genre':
if subject.get('scheme') == 'genre':
genre_formalname = subject['qcode']
break
SubElement(
Expand All @@ -191,18 +195,15 @@ def _format_newscomponent_2_level(self, newscomponent_1_level):
:param Element newscomponent_1_level: NewsComponent of 1st level
"""

_type = self._article.get('type')
_profile = self._article.get('profile')

self._format_belga_text(newscomponent_1_level)
self._format_text(newscomponent_1_level)
self._format_belga_urls(newscomponent_1_level)
self._format_media(newscomponent_1_level)
self._format_attachments(newscomponent_1_level)
self._format_related_text_item(newscomponent_1_level)

def _format_belga_text(self, newscomponent_1_level):
def _format_text(self, newscomponent_1_level):
"""
Creates a `<NewsComponent>` of a 2nd level with information related to `belga_text` content profile.
Creates a `<NewsComponent>` of a 2nd level with information related to content profile.
:param Element newscomponent_1_level: NewsComponent of 1st level
"""

Expand All @@ -212,7 +213,11 @@ def _format_belga_text(self, newscomponent_1_level):
)

# Role
SubElement(newscomponent_2_level, 'Role', {'FormalName': self.BELGA_TEXT_PROFILE})
if self._article.get('profile') in self.CP_NAME_ROLE_MAP:
role_formal_name = self.CP_NAME_ROLE_MAP[self._article.get('profile')]
else:
role_formal_name = self._get_content_profile_name()
SubElement(newscomponent_2_level, 'Role', {'FormalName': role_formal_name})
# NewsLines
self._format_newslines(newscomponent_2_level, item=self._article)
# AdministrativeMetadata
Expand Down Expand Up @@ -304,7 +309,9 @@ def _format_belga_urls(self, newscomponent_1_level):
SubElement(contentitem, 'DataContent').text = belga_url.get(key)
characteristics = SubElement(contentitem, 'Characteristics')
# string's length is used in original belga's newsml
SubElement(characteristics, 'SizeInBytes').text = str(len(belga_url.get(key)))
SubElement(
characteristics, 'SizeInBytes'
).text = str(len(belga_url[key])) if belga_url.get(key) else '0'
SubElement(characteristics, 'Property', {'FormalName': 'maxCharCount', 'Value': '0'})

def _format_attachments(self, newscomponent_1_level):
Expand Down Expand Up @@ -429,7 +436,7 @@ def _format_picture(self, newscomponent_1_level, picture):
SubElement(contentitem, 'DataContent').text = picture.get(key)
characteristics = SubElement(contentitem, 'Characteristics')
# string's length is used in original belga's newsml
SubElement(characteristics, 'SizeInBytes').text = str(len(picture.get(key)))
SubElement(characteristics, 'SizeInBytes').text = str(len(picture[key])) if picture.get(key) else '0'
SubElement(characteristics, 'Property', {'FormalName': 'maxCharCount', 'Value': '0'})

# original, thumbnail, preview
Expand Down Expand Up @@ -481,7 +488,7 @@ def _format_coverage(self, newscomponent_1_level, coverage):
SubElement(contentitem, 'DataContent').text = coverage.get(key)
characteristics = SubElement(contentitem, 'Characteristics')
# string's length is used in original belga's newsml
SubElement(characteristics, 'SizeInBytes').text = str(len(coverage.get(key)))
SubElement(characteristics, 'SizeInBytes').text = str(len(coverage[key])) if coverage.get(key) else '0'
SubElement(characteristics, 'Property', {'FormalName': 'maxCharCount', 'Value': '0'})

newscomponent_3_level = SubElement(newscomponent_2_level, 'NewsComponent')
Expand Down Expand Up @@ -530,7 +537,7 @@ def _format_audio(self, newscomponent_1_level, audio):
SubElement(contentitem, 'DataContent').text = audio.get(key)
characteristics = SubElement(contentitem, 'Characteristics')
# string's length is used in original belga's newsml
SubElement(characteristics, 'SizeInBytes').text = str(len(audio.get(key)))
SubElement(characteristics, 'SizeInBytes').text = str(len(audio[key])) if audio.get(key) else '0'
SubElement(characteristics, 'Property', {'FormalName': 'maxCharCount', 'Value': '0'})

# sound
Expand Down Expand Up @@ -580,7 +587,7 @@ def _format_video(self, newscomponent_1_level, video):
SubElement(contentitem, 'DataContent').text = video.get(key)
characteristics = SubElement(contentitem, 'Characteristics')
# string's length is used in original belga's newsml
SubElement(characteristics, 'SizeInBytes').text = str(len(video.get(key)))
SubElement(characteristics, 'SizeInBytes').text = str(len(video[key])) if video.get(key) else '0'
SubElement(characteristics, 'Property', {'FormalName': 'maxCharCount', 'Value': '0'})

# sound
Expand Down Expand Up @@ -631,7 +638,7 @@ def _format_attachment(self, newscomponent_1_level, attachment):
SubElement(contentitem, 'DataContent').text = attachment.get(key)
characteristics = SubElement(contentitem, 'Characteristics')
# string's length is used in original belga's newsml
SubElement(characteristics, 'SizeInBytes').text = str(len(attachment.get(key)))
SubElement(characteristics, 'SizeInBytes').text = str(len(attachment[key])) if attachment.get(key) else '0'
SubElement(characteristics, 'Property', {'FormalName': 'maxCharCount', 'Value': '0'})

# Component
Expand Down Expand Up @@ -698,11 +705,48 @@ def _format_newslines(self, newscomponent_2_level, item):
SubElement(newslines, 'CreditLine').text = item.get('creditline', item.get('byline'))
SubElement(newslines, 'HeadLine').text = item.get('headline')
SubElement(newslines, 'CopyrightLine').text = item.get('copyrightholder')
for keyword in item.get('keywords', []):
SubElement(newslines, 'KeywordLine').text = keyword

# KeywordLine from country
for subject in item.get('subject', []):
if subject.get('scheme') == 'country':
try:
SubElement(
newslines, 'KeywordLine'
).text = subject['translations']['name'][item.get('language')]
except KeyError:
logger.warning(
'There is no "{}" translation for country cv. Subject: {}'.format(
item.get('language'), subject
)
)
SubElement(newslines, 'KeywordLine').text = subject['name']
break

# KeywordLine from belga-keywords
for subject in item.get('subject', []):
if subject.get('scheme') == 'belga-keywords':
try:
SubElement(
newslines, 'KeywordLine'
).text = subject['translations']['name'][item.get('language')]
except KeyError:
logger.warning(
'There is no "{}" translation for belga-keywords cv. Subject: {}'.format(
item.get('language'), subject
)
)
SubElement(newslines, 'KeywordLine').text = subject['name']

# KeywordLine from belga-keywords custom field
# in case the old custom belga-keywords field is still used or the item has data from it
if item.get('extra', {}).get('belga-keywords'):
for keyword in [i.strip() for i in item['extra']['belga-keywords'].split(',')]:
SubElement(newslines, 'KeywordLine').text = keyword

# KeywordLine from keywords
for keyword in item.get('keywords', []):
SubElement(newslines, 'KeywordLine').text = keyword

newsline = SubElement(newslines, 'NewsLine')
SubElement(newsline, 'NewsLineType', {'FormalName': item.get('line_type', '')})
SubElement(newsline, 'NewsLineText').text = item.get('line_text')
Expand Down Expand Up @@ -889,3 +933,14 @@ def _get_formatted_datetime(self, _datetime):
return datetime.strptime(_datetime, '%Y-%m-%dT%H:%M:%S+0000').strftime(self.DATETIME_FORMAT)
else:
return _datetime.strftime(self.DATETIME_FORMAT)

def _get_content_profile_name(self):
    """
    Returns the `label` of the content type referenced by the article's `profile`.
    :return: value stored under `label` in the document returned by the `content_types` service
    """

    service = superdesk.get_resource_service('content_types')

    # the lookup is made with a projection set to `label`
    parsed_request = ParsedRequest()
    parsed_request.projection = '{"label": 1}'
    parsed_request.args = {}

    profile_id = self._article.get('profile')
    profile_doc = service.find_one(req=parsed_request, _id=profile_id)
    return profile_doc['label']
6 changes: 5 additions & 1 deletion server/belga/search_providers.py
Original file line number Diff line number Diff line change
Expand Up @@ -162,7 +162,8 @@ def format_list_item(self, data):
'baseImage': {
'href': data['detailUrl'],
},
}
},
'_fetchable': False,
}


Expand Down Expand Up @@ -209,6 +210,7 @@ def format_list_item(self, data):
'extra': {
'bcoverage': guid,
},
'_fetchable': False,
}


Expand Down Expand Up @@ -279,6 +281,7 @@ def format_list_item(self, data):
'_id': guid,
'guid': guid,
'headline': get_text(data['headLine']),
'slugline': get_text(data['topic']),
'name': get_text(data['name']),
'description_text': get_text(data.get('description')),
'versioncreated': created,
Expand All @@ -291,6 +294,7 @@ def format_list_item(self, data):
'extra': {
'bcoverage': guid,
},
'_fetchable': False,
}


Expand Down
99 changes: 99 additions & 0 deletions server/data/vocabularies.json
Original file line number Diff line number Diff line change
Expand Up @@ -3483,6 +3483,105 @@
}
]
},
{
"_id": "keywords",
"init_version": 1,
"display_name": "Storytags",
"type": "manageable",
"selection_type": "multi selection",
"unique_field": "qcode",
"schema": {
"name": {},
"qcode": {}
},
"items": [
{
"name": "#",
"qcode": "#",
"is_active": true
},
{
"name": "#containerparkbullies",
"qcode": "#containerparkbullies",
"is_active": true
},
{
"name": "#zwartepiet",
"qcode": "#zwartepiet",
"is_active": true
},
{
"name": "#blackfriday",
"qcode": "#blackfriday",
"is_active": true
},
{
"name": "#julianassange",
"qcode": "#julianassange",
"is_active": true
},
{
"name": "#phd",
"qcode": "#phd",
"is_active": true
},
{
"name": "#bocoolsaet",
"qcode": "#bocoolsaet",
"is_active": true
},
{
"name": "#tulancesquelquechosecommetuveux",
"qcode": "#tulancesquelquechosecommetuveux",
"is_active": true
},
{
"name": "#Remcoco",
"qcode": "#Remcoco",
"is_active": true
},
{
"name": "#herstructureringProximus",
"qcode": "#herstructureringProximus",
"is_active": true
},
{
"name": "proc\u00e8s",
"qcode": "proc\u00e8s",
"is_active": true
},
{
"name": "agression",
"qcode": "agression",
"is_active": true
},
{
"name": "politique",
"qcode": "politique",
"is_active": true
},
{
"name": "ultradroite",
"qcode": "ultradroite",
"is_active": true
},
{
"name": "ultragauche",
"qcode": "ultragauche",
"is_active": true
},
{
"name": "assises91",
"qcode": "assises91",
"is_active": true
},
{
"name": "Cl\u00e9ment M\u00e9ric",
"qcode": "Cl\u00e9ment M\u00e9ric",
"is_active": true
}
]
},
{
"_id": "genre",
"display_name": "Genre",
Expand Down
Loading

0 comments on commit 6bd834d

Please sign in to comment.