Skip to content

Commit

Permalink
[STTNHUB-336] improve: Group content to single coverage by STT Article ID (#59)
Browse files Browse the repository at this point in the history
  • Loading branch information
MarkLark86 authored May 7, 2024
1 parent eafe08e commit 94ebbc2
Show file tree
Hide file tree
Showing 8 changed files with 357 additions and 128 deletions.
155 changes: 83 additions & 72 deletions server/features/ingest_planning.feature
Original file line number Diff line number Diff line change
Expand Up @@ -328,7 +328,7 @@ Feature: Ingest STT Planning items
@auth
@stt_cvs
@stt_providers
Scenario: Link ingested coverages to content on update
Scenario: Creates new coverage on content ingest
# Ingest Planning with 0 coverages (1 placeholder)
When we fetch from "STTPlanningML" ingest "planning_ml_before_link_content.xml"
When we get "/planning"
Expand All @@ -345,21 +345,10 @@ Feature: Ingest STT Planning items
When we get "/assignments"
Then we get list with 0 items
# Ingest content
When we fetch from "STTNewsML" ingest "stt_newsml_link_content.xml" using routing_scheme
When we fetch from "STTNewsML" ingest "stt_newsml_link_content_with_topic_id.xml" using routing_scheme
"""
#routing_schemes._id#
"""
When we get "published"
Then we get list with 1 items
"""
{"_items": [{
"uri": "urn:newsml:stt.fi:101801633",
"assignment_id": "__no_value__"
}]}
"""
When we get "/assignments"
Then we get list with 0 items
When we fetch from "STTPlanningML" ingest "planning_ml_link_content.xml"
When we get "/assignments"
Then we get list with 1 items
"""
Expand All @@ -374,6 +363,14 @@ Feature: Ingest STT Planning items
}]}
"""
Then we store "assignment" with first item
When we get "published"
Then we get list with 1 items
"""
{"_items": [{
"uri": "urn:newsml:stt.fi:101801633",
"assignment_id": "#assignment._id#"
}]}
"""
When we get "/planning"
Then we get list with 1 items
"""
Expand All @@ -387,43 +384,45 @@ Feature: Ingest STT Planning items
"user": null,
"state": "completed",
"priority": 6
},
"news_coverage_status": {"qcode": "ncostat:int", "name": "coverage intended"},
"workflow_status": "active",
"flags": {"placeholder": "__no_value__"},
"planning": {
"g2_content_type": "text",
"scheduled": "2017-12-25T09:16:43+0000",
"slugline": "Parliament passed the Alcohol Act and the government gained confidence*** TRANSLATED ***",
"genre": [{"name": "P\u00e4\u00e4juttu", "qcode": "1"}],
"subject": [
{"name": "Politics", "qcode": "9", "scheme": "sttdepartment"},
{"name": "Pika+", "qcode": "1", "scheme": "sttversion"},
{"name": "Suomi", "qcode": "1", "scheme": "country"},
{"name": "Eurooppa", "qcode": "150", "scheme": "world_region"}
]
}
}],
"extra": {
"stt_topics": "437036"
}
}]}
"""
When we get "published"
Then we get list with 1 items
"""
{"_items": [{
"uri": "urn:newsml:stt.fi:101801633",
"assignment_id": "#assignment._id#"
}]}
"""

@auth
@stt_cvs
@stt_providers
Scenario: Creates new coverage on content ingest
# Ingest Planning with 0 coverages (1 placeholder)
When we fetch from "STTPlanningML" ingest "planning_ml_before_link_content.xml"
Scenario: Groups content to a single coverage based on STT Article ID
When we fetch from "STTPlanningML" ingest "planning_ml_link_content.xml"
When we get "/assignments"
Then we get list with 0 items
When we get "/planning"
Then we get list with 1 items
"""
{"_items": [{
"_id": "urn:newsml:stt.fi:437036",
"coverages": [{
"assigned_to": "__empty__",
"flags": {"placeholder": true}
}]
"coverages": [{"coverage_id": "ID_TEXT_120123822"}]
}]}
"""
When we get "/assignments"
Then we get list with 0 items
# Ingest content
When we fetch from "STTNewsML" ingest "stt_newsml_link_content_with_topic_id.xml" using routing_scheme
When we fetch from "STTNewsML" ingest "stt_newsml_link_content.xml" using routing_scheme
"""
#routing_schemes._id#
"""
Expand All @@ -432,55 +431,67 @@ Feature: Ingest STT Planning items
"""
{"_items": [{
"planning_item": "urn:newsml:stt.fi:437036",
"coverage_item": "ID_TEXT_101801633",
"priority": 6,
"assigned_to": {
"desk": "#desks._id#",
"state": "completed"
}
"coverage_item": "ID_TEXT_120123822"
}]}
"""
Then we store "assignment" with first item
When we get "published"
Then we store "assignment_1" with first item
When we get "/planning"
Then we get list with 1 items
"""
{"_items": [{
"uri": "urn:newsml:stt.fi:101801633",
"assignment_id": "#assignment._id#"
"_id": "urn:newsml:stt.fi:437036",
"coverages": [{
"coverage_id": "ID_TEXT_120123822",
"assigned_to": {"assignment_id": "#assignment_1._id#"}
}]
}]}
"""
When we get "/planning"
When we get "published"
Then we get list with 1 items
"""
{"_items": [{
"_id": "urn:newsml:stt.fi:437036",
"coverages": [{
"coverage_id": "ID_TEXT_101801633",
"assigned_to": {
"assignment_id": "#assignment._id#",
"desk": "#desks._id#",
"user": null,
"state": "completed",
"priority": 6
},
"news_coverage_status": {"qcode": "ncostat:int", "name": "coverage intended"},
"workflow_status": "active",
"flags": {"placeholder": "__no_value__"},
"planning": {
"g2_content_type": "text",
"scheduled": "2017-12-25T09:16:43+0000",
"slugline": "Parliament passed the Alcohol Act and the government gained confidence*** TRANSLATED ***",
"genre": [{"name": "P\u00e4\u00e4juttu", "qcode": "1"}],
"subject": [
{"name": "Politics", "qcode": "9", "scheme": "sttdepartment"},
{"name": "Pika+", "qcode": "1", "scheme": "sttversion"},
{"name": "Suomi", "qcode": "1", "scheme": "country"},
{"name": "Eurooppa", "qcode": "150", "scheme": "world_region"}
]
}
}],
"extra": {
"stt_topics": "437036"
}
"uri": "urn:newsml:stt.fi:101801633",
"assignment_id": "#assignment_1._id#"
}]}
"""
And we store "content_1" with first item
When we fetch from "STTNewsML" ingest "stt_newsml_link_content_2.xml" using routing_scheme
"""
#routing_schemes._id#
"""
When we get "published"
Then we get list with 2 items
And we store "content_1" with 1 item
And we store "content_2" with 2 item
When we get "/assignments"
Then we get list with 2 items
"""
{"_items": [
{
"planning_item": "urn:newsml:stt.fi:437036",
"coverage_item": "ID_TEXT_120123822",
"item_ids": ["#content_1.guid#"]
},
{
"planning_item": "urn:newsml:stt.fi:437036",
"coverage_item": "ID_TEXT_120123822",
"item_ids": ["#content_2.guid#"]
}
]}
"""
And we store "assignment_1" with 1 item
And we store "assignment_2" with 2 item
When we get "published"
Then we get list with 2 items
"""
{"_items": [
{
"uri": "urn:newsml:stt.fi:101801633",
"assignment_id": "#assignment_1._id#"
},
{
"uri": "urn:newsml:stt.fi:101801733",
"assignment_id": "#assignment_2._id#"
}
]}
"""
7 changes: 4 additions & 3 deletions server/stt/parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -77,9 +77,10 @@ def set_extra_fields(self, item, xml):

# newsItem altId
try:
alt_id = xml.find(self.qname('contentMeta')).find(self.qname('altId')).text
if alt_id:
item.setdefault('extra', {})['sttidtype_textid'] = alt_id
for alt_id in xml.find(self.qname('contentMeta')).findall(self.qname('altId')):
if alt_id.get("type") == "sttidtype:textid" and alt_id.text:
# textid is STT's Article ID
item.setdefault('extra', {})['sttidtype_textid'] = alt_id.text
except AttributeError:
pass

Expand Down
Loading

0 comments on commit 94ebbc2

Please sign in to comment.