Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[STTNHUB-312] fix: Link Planning item updates to content #44

Merged
merged 3 commits into from
Jan 18, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
84 changes: 81 additions & 3 deletions server/features/ingest_planning.feature
Original file line number Diff line number Diff line change
Expand Up @@ -123,7 +123,7 @@ Feature: Ingest STT Planning items
Then we get list with 1 items
"""
{"_items": [{
"uri": "urn:newsml:stt.fi:20171219:101801633",
"uri": "urn:newsml:stt.fi:101801633",
"assignment_id": "__no_value__",
"priority": 6,
"task": {
Expand Down Expand Up @@ -175,7 +175,7 @@ Feature: Ingest STT Planning items
Then we get list with 1 items
"""
{"_items": [{
"uri": "urn:newsml:stt.fi:20171219:101801633",
"uri": "urn:newsml:stt.fi:101801633",
"assignment_id": "#assignment._id#"
}]}
"""
Expand Down Expand Up @@ -243,7 +243,7 @@ Feature: Ingest STT Planning items
Then we get list with 1 items
"""
{"_items": [{
"uri": "urn:newsml:stt.fi:20171219:101801633",
"uri": "urn:newsml:stt.fi:101801633",
"assignment_id": "#assignment._id#"
}]}
"""
Expand Down Expand Up @@ -327,3 +327,81 @@ Feature: Ingest STT Planning items
}
}]}
"""

@auth
@stt_cvs
@stt_providers
Scenario: Link ingested coverages to content on update
# Ingest Planning with 0 coverages (1 placeholder)
When we fetch from "STTPlanningML" ingest "planning_ml_before_link_content.xml"
When we get "/planning"
Then we get list with 1 items
"""
{"_items": [{
"_id": "urn:newsml:stt.fi:437036",
"coverages": [{
"assigned_to": "__empty__",
"flags": {"placeholder": true}
}]
}]}
"""
When we get "/assignments"
Then we get list with 0 items
# Ingest content
When we fetch from "STTNewsML" ingest "stt_newsml_link_content.xml" using routing_scheme
"""
#routing_schemes._id#
"""
When we get "published"
Then we get list with 1 items
"""
{"_items": [{
"uri": "urn:newsml:stt.fi:101801633",
"assignment_id": "__no_value__"
}]}
"""
When we get "/assignments"
Then we get list with 0 items
When we fetch from "STTPlanningML" ingest "planning_ml_link_content.xml"
When we get "/assignments"
Then we get list with 1 items
"""
{"_items": [{
"planning_item": "urn:newsml:stt.fi:437036",
"coverage_item": "ID_TEXT_120123822",
"priority": 6,
"assigned_to": {
"desk": "#desks._id#",
"state": "completed"
}
}]}
"""
Then we store "assignment" with first item
When we get "/planning"
Then we get list with 1 items
"""
{"_items": [{
"_id": "urn:newsml:stt.fi:437036",
"coverages": [{
"coverage_id": "ID_TEXT_120123822",
"assigned_to": {
"assignment_id": "#assignment._id#",
"desk": "#desks._id#",
"user": null,
"state": "completed",
"priority": 6
}
}],
"extra": {
"stt_topics": "437036"
}
}]}
"""
When we get "published"
Then we get list with 1 items
"""
{"_items": [{
"uri": "urn:newsml:stt.fi:101801633",
"assignment_id": "#assignment._id#"
}]}
"""
2 changes: 1 addition & 1 deletion server/requirements.in
Original file line number Diff line number Diff line change
Expand Up @@ -2,4 +2,4 @@ gunicorn
honcho

git+https://github.com/superdesk/[email protected]#egg=superdesk-core
git+https://github.com/superdesk/[email protected]rc3#egg=superdesk-planning
git+https://github.com/superdesk/[email protected]rc4#egg=superdesk-planning
21 changes: 11 additions & 10 deletions server/requirements.txt
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
#
# This file is autogenerated by pip-compile with Python 3.10
# This file is autogenerated by pip-compile with Python 3.8
# by the following command:
#
# pip-compile requirements.in
Expand All @@ -14,7 +14,7 @@ async-timeout==4.0.3
# via redis
authlib==0.14.3
# via superdesk-core
babel==2.13.1
babel==2.14.0
# via flask-babel
bcrypt==3.1.7
# via superdesk-core
Expand All @@ -26,9 +26,9 @@ blinker==1.4
# flask-mail
# raven
# superdesk-core
boto3==1.33.12
boto3==1.34.21
# via superdesk-core
botocore==1.33.12
botocore==1.34.21
# via
# boto3
# s3transfer
Expand Down Expand Up @@ -87,7 +87,7 @@ draftjs-exporter[lxml]==2.1.7
# superdesk-core
ecs-logging==2.1.0
# via elastic-apm
elastic-apm[flask]==6.19.0
elastic-apm[flask]==6.20.0
# via
# elastic-apm
# superdesk-core
Expand Down Expand Up @@ -148,7 +148,7 @@ jmespath==1.0.1
# via
# boto3
# botocore
jwcrypto==1.5.0
jwcrypto==1.5.1
# via
# flask-oidc-ex
# python-jwt
Expand Down Expand Up @@ -182,7 +182,7 @@ pillow==9.2.0
# via
# reportlab
# superdesk-core
prompt-toolkit==3.0.41
prompt-toolkit==3.0.43
# via click-repl
pyasn1==0.5.1
# via
Expand Down Expand Up @@ -220,6 +220,7 @@ python-twitter==3.5
# via superdesk-core
pytz==2023.3.post1
# via
# babel
# celery
# eve-elastic
# flask-babel
Expand Down Expand Up @@ -249,7 +250,7 @@ requests-oauthlib==1.3.1
# via python-twitter
rsa==4.9
# via oauth2client
s3transfer==0.8.2
s3transfer==0.10.0
# via boto3
sgmllib3k==1.0.0
# via feedparser
Expand All @@ -263,7 +264,7 @@ six==1.16.0
# python-dateutil
superdesk-core @ git+https://github.com/superdesk/[email protected]
# via -r requirements.in
superdesk-planning @ git+https://github.com/superdesk/[email protected]rc3
superdesk-planning @ git+https://github.com/superdesk/[email protected]rc4
# via -r requirements.in
typing-extensions==4.9.0
# via superdesk-core
Expand All @@ -283,7 +284,7 @@ vine==5.1.0
# amqp
# celery
# kombu
wcwidth==0.2.12
wcwidth==0.2.13
# via prompt-toolkit
websockets==10.3
# via superdesk-core
Expand Down
4 changes: 4 additions & 0 deletions server/stt/common.py
Original file line number Diff line number Diff line change
Expand Up @@ -43,6 +43,10 @@ def remove_date_portion_from_id(item_id: str) -> str:
if len(id_parts) == 5:
# Correct format to split, Remove the date portion of the ID
del id_parts[3]
elif len(id_parts) == 6:
# ID includes version, remove the date and version portions of the ID
del id_parts[5]
del id_parts[3]

return ":".join(id_parts)

Expand Down
3 changes: 3 additions & 0 deletions server/stt/parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,8 @@
from superdesk.io.registry import register_feed_parser
from superdesk.io.feed_parsers.stt_newsml import STTNewsMLFeedParser, STT_LOCATION_MAP

from .common import remove_date_portion_from_id


NA = 'N/A'

Expand Down Expand Up @@ -71,6 +73,7 @@ def set_extra_fields(self, item, xml):
# newsItem guid
if 'uri' in item:
item.setdefault('extra', {})['newsItem_guid'] = item['uri']
item["uri"] = remove_date_portion_from_id(item["uri"])

# newsItem altId
try:
Expand Down
105 changes: 76 additions & 29 deletions server/stt/signal_hooks.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,10 +7,9 @@

from superdesk import get_resource_service, signals
from superdesk.factory.app import SuperdeskEve
from superdesk.metadata.item import ITEM_STATE, CONTENT_STATE

from planning.common import WORKFLOW_STATE, ASSIGNMENT_WORKFLOW_STATE
from planning.signals import planning_created
from planning.signals import planning_ingested

from stt.stt_planning_ml import STTPlanningMLParser

Expand All @@ -19,48 +18,86 @@


def init_app(_app: SuperdeskEve):
planning_created.connect(after_planning_created)
planning_ingested.connect(link_coverages_to_content)
signals.item_publish.connect(before_content_published)


def after_planning_created(_sender: Any, item: Dict[str, Any]):
def link_coverages_to_content(_sender: Any, item: Dict[str, Any], original: Optional[Dict[str, Any]] = None):
"""Link coverage(s) to content upon ingest (if content exists)"""

try:
planning_id = item[config.ID_FIELD]
except KeyError:
logger.error("Failed to link planning with content, _id is missing")
return

if not len(item.get("coverages") or []):
# No coverages on this Planning item, no need to continue
return

try:
if len(item["coverages"]) == 1 and item["coverages"][0]["flags"]["placeholder"] is True:
# There is only 1 coverage, and it is a placeholder coverage, no need to continue
return
except (KeyError, IndexError, TypeError):
return

if not _is_ingested_by_stt_planning_ml(item):
return

updates = {"coverages": deepcopy(item["coverages"])}
coverage_id_to_content_id_map: Dict[str, str] = {}
delivery_service = get_resource_service("delivery")
planning_id = item.get(config.ID_FIELD)
updates = {"coverages": deepcopy(item.get("coverages") or [])}
coverages_updated = {}
planning_service = get_resource_service("planning")
for coverage in updates["coverages"]:
coverage_id = coverage.get("coverage_id")
deliveries = delivery_service.get(req=None, lookup={
try:
coverage_id = coverage["coverage_id"]
except KeyError:
logger.error("Failed to link coverage with content, coverage_id is missing")
continue

try:
if coverage["flags"]["placeholder"] is True:
# This is a placeholder coverage, and will never be attached to content
continue
except (KeyError, TypeError):
pass

# Get the deliveries that aren't linked to an Assignment
# These deliveries are added in ``STTPlanningMLParser._create_temp_assignment_deliveries``
deliveries = delivery_service.get_from_mongo(req=None, lookup={
"planning_id": planning_id,
"coverage_id": coverage_id,
})
content = _get_content_item_by_uris([
delivery["item_id"]
for delivery in deliveries
if delivery.get("item_id") is not None
])
"assignment_id": None,
"item_id": {"$ne": None}},
)
if not deliveries.count():
# No unlinked deliveries found for this Coverage
continue

content = _get_content_item_by_uris([delivery["item_id"] for delivery in deliveries])
if content is None:
# No content has been found
# Linking will occur when content is published (see ``before_content_published``)
continue

_update_coverage_assignment_details(coverage, content)
coverages_updated[coverage_id] = content
coverage_id_to_content_id_map[coverage_id] = content[config.ID_FIELD]

updated_item = get_resource_service("planning").patch(planning_id, updates)
updated_coverage_ids = coverages_updated.keys()
updated_coverage_ids = coverage_id_to_content_id_map.keys()
if not len(updated_coverage_ids):
# No coverages were updated, no need to update the Planning item or link any content
return

# Update the planning item with the latest Assignment information, and link the coverages to the content
updated_item = planning_service.patch(planning_id, updates)
for coverage in updated_item.get("coverages") or []:
coverage_id = coverage.get("coverage_id")
assignment_id = (coverage.get("assigned_to") or {}).get("assignment_id")
if coverage_id not in updated_coverage_ids or assignment_id is None:
continue

_link_assignment_and_content(assignment_id, coverage_id, coverages_updated[coverage_id]["_id"])
_link_assignment_and_content(assignment_id, coverage_id, coverage_id_to_content_id_map[coverage_id])


def before_content_published(_sender: Any, item: Dict[str, Any], updates: Dict[str, Any]):
Expand Down Expand Up @@ -110,18 +147,24 @@ def before_content_published(_sender: Any, item: Dict[str, Any], updates: Dict[s


def _is_ingested_by_stt_planning_ml(item: Dict[str, Any]) -> bool:
"""Determine if the item was ingested by the STTPlanningMLParser parser"""

ingest_provider_id = item.get("ingest_provider")
if item.get(ITEM_STATE) != CONTENT_STATE.INGESTED or ingest_provider_id is None:
"""Determine if the item was ingested by the ``STTPlanningMLParser`` parser"""

try:
if item["ingest_provider"] is None:
return False
ingest_provider_id = ObjectId(item["ingest_provider"])
ingest_provider = get_resource_service("ingest_providers").find_one(req=None, _id=ingest_provider_id)
return ingest_provider["feed_parser"] == STTPlanningMLParser.NAME
except (KeyError, TypeError):
return False

ingest_provider = get_resource_service("ingest_providers").find_one(req=None, _id=ObjectId(ingest_provider_id))
return ingest_provider is not None and ingest_provider.get("feed_parser") == STTPlanningMLParser.NAME


def _get_content_item_by_uris(uris: List[str]) -> Optional[Dict[str, Any]]:
"""Get content item(s) by uri"""
"""Get latest content item by uri"""

if not len(uris):
# No URIs were provided, so there is no content to look up
return None

req = ParsedRequest()
req.args = {
Expand Down Expand Up @@ -158,8 +201,12 @@ def _update_coverage_assignment_details(coverage: Dict[str, Any], content: Dict[
})


def _link_assignment_and_content(assignment_id: ObjectId, coverage_id: str, content_id: str,
skip_archive_update: Optional[bool] = False):
def _link_assignment_and_content(
assignment_id: ObjectId,
coverage_id: str,
content_id: str,
skip_archive_update: Optional[bool] = False
):
"""Remove all temporary delivery entries for this coverage and link assignment and content"""

get_resource_service("delivery").delete_action(lookup={"coverage_id": coverage_id})
Expand Down
Loading
Loading