Skip to content

Commit

Permalink
Enforce youtube length limits when uploading/updating title, descript…
Browse files Browse the repository at this point in the history
…ion (#1009)
  • Loading branch information
mbertrand authored Feb 24, 2022
1 parent e2a38eb commit c7ecb1a
Show file tree
Hide file tree
Showing 8 changed files with 159 additions and 25 deletions.
10 changes: 9 additions & 1 deletion main/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
import re
from enum import Flag, auto
from pathlib import Path
from typing import Tuple
from typing import Optional, Tuple
from uuid import UUID, uuid4

from django.http import HttpRequest
Expand Down Expand Up @@ -109,3 +109,11 @@ def valid_key(key: str, request: HttpRequest) -> bool:
digest = hmac.new(key.encode("utf-8"), request.body, hashlib.sha1).hexdigest()
sig_parts = request.headers["X-Hub-Signature"].split("=", 1)
return hmac.compare_digest(sig_parts[1], digest)


def truncate_words(content: str, length: int, suffix: Optional[str] = "...") -> str:
"""Truncate text to < length chars, keeping words intact"""
if len(content) <= length:
return content
else:
return content[: (length - len(suffix))].rsplit(" ", 1)[0] + suffix
9 changes: 9 additions & 0 deletions main/utils_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@
get_file_extension,
is_valid_uuid,
remove_trailing_slashes,
truncate_words,
valid_key,
)

Expand Down Expand Up @@ -95,3 +96,11 @@ def test_valid_key(mocker, key, is_valid):
headers={"X-Hub-Signature": "sha1=6a4e7673fa9c3afbb2860ae03ac2082958313a9c"},
)
assert valid_key(key, mock_request) is is_valid


@pytest.mark.parametrize(
"text, truncated", [["Hello world", "Hello___"], ["HelloWorld", "HelloW___"]]
)
def test_truncate_words(text, truncated):
""" truncate_words returns expected result"""
assert truncate_words(text, 9, suffix="___") == truncated
3 changes: 1 addition & 2 deletions static/js/components/PublishDrawer.tsx
Original file line number Diff line number Diff line change
Expand Up @@ -149,8 +149,7 @@ export default function PublishDrawer(props: Props): JSX.Element {
{website.content_warnings && !isEmpty(website.content_warnings) ? (
<div className="publish-warnings pt-2">
<strong className="text-danger">
This site is missing information that could affect publishing
output.
This site has issues that could affect publishing output.
</strong>
<ul className="text-danger">
{website.content_warnings.map((warning: string, idx: number) => (
Expand Down
2 changes: 2 additions & 0 deletions videos/constants.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,8 @@
ALL_DESTINATIONS = [DESTINATION_YOUTUBE, DESTINATION_ARCHIVE]

YT_THUMBNAIL_IMG = "https://img.youtube.com/vi/{video_id}/default.jpg"
YT_MAX_LENGTH_TITLE = 100
YT_MAX_LENGTH_DESCRIPTION = 5000


class VideoStatus:
Expand Down
19 changes: 15 additions & 4 deletions videos/youtube.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,12 @@
from smart_open.s3 import Reader

from content_sync.constants import VERSION_DRAFT, VERSION_LIVE
from videos.constants import DESTINATION_YOUTUBE
from main.utils import truncate_words
from videos.constants import (
DESTINATION_YOUTUBE,
YT_MAX_LENGTH_DESCRIPTION,
YT_MAX_LENGTH_TITLE,
)
from videos.messages import YouTubeUploadFailureMessage, YouTubeUploadSuccessMessage
from videos.models import VideoFile
from websites.api import is_ocw_site
Expand Down Expand Up @@ -209,7 +214,9 @@ def upload_video(self, videofile: VideoFile, privacy="unlisted"):
original_name = videofile.video.source_key.split("/")[-1]
request_body = dict(
snippet=dict(
title=strip_bad_chars(original_name)[:100],
title=truncate_words(
strip_bad_chars(original_name), YT_MAX_LENGTH_TITLE
),
description="",
categoryId=settings.YT_CATEGORY_ID,
),
Expand Down Expand Up @@ -304,8 +311,12 @@ def update_video(self, resource: WebsiteContent, privacy=None):
body={
"id": youtube_id,
"snippet": {
"title": resource.title,
"description": description,
"title": truncate_words(
strip_bad_chars(resource.title), YT_MAX_LENGTH_TITLE
),
"description": truncate_words(
strip_bad_chars(description), YT_MAX_LENGTH_DESCRIPTION
),
"tags": get_dict_field(metadata, settings.YT_FIELD_TAGS),
"categoryId": settings.YT_CATEGORY_ID,
},
Expand Down
30 changes: 22 additions & 8 deletions videos/youtube_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,11 @@
from content_sync.constants import VERSION_DRAFT, VERSION_LIVE
from users.factories import UserFactory
from videos.conftest import MockHttpErrorResponse
from videos.constants import DESTINATION_YOUTUBE
from videos.constants import (
DESTINATION_YOUTUBE,
YT_MAX_LENGTH_DESCRIPTION,
YT_MAX_LENGTH_TITLE,
)
from videos.factories import VideoFactory, VideoFileFactory
from videos.messages import YouTubeUploadFailureMessage, YouTubeUploadSuccessMessage
from videos.youtube import (
Expand Down Expand Up @@ -174,7 +178,7 @@ def test_upload_video_long_fields(mocker, youtube_mocker):
mock_upload = youtube_mocker().videos.return_value.insert
YouTubeApi().upload_video(video_file)
called_args, called_kwargs = mock_upload.call_args
assert called_kwargs["body"]["snippet"]["title"] == name[:100]
assert called_kwargs["body"]["snippet"]["title"] == f"{name[:97]}..."


def test_delete_video(youtube_mocker):
Expand All @@ -191,20 +195,30 @@ def test_update_video(settings, mocker, youtube_mocker, privacy):
"""update_video should send the correct data in a request to update youtube metadata"""
speakers = "speaker1, speaker2"
tags = "tag1, tag2"
youtube_id = "abc123"
description = "video test description"
youtube_id = "test video description"
title = "TitleLngt>"
description = "DescLngth>"
content = WebsiteContentFactory.create(
title=" ".join([title for i in range(11)]),
metadata={
"resourcetype": RESOURCE_TYPE_VIDEO,
"description": description,
"description": " ".join([description for _ in range(501)]),
"video_metadata": {
"youtube_id": youtube_id,
"video_tags": tags,
"video_speakers": speakers,
},
}
},
)

expected_title = f'{" ".join([title.replace(">", "") for _ in range(9)])}...'
expected_desc = f'{" ".join([description.replace(">", "") for _ in range(499)])}...'

assert len(content.title) > YT_MAX_LENGTH_TITLE
assert len(content.metadata["description"]) > YT_MAX_LENGTH_DESCRIPTION
assert len(expected_title) <= YT_MAX_LENGTH_TITLE
assert len(expected_desc) <= YT_MAX_LENGTH_DESCRIPTION

mock_update_caption = mocker.patch("videos.youtube.YouTubeApi.update_captions")

YouTubeApi().update_video(content, privacy=privacy)
Expand All @@ -213,8 +227,8 @@ def test_update_video(settings, mocker, youtube_mocker, privacy):
body={
"id": youtube_id,
"snippet": {
"title": content.title,
"description": f"{description}\n\nSpeakers: {speakers}",
"title": expected_title,
"description": expected_desc,
"tags": tags,
"categoryId": settings.YT_CATEGORY_ID,
},
Expand Down
50 changes: 46 additions & 4 deletions websites/api.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,14 +7,20 @@

from django.conf import settings
from django.core.files.uploadedfile import UploadedFile
from django.db.models import Q, QuerySet
from django.db.models import CharField, Q, QuerySet
from django.db.models.fields.json import KeyTextTransform
from django.db.models.functions import Cast, Length
from magic import Magic
from mitol.common.utils import max_or_none, now_in_utc
from mitol.mail.api import get_message_sender

from content_sync.constants import VERSION_DRAFT
from users.models import User
from videos.constants import YT_THUMBNAIL_IMG
from videos.constants import (
YT_MAX_LENGTH_DESCRIPTION,
YT_MAX_LENGTH_TITLE,
YT_THUMBNAIL_IMG,
)
from websites.constants import (
CONTENT_FILENAME_MAX_LEN,
PUBLISH_STATUS_ABORTED,
Expand Down Expand Up @@ -198,7 +204,7 @@ def update_youtube_thumbnail(website_id: str, metadata: Dict, overwrite=False):
)


def unassigned_youtube_ids(website: Website) -> List[WebsiteContent]:
def videos_with_unassigned_youtube_ids(website: Website) -> List[WebsiteContent]:
"""Return a list of WebsiteContent objects for videos with unassigned youtube ids"""
if not is_ocw_site(website):
return []
Expand All @@ -217,6 +223,34 @@ def unassigned_youtube_ids(website: Website) -> List[WebsiteContent]:
)


def videos_with_truncatable_text(website: Website) -> List[WebsiteContent]:
"""Return a list of WebsiteContent objects with text fields that will be truncated in YouTube"""
if not is_ocw_site(website):
return []
query_resource_type_field = get_dict_query_field(
"metadata", settings.FIELD_RESOURCETYPE
)
return (
WebsiteContent.objects.annotate(
desc_len=Length(
Cast(
KeyTextTransform(settings.YT_FIELD_DESCRIPTION, "metadata"),
CharField(),
)
)
)
.annotate(title_len=Length("title"))
.filter(
Q(website=website)
& Q(**{query_resource_type_field: RESOURCE_TYPE_VIDEO})
& (
Q(desc_len__gt=YT_MAX_LENGTH_DESCRIPTION)
| Q(title_len__gt=YT_MAX_LENGTH_TITLE)
)
)
)


def videos_missing_captions(website: Website) -> List[WebsiteContent]:
"""Return a list of WebsiteContent objects for videos with unassigned captions"""
if not is_ocw_site(website):
Expand Down Expand Up @@ -319,14 +353,18 @@ def incomplete_content_warnings(website):
Return array with error/warning messages for any website content missing expected data
(currently: video youtube ids and captions).
"""
missing_youtube_ids = unassigned_youtube_ids(website)
missing_youtube_ids = videos_with_unassigned_youtube_ids(website)

missing_youtube_ids_titles = [video.title for video in missing_youtube_ids]

missing_captions_titles = [
video.title for video in videos_missing_captions(website)
]

truncatable_video_titles = [
video.title for video in videos_with_truncatable_text(website)
]

messages = []

if len(missing_youtube_ids_titles) > 0:
Expand All @@ -337,5 +375,9 @@ def incomplete_content_warnings(website):
messages.append(
f"The following videos have missing captions: {', '.join(missing_captions_titles)}"
)
if len(truncatable_video_titles) > 0:
messages.append(
f"The following videos have titles or descriptions that will be truncated on YouTube: {', '.join(truncatable_video_titles)}"
)

return messages
61 changes: 55 additions & 6 deletions websites/api_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,10 +16,11 @@
incomplete_content_warnings,
is_ocw_site,
mail_on_publish,
unassigned_youtube_ids,
update_website_status,
update_youtube_thumbnail,
videos_missing_captions,
videos_with_truncatable_text,
videos_with_unassigned_youtube_ids,
)
from websites.constants import (
PUBLISH_STATUS_ERRORED,
Expand Down Expand Up @@ -228,7 +229,7 @@ def test_update_youtube_thumbnail(

@pytest.mark.parametrize("is_ocw", [True, False])
def test_unassigned_youtube_ids(mocker, is_ocw):
"""unassigned_youtube_ids should return WebsiteContent objects for videos with no youtube ids"""
"""videos_with_unassigned_youtube_ids should return WebsiteContent objects for videos with no youtube ids"""
mocker.patch("websites.api.is_ocw_site", return_value=is_ocw)
website = WebsiteFactory.create()
WebsiteContentFactory.create_batch(
Expand Down Expand Up @@ -266,7 +267,7 @@ def test_unassigned_youtube_ids(mocker, is_ocw):
"video_metadata": {"youtube_id": "bad_data"},
},
)
unassigned_content = unassigned_youtube_ids(website)
unassigned_content = videos_with_unassigned_youtube_ids(website)
if is_ocw:
assert len(unassigned_content) == 3
for content in videos_without_ids:
Expand Down Expand Up @@ -317,6 +318,40 @@ def test_videos_missing_captions(mocker, is_ocw):
assert len(unassigned_content) == 0


@pytest.mark.parametrize("is_ocw", [True, False])
def test_videos_with_truncatable_text(mocker, is_ocw):
"""Videos with titles or descriptions that are too long should be returned"""
mocker.patch("websites.api.is_ocw_site", return_value=is_ocw)
website = WebsiteFactory.create()
title_descs = (
(" ".join(["TooLongTitle" for _ in range(10)]), "desc"),
("title", " ".join(["TooLongDescription" for _ in range(500)])),
("title", "desc"),
)
resources = []
for title, desc in title_descs:
resources.append(
WebsiteContentFactory.create(
website=website,
title=title,
metadata={
"description": desc,
"resourcetype": RESOURCE_TYPE_VIDEO,
"video_files": {"video_captions_file": "abc123"},
},
)
)
truncatable_content = videos_with_truncatable_text(website)
assert len(resources[1].metadata["description"]) > 5000

if is_ocw:
assert len(truncatable_content) == 2
for content in resources[0:2]:
assert content in truncatable_content
else:
assert truncatable_content == []


@pytest.mark.parametrize("success", [True, False])
@pytest.mark.parametrize("version", ["live", "draft"])
def test_mail_on_publish(settings, mocker, success, version, permission_groups):
Expand Down Expand Up @@ -416,26 +451,40 @@ def test_update_unpublished_website_status(status, version):

@pytest.mark.parametrize("has_missing_ids", [True, False])
@pytest.mark.parametrize("has_missing_captions", [True, False])
def test_incomplete_content_warnings(mocker, has_missing_ids, has_missing_captions):
@pytest.mark.parametrize("has_truncatable_text", [True, False])
def test_incomplete_content_warnings(
mocker, has_missing_ids, has_missing_captions, has_truncatable_text
):
"""incomplete_content_warnings should return expected warning messages"""
website = WebsiteFactory.create()
video_content = WebsiteContentFactory.create_batch(3, website=website)
no_yt_ids = video_content[0:2] if has_missing_ids else []
no_caps = video_content[1:3] if has_missing_captions else []
truncatable_vids = [video_content[2]] if has_truncatable_text else []
mocker.patch(
"websites.api.videos_with_truncatable_text", return_value=truncatable_vids
)
mocker.patch(
"websites.api.unassigned_youtube_ids",
"websites.api.videos_with_unassigned_youtube_ids",
return_value=no_yt_ids,
)
mocker.patch(
"websites.api.videos_missing_captions",
return_value=no_caps,
)
warnings = incomplete_content_warnings(website)
warnings_len = 0
if has_missing_ids:
warnings_len += 1
for content in no_yt_ids:
assert content.title in warnings[0]
if has_missing_captions:
warnings_len += 1
for content in no_caps:
assert content.title in warnings[1 if has_missing_ids else 0]
if not has_missing_ids and not has_missing_captions:
if has_truncatable_text:
warnings_len += 1
assert len(warnings) == warnings_len
assert video_content[2].title in warnings[warnings_len - 1]
if not has_missing_ids and not has_missing_captions and not has_truncatable_text:
assert warnings == []

0 comments on commit c7ecb1a

Please sign in to comment.