diff --git a/main/utils.py b/main/utils.py
index fdfd60f3f..98f318d04 100644
--- a/main/utils.py
+++ b/main/utils.py
@@ -4,7 +4,7 @@
import re
from enum import Flag, auto
from pathlib import Path
-from typing import Tuple
+from typing import Optional, Tuple
from uuid import UUID, uuid4
from django.http import HttpRequest
@@ -109,3 +109,11 @@ def valid_key(key: str, request: HttpRequest) -> bool:
digest = hmac.new(key.encode("utf-8"), request.body, hashlib.sha1).hexdigest()
sig_parts = request.headers["X-Hub-Signature"].split("=", 1)
return hmac.compare_digest(sig_parts[1], digest)
+
+
+def truncate_words(content: str, length: int, suffix: Optional[str] = "...") -> str:
+    """Truncate text to <= length chars, keeping words intact; a None suffix adds nothing"""
+    suffix = suffix or ""
+    if len(content) <= length:
+        return content
+    return (content[: max(length - len(suffix), 0)].rsplit(" ", 1)[0] + suffix)[:length]
diff --git a/main/utils_test.py b/main/utils_test.py
index 6f8d7dbcc..9b439fd03 100644
--- a/main/utils_test.py
+++ b/main/utils_test.py
@@ -7,6 +7,7 @@
get_file_extension,
is_valid_uuid,
remove_trailing_slashes,
+ truncate_words,
valid_key,
)
@@ -95,3 +96,11 @@ def test_valid_key(mocker, key, is_valid):
headers={"X-Hub-Signature": "sha1=6a4e7673fa9c3afbb2860ae03ac2082958313a9c"},
)
assert valid_key(key, mock_request) is is_valid
+
+
+@pytest.mark.parametrize(
+    "text, truncated", [("Hello world", "Hello___"), ("HelloWorld", "HelloW___")]
+)
+def test_truncate_words(text, truncated):
+    """truncate_words should cut at a word boundary when there is one and append the suffix"""
+    assert truncated == truncate_words(text, 9, suffix="___")
diff --git a/static/js/components/PublishDrawer.tsx b/static/js/components/PublishDrawer.tsx
index 941009452..2987ab5c0 100644
--- a/static/js/components/PublishDrawer.tsx
+++ b/static/js/components/PublishDrawer.tsx
@@ -149,8 +149,7 @@ export default function PublishDrawer(props: Props): JSX.Element {
{website.content_warnings && !isEmpty(website.content_warnings) ? (
- This site is missing information that could affect publishing
- output.
+ This site has issues that could affect publishing output.
{website.content_warnings.map((warning: string, idx: number) => (
diff --git a/videos/constants.py b/videos/constants.py
index 9ddd5704f..1e8199f74 100644
--- a/videos/constants.py
+++ b/videos/constants.py
@@ -8,6 +8,8 @@
ALL_DESTINATIONS = [DESTINATION_YOUTUBE, DESTINATION_ARCHIVE]
YT_THUMBNAIL_IMG = "https://img.youtube.com/vi/{video_id}/default.jpg"
+YT_MAX_LENGTH_TITLE = 100
+YT_MAX_LENGTH_DESCRIPTION = 5000
class VideoStatus:
diff --git a/videos/youtube.py b/videos/youtube.py
index 7a0763efb..36e22525d 100644
--- a/videos/youtube.py
+++ b/videos/youtube.py
@@ -18,7 +18,12 @@
from smart_open.s3 import Reader
from content_sync.constants import VERSION_DRAFT, VERSION_LIVE
-from videos.constants import DESTINATION_YOUTUBE
+from main.utils import truncate_words
+from videos.constants import (
+ DESTINATION_YOUTUBE,
+ YT_MAX_LENGTH_DESCRIPTION,
+ YT_MAX_LENGTH_TITLE,
+)
from videos.messages import YouTubeUploadFailureMessage, YouTubeUploadSuccessMessage
from videos.models import VideoFile
from websites.api import is_ocw_site
@@ -209,7 +214,9 @@ def upload_video(self, videofile: VideoFile, privacy="unlisted"):
original_name = videofile.video.source_key.split("/")[-1]
request_body = dict(
snippet=dict(
- title=strip_bad_chars(original_name)[:100],
+ title=truncate_words(
+ strip_bad_chars(original_name), YT_MAX_LENGTH_TITLE
+ ),
description="",
categoryId=settings.YT_CATEGORY_ID,
),
@@ -304,8 +311,12 @@ def update_video(self, resource: WebsiteContent, privacy=None):
body={
"id": youtube_id,
"snippet": {
- "title": resource.title,
- "description": description,
+ "title": truncate_words(
+ strip_bad_chars(resource.title), YT_MAX_LENGTH_TITLE
+ ),
+ "description": truncate_words(
+ strip_bad_chars(description), YT_MAX_LENGTH_DESCRIPTION
+ ),
"tags": get_dict_field(metadata, settings.YT_FIELD_TAGS),
"categoryId": settings.YT_CATEGORY_ID,
},
diff --git a/videos/youtube_test.py b/videos/youtube_test.py
index d696deb58..14ecdc27f 100644
--- a/videos/youtube_test.py
+++ b/videos/youtube_test.py
@@ -12,7 +12,11 @@
from content_sync.constants import VERSION_DRAFT, VERSION_LIVE
from users.factories import UserFactory
from videos.conftest import MockHttpErrorResponse
-from videos.constants import DESTINATION_YOUTUBE
+from videos.constants import (
+ DESTINATION_YOUTUBE,
+ YT_MAX_LENGTH_DESCRIPTION,
+ YT_MAX_LENGTH_TITLE,
+)
from videos.factories import VideoFactory, VideoFileFactory
from videos.messages import YouTubeUploadFailureMessage, YouTubeUploadSuccessMessage
from videos.youtube import (
@@ -174,7 +178,7 @@ def test_upload_video_long_fields(mocker, youtube_mocker):
mock_upload = youtube_mocker().videos.return_value.insert
YouTubeApi().upload_video(video_file)
called_args, called_kwargs = mock_upload.call_args
- assert called_kwargs["body"]["snippet"]["title"] == name[:100]
+ assert called_kwargs["body"]["snippet"]["title"] == f"{name[:97]}..."
def test_delete_video(youtube_mocker):
@@ -191,20 +195,30 @@ def test_update_video(settings, mocker, youtube_mocker, privacy):
"""update_video should send the correct data in a request to update youtube metadata"""
speakers = "speaker1, speaker2"
tags = "tag1, tag2"
- youtube_id = "abc123"
- description = "video test description"
+ youtube_id = "test video description"
+ title = "TitleLngt>"
+ description = "DescLngth>"
content = WebsiteContentFactory.create(
+ title=" ".join([title for i in range(11)]),
metadata={
"resourcetype": RESOURCE_TYPE_VIDEO,
- "description": description,
+ "description": " ".join([description for _ in range(501)]),
"video_metadata": {
"youtube_id": youtube_id,
"video_tags": tags,
"video_speakers": speakers,
},
- }
+ },
)
+ expected_title = f'{" ".join([title.replace(">", "") for _ in range(9)])}...'
+ expected_desc = f'{" ".join([description.replace(">", "") for _ in range(499)])}...'
+
+ assert len(content.title) > YT_MAX_LENGTH_TITLE
+ assert len(content.metadata["description"]) > YT_MAX_LENGTH_DESCRIPTION
+ assert len(expected_title) <= YT_MAX_LENGTH_TITLE
+ assert len(expected_desc) <= YT_MAX_LENGTH_DESCRIPTION
+
mock_update_caption = mocker.patch("videos.youtube.YouTubeApi.update_captions")
YouTubeApi().update_video(content, privacy=privacy)
@@ -213,8 +227,8 @@ def test_update_video(settings, mocker, youtube_mocker, privacy):
body={
"id": youtube_id,
"snippet": {
- "title": content.title,
- "description": f"{description}\n\nSpeakers: {speakers}",
+ "title": expected_title,
+ "description": expected_desc,
"tags": tags,
"categoryId": settings.YT_CATEGORY_ID,
},
diff --git a/websites/api.py b/websites/api.py
index d3dd934c6..97402664d 100644
--- a/websites/api.py
+++ b/websites/api.py
@@ -7,14 +7,20 @@
from django.conf import settings
from django.core.files.uploadedfile import UploadedFile
-from django.db.models import Q, QuerySet
+from django.db.models import CharField, Q, QuerySet
+from django.db.models.fields.json import KeyTextTransform
+from django.db.models.functions import Cast, Length
from magic import Magic
from mitol.common.utils import max_or_none, now_in_utc
from mitol.mail.api import get_message_sender
from content_sync.constants import VERSION_DRAFT
from users.models import User
-from videos.constants import YT_THUMBNAIL_IMG
+from videos.constants import (
+ YT_MAX_LENGTH_DESCRIPTION,
+ YT_MAX_LENGTH_TITLE,
+ YT_THUMBNAIL_IMG,
+)
from websites.constants import (
CONTENT_FILENAME_MAX_LEN,
PUBLISH_STATUS_ABORTED,
@@ -198,7 +204,7 @@ def update_youtube_thumbnail(website_id: str, metadata: Dict, overwrite=False):
)
-def unassigned_youtube_ids(website: Website) -> List[WebsiteContent]:
+def videos_with_unassigned_youtube_ids(website: Website) -> List[WebsiteContent]:
"""Return a list of WebsiteContent objects for videos with unassigned youtube ids"""
if not is_ocw_site(website):
return []
@@ -217,6 +223,34 @@ def unassigned_youtube_ids(website: Website) -> List[WebsiteContent]:
)
+def videos_with_truncatable_text(website: Website) -> List[WebsiteContent]:
+    """Return the website's video WebsiteContent whose title or description exceeds the YouTube max lengths"""
+    if not is_ocw_site(website):
+        return []
+    resource_type_lookup = get_dict_query_field(
+        "metadata", settings.FIELD_RESOURCETYPE
+    )
+    # The description lives inside the metadata JSON, so extract it as text first.
+    description_text = Cast(
+        KeyTextTransform(settings.YT_FIELD_DESCRIPTION, "metadata"),
+        CharField(),
+    )
+    # Either field being over its limit is enough to flag the video.
+    exceeds_limit = Q(desc_len__gt=YT_MAX_LENGTH_DESCRIPTION) | Q(
+        title_len__gt=YT_MAX_LENGTH_TITLE
+    )
+    # Only video resources belonging to this website are considered.
+    return (
+        WebsiteContent.objects.annotate(desc_len=Length(description_text))
+        .annotate(title_len=Length("title"))
+        .filter(
+            Q(website=website)
+            & Q(**{resource_type_lookup: RESOURCE_TYPE_VIDEO})
+            & exceeds_limit
+        )
+    )
+
+
def videos_missing_captions(website: Website) -> List[WebsiteContent]:
"""Return a list of WebsiteContent objects for videos with unassigned captions"""
if not is_ocw_site(website):
@@ -319,7 +353,7 @@ def incomplete_content_warnings(website):
Return array with error/warning messages for any website content missing expected data
(currently: video youtube ids and captions).
"""
- missing_youtube_ids = unassigned_youtube_ids(website)
+ missing_youtube_ids = videos_with_unassigned_youtube_ids(website)
missing_youtube_ids_titles = [video.title for video in missing_youtube_ids]
@@ -327,6 +361,10 @@ def incomplete_content_warnings(website):
video.title for video in videos_missing_captions(website)
]
+ truncatable_video_titles = [
+ video.title for video in videos_with_truncatable_text(website)
+ ]
+
messages = []
if len(missing_youtube_ids_titles) > 0:
@@ -337,5 +375,9 @@ def incomplete_content_warnings(website):
messages.append(
f"The following videos have missing captions: {', '.join(missing_captions_titles)}"
)
+ if len(truncatable_video_titles) > 0:
+ messages.append(
+ f"The following videos have titles or descriptions that will be truncated on YouTube: {', '.join(truncatable_video_titles)}"
+ )
return messages
diff --git a/websites/api_test.py b/websites/api_test.py
index 260bf69af..4c1a9669e 100644
--- a/websites/api_test.py
+++ b/websites/api_test.py
@@ -16,10 +16,11 @@
incomplete_content_warnings,
is_ocw_site,
mail_on_publish,
- unassigned_youtube_ids,
update_website_status,
update_youtube_thumbnail,
videos_missing_captions,
+ videos_with_truncatable_text,
+ videos_with_unassigned_youtube_ids,
)
from websites.constants import (
PUBLISH_STATUS_ERRORED,
@@ -228,7 +229,7 @@ def test_update_youtube_thumbnail(
@pytest.mark.parametrize("is_ocw", [True, False])
def test_unassigned_youtube_ids(mocker, is_ocw):
- """unassigned_youtube_ids should return WebsiteContent objects for videos with no youtube ids"""
+ """videos_with_unassigned_youtube_ids should return WebsiteContent objects for videos with no youtube ids"""
mocker.patch("websites.api.is_ocw_site", return_value=is_ocw)
website = WebsiteFactory.create()
WebsiteContentFactory.create_batch(
@@ -266,7 +267,7 @@ def test_unassigned_youtube_ids(mocker, is_ocw):
"video_metadata": {"youtube_id": "bad_data"},
},
)
- unassigned_content = unassigned_youtube_ids(website)
+ unassigned_content = videos_with_unassigned_youtube_ids(website)
if is_ocw:
assert len(unassigned_content) == 3
for content in videos_without_ids:
@@ -317,6 +318,40 @@ def test_videos_missing_captions(mocker, is_ocw):
assert len(unassigned_content) == 0
+@pytest.mark.parametrize("is_ocw", [True, False])
+def test_videos_with_truncatable_text(mocker, is_ocw):
+    """Only videos with an overlong title or description are returned, and only for OCW sites"""
+    mocker.patch("websites.api.is_ocw_site", return_value=is_ocw)
+    website = WebsiteFactory.create()
+    long_title = " ".join(["TooLongTitle" for _ in range(10)])
+    long_desc = " ".join(["TooLongDescription" for _ in range(500)])
+    resources = [
+        WebsiteContentFactory.create(
+            website=website,
+            title=title,
+            metadata={
+                "description": desc,
+                "resourcetype": RESOURCE_TYPE_VIDEO,
+                "video_files": {"video_captions_file": "abc123"},
+            },
+        )
+        for title, desc in (
+            (long_title, "desc"),
+            ("title", long_desc),
+            ("title", "desc"),
+        )
+    ]
+    truncatable_content = videos_with_truncatable_text(website)
+    assert len(resources[1].metadata["description"]) > 5000
+
+    if not is_ocw:
+        assert truncatable_content == []
+    else:
+        assert len(truncatable_content) == 2
+        for content in resources[:2]:
+            assert content in truncatable_content
+
+
@pytest.mark.parametrize("success", [True, False])
@pytest.mark.parametrize("version", ["live", "draft"])
def test_mail_on_publish(settings, mocker, success, version, permission_groups):
@@ -416,14 +451,21 @@ def test_update_unpublished_website_status(status, version):
@pytest.mark.parametrize("has_missing_ids", [True, False])
@pytest.mark.parametrize("has_missing_captions", [True, False])
-def test_incomplete_content_warnings(mocker, has_missing_ids, has_missing_captions):
+@pytest.mark.parametrize("has_truncatable_text", [True, False])
+def test_incomplete_content_warnings(
+ mocker, has_missing_ids, has_missing_captions, has_truncatable_text
+):
"""incomplete_content_warnings should return expected warning messages"""
website = WebsiteFactory.create()
video_content = WebsiteContentFactory.create_batch(3, website=website)
no_yt_ids = video_content[0:2] if has_missing_ids else []
no_caps = video_content[1:3] if has_missing_captions else []
+ truncatable_vids = [video_content[2]] if has_truncatable_text else []
+ mocker.patch(
+ "websites.api.videos_with_truncatable_text", return_value=truncatable_vids
+ )
mocker.patch(
- "websites.api.unassigned_youtube_ids",
+ "websites.api.videos_with_unassigned_youtube_ids",
return_value=no_yt_ids,
)
mocker.patch(
@@ -431,11 +473,18 @@ def test_incomplete_content_warnings(mocker, has_missing_ids, has_missing_captio
return_value=no_caps,
)
warnings = incomplete_content_warnings(website)
+ warnings_len = 0
if has_missing_ids:
+ warnings_len += 1
for content in no_yt_ids:
assert content.title in warnings[0]
if has_missing_captions:
+ warnings_len += 1
for content in no_caps:
assert content.title in warnings[1 if has_missing_ids else 0]
- if not has_missing_ids and not has_missing_captions:
+ if has_truncatable_text:
+ warnings_len += 1
+ assert len(warnings) == warnings_len
+ assert video_content[2].title in warnings[warnings_len - 1]
+ if not has_missing_ids and not has_missing_captions and not has_truncatable_text:
assert warnings == []