Enforce youtube length limits when uploading/updating title, descript…

…ion (#1009)
mitodl · Feb 24, 2022 · c7ecb1a · c7ecb1a
1 parent e2a38eb
commit c7ecb1a
Show file tree

Hide file tree

Showing 8 changed files with 159 additions and 25 deletions.
diff --git a/main/utils.py b/main/utils.py
@@ -4,7 +4,7 @@
 import re
 from enum import Flag, auto
 from pathlib import Path
-from typing import Tuple
+from typing import Optional, Tuple
 from uuid import UUID, uuid4
 
 from django.http import HttpRequest
@@ -109,3 +109,11 @@ def valid_key(key: str, request: HttpRequest) -> bool:
     digest = hmac.new(key.encode("utf-8"), request.body, hashlib.sha1).hexdigest()
     sig_parts = request.headers["X-Hub-Signature"].split("=", 1)
     return hmac.compare_digest(sig_parts[1], digest)
+
+
+def truncate_words(content: str, length: int, suffix: Optional[str] = "...") -> str:
+    """Truncate text to < length chars, keeping words intact"""
+    if len(content) <= length:
+        return content
+    else:
+        return content[: (length - len(suffix))].rsplit(" ", 1)[0] + suffix
diff --git a/main/utils_test.py b/main/utils_test.py
@@ -7,6 +7,7 @@
     get_file_extension,
     is_valid_uuid,
     remove_trailing_slashes,
+    truncate_words,
     valid_key,
 )
 
@@ -95,3 +96,11 @@ def test_valid_key(mocker, key, is_valid):
         headers={"X-Hub-Signature": "sha1=6a4e7673fa9c3afbb2860ae03ac2082958313a9c"},
     )
     assert valid_key(key, mock_request) is is_valid
+
+
+@pytest.mark.parametrize(
+    "text, truncated", [["Hello world", "Hello___"], ["HelloWorld", "HelloW___"]]
+)
+def test_truncate_words(text, truncated):
+    """ truncate_words returns expected result"""
+    assert truncate_words(text, 9, suffix="___") == truncated
diff --git a/static/js/components/PublishDrawer.tsx b/static/js/components/PublishDrawer.tsx
@@ -149,8 +149,7 @@ export default function PublishDrawer(props: Props): JSX.Element {
         {website.content_warnings && !isEmpty(website.content_warnings) ? (
           <div className="publish-warnings pt-2">
             <strong className="text-danger">
-              This site is missing information that could affect publishing
-              output.
+              This site has issues that could affect publishing output.
             </strong>
             <ul className="text-danger">
               {website.content_warnings.map((warning: string, idx: number) => (

diff --git a/videos/constants.py b/videos/constants.py
@@ -8,6 +8,8 @@
 ALL_DESTINATIONS = [DESTINATION_YOUTUBE, DESTINATION_ARCHIVE]
 
 YT_THUMBNAIL_IMG = "https://img.youtube.com/vi/{video_id}/default.jpg"
+YT_MAX_LENGTH_TITLE = 100
+YT_MAX_LENGTH_DESCRIPTION = 5000
 
 
 class VideoStatus:

diff --git a/videos/youtube.py b/videos/youtube.py
@@ -18,7 +18,12 @@
 from smart_open.s3 import Reader
 
 from content_sync.constants import VERSION_DRAFT, VERSION_LIVE
-from videos.constants import DESTINATION_YOUTUBE
+from main.utils import truncate_words
+from videos.constants import (
+    DESTINATION_YOUTUBE,
+    YT_MAX_LENGTH_DESCRIPTION,
+    YT_MAX_LENGTH_TITLE,
+)
 from videos.messages import YouTubeUploadFailureMessage, YouTubeUploadSuccessMessage
 from videos.models import VideoFile
 from websites.api import is_ocw_site
@@ -209,7 +214,9 @@ def upload_video(self, videofile: VideoFile, privacy="unlisted"):
         original_name = videofile.video.source_key.split("/")[-1]
         request_body = dict(
             snippet=dict(
-                title=strip_bad_chars(original_name)[:100],
+                title=truncate_words(
+                    strip_bad_chars(original_name), YT_MAX_LENGTH_TITLE
+                ),
                 description="",
                 categoryId=settings.YT_CATEGORY_ID,
             ),
@@ -304,8 +311,12 @@ def update_video(self, resource: WebsiteContent, privacy=None):
             body={
                 "id": youtube_id,
                 "snippet": {
-                    "title": resource.title,
-                    "description": description,
+                    "title": truncate_words(
+                        strip_bad_chars(resource.title), YT_MAX_LENGTH_TITLE
+                    ),
+                    "description": truncate_words(
+                        strip_bad_chars(description), YT_MAX_LENGTH_DESCRIPTION
+                    ),
                     "tags": get_dict_field(metadata, settings.YT_FIELD_TAGS),
                     "categoryId": settings.YT_CATEGORY_ID,
                 },

diff --git a/videos/youtube_test.py b/videos/youtube_test.py
@@ -12,7 +12,11 @@
 from content_sync.constants import VERSION_DRAFT, VERSION_LIVE
 from users.factories import UserFactory
 from videos.conftest import MockHttpErrorResponse
-from videos.constants import DESTINATION_YOUTUBE
+from videos.constants import (
+    DESTINATION_YOUTUBE,
+    YT_MAX_LENGTH_DESCRIPTION,
+    YT_MAX_LENGTH_TITLE,
+)
 from videos.factories import VideoFactory, VideoFileFactory
 from videos.messages import YouTubeUploadFailureMessage, YouTubeUploadSuccessMessage
 from videos.youtube import (
@@ -174,7 +178,7 @@ def test_upload_video_long_fields(mocker, youtube_mocker):
     mock_upload = youtube_mocker().videos.return_value.insert
     YouTubeApi().upload_video(video_file)
     called_args, called_kwargs = mock_upload.call_args
-    assert called_kwargs["body"]["snippet"]["title"] == name[:100]
+    assert called_kwargs["body"]["snippet"]["title"] == f"{name[:97]}..."
 
 
 def test_delete_video(youtube_mocker):
@@ -191,20 +195,30 @@ def test_update_video(settings, mocker, youtube_mocker, privacy):
     """update_video should send the correct data in a request to update youtube metadata"""
     speakers = "speaker1, speaker2"
     tags = "tag1, tag2"
-    youtube_id = "abc123"
-    description = "video test description"
+    youtube_id = "test video description"
+    title = "TitleLngt>"
+    description = "DescLngth>"
     content = WebsiteContentFactory.create(
+        title=" ".join([title for i in range(11)]),
         metadata={
             "resourcetype": RESOURCE_TYPE_VIDEO,
-            "description": description,
+            "description": " ".join([description for _ in range(501)]),
             "video_metadata": {
                 "youtube_id": youtube_id,
                 "video_tags": tags,
                 "video_speakers": speakers,
             },
-        }
+        },
     )
 
+    expected_title = f'{" ".join([title.replace(">", "") for _ in range(9)])}...'
+    expected_desc = f'{" ".join([description.replace(">", "") for _ in range(499)])}...'
+
+    assert len(content.title) > YT_MAX_LENGTH_TITLE
+    assert len(content.metadata["description"]) > YT_MAX_LENGTH_DESCRIPTION
+    assert len(expected_title) <= YT_MAX_LENGTH_TITLE
+    assert len(expected_desc) <= YT_MAX_LENGTH_DESCRIPTION
+
     mock_update_caption = mocker.patch("videos.youtube.YouTubeApi.update_captions")
 
     YouTubeApi().update_video(content, privacy=privacy)
@@ -213,8 +227,8 @@ def test_update_video(settings, mocker, youtube_mocker, privacy):
         body={
             "id": youtube_id,
             "snippet": {
-                "title": content.title,
-                "description": f"{description}\n\nSpeakers: {speakers}",
+                "title": expected_title,
+                "description": expected_desc,
                 "tags": tags,
                 "categoryId": settings.YT_CATEGORY_ID,
             },

diff --git a/websites/api.py b/websites/api.py
@@ -7,14 +7,20 @@
 
 from django.conf import settings
 from django.core.files.uploadedfile import UploadedFile
-from django.db.models import Q, QuerySet
+from django.db.models import CharField, Q, QuerySet
+from django.db.models.fields.json import KeyTextTransform
+from django.db.models.functions import Cast, Length
 from magic import Magic
 from mitol.common.utils import max_or_none, now_in_utc
 from mitol.mail.api import get_message_sender
 
 from content_sync.constants import VERSION_DRAFT
 from users.models import User
-from videos.constants import YT_THUMBNAIL_IMG
+from videos.constants import (
+    YT_MAX_LENGTH_DESCRIPTION,
+    YT_MAX_LENGTH_TITLE,
+    YT_THUMBNAIL_IMG,
+)
 from websites.constants import (
     CONTENT_FILENAME_MAX_LEN,
     PUBLISH_STATUS_ABORTED,
@@ -198,7 +204,7 @@ def update_youtube_thumbnail(website_id: str, metadata: Dict, overwrite=False):
             )
 
 
-def unassigned_youtube_ids(website: Website) -> List[WebsiteContent]:
+def videos_with_unassigned_youtube_ids(website: Website) -> List[WebsiteContent]:
     """Return a list of WebsiteContent objects for videos with unassigned youtube ids"""
     if not is_ocw_site(website):
         return []
@@ -217,6 +223,34 @@ def unassigned_youtube_ids(website: Website) -> List[WebsiteContent]:
     )
 
 
+def videos_with_truncatable_text(website: Website) -> List[WebsiteContent]:
+    """Return a list of WebsiteContent objects with text fields that will be truncated in YouTube"""
+    if not is_ocw_site(website):
+        return []
+    query_resource_type_field = get_dict_query_field(
+        "metadata", settings.FIELD_RESOURCETYPE
+    )
+    return (
+        WebsiteContent.objects.annotate(
+            desc_len=Length(
+                Cast(
+                    KeyTextTransform(settings.YT_FIELD_DESCRIPTION, "metadata"),
+                    CharField(),
+                )
+            )
+        )
+        .annotate(title_len=Length("title"))
+        .filter(
+            Q(website=website)
+            & Q(**{query_resource_type_field: RESOURCE_TYPE_VIDEO})
+            & (
+                Q(desc_len__gt=YT_MAX_LENGTH_DESCRIPTION)
+                | Q(title_len__gt=YT_MAX_LENGTH_TITLE)
+            )
+        )
+    )
+
+
 def videos_missing_captions(website: Website) -> List[WebsiteContent]:
     """Return a list of WebsiteContent objects for videos with unassigned captions"""
     if not is_ocw_site(website):
@@ -319,14 +353,18 @@ def incomplete_content_warnings(website):
     Return array with error/warning messages for any website content missing expected data
     (currently: video youtube ids and captions).
     """
-    missing_youtube_ids = unassigned_youtube_ids(website)
+    missing_youtube_ids = videos_with_unassigned_youtube_ids(website)
 
     missing_youtube_ids_titles = [video.title for video in missing_youtube_ids]
 
     missing_captions_titles = [
         video.title for video in videos_missing_captions(website)
     ]
 
+    truncatable_video_titles = [
+        video.title for video in videos_with_truncatable_text(website)
+    ]
+
     messages = []
 
     if len(missing_youtube_ids_titles) > 0:
@@ -337,5 +375,9 @@ def incomplete_content_warnings(website):
         messages.append(
             f"The following videos have missing captions: {', '.join(missing_captions_titles)}"
         )
+    if len(truncatable_video_titles) > 0:
+        messages.append(
+            f"The following videos have titles or descriptions that will be truncated on YouTube: {', '.join(truncatable_video_titles)}"
+        )
 
     return messages
diff --git a/websites/api_test.py b/websites/api_test.py
@@ -16,10 +16,11 @@
     incomplete_content_warnings,
     is_ocw_site,
     mail_on_publish,
-    unassigned_youtube_ids,
     update_website_status,
     update_youtube_thumbnail,
     videos_missing_captions,
+    videos_with_truncatable_text,
+    videos_with_unassigned_youtube_ids,
 )
 from websites.constants import (
     PUBLISH_STATUS_ERRORED,
@@ -228,7 +229,7 @@ def test_update_youtube_thumbnail(
 
 @pytest.mark.parametrize("is_ocw", [True, False])
 def test_unassigned_youtube_ids(mocker, is_ocw):
-    """unassigned_youtube_ids should return WebsiteContent objects for videos with no youtube ids"""
+    """videos_with_unassigned_youtube_ids should return WebsiteContent objects for videos with no youtube ids"""
     mocker.patch("websites.api.is_ocw_site", return_value=is_ocw)
     website = WebsiteFactory.create()
     WebsiteContentFactory.create_batch(
@@ -266,7 +267,7 @@ def test_unassigned_youtube_ids(mocker, is_ocw):
             "video_metadata": {"youtube_id": "bad_data"},
         },
     )
-    unassigned_content = unassigned_youtube_ids(website)
+    unassigned_content = videos_with_unassigned_youtube_ids(website)
     if is_ocw:
         assert len(unassigned_content) == 3
         for content in videos_without_ids:
@@ -317,6 +318,40 @@ def test_videos_missing_captions(mocker, is_ocw):
         assert len(unassigned_content) == 0
 
 
+@pytest.mark.parametrize("is_ocw", [True, False])
+def test_videos_with_truncatable_text(mocker, is_ocw):
+    """Videos with titles or descriptions that are too long should be returned"""
+    mocker.patch("websites.api.is_ocw_site", return_value=is_ocw)
+    website = WebsiteFactory.create()
+    title_descs = (
+        (" ".join(["TooLongTitle" for _ in range(10)]), "desc"),
+        ("title", " ".join(["TooLongDescription" for _ in range(500)])),
+        ("title", "desc"),
+    )
+    resources = []
+    for title, desc in title_descs:
+        resources.append(
+            WebsiteContentFactory.create(
+                website=website,
+                title=title,
+                metadata={
+                    "description": desc,
+                    "resourcetype": RESOURCE_TYPE_VIDEO,
+                    "video_files": {"video_captions_file": "abc123"},
+                },
+            )
+        )
+    truncatable_content = videos_with_truncatable_text(website)
+    assert len(resources[1].metadata["description"]) > 5000
+
+    if is_ocw:
+        assert len(truncatable_content) == 2
+        for content in resources[0:2]:
+            assert content in truncatable_content
+    else:
+        assert truncatable_content == []
+
+
 @pytest.mark.parametrize("success", [True, False])
 @pytest.mark.parametrize("version", ["live", "draft"])
 def test_mail_on_publish(settings, mocker, success, version, permission_groups):
@@ -416,26 +451,40 @@ def test_update_unpublished_website_status(status, version):
 
 @pytest.mark.parametrize("has_missing_ids", [True, False])
 @pytest.mark.parametrize("has_missing_captions", [True, False])
-def test_incomplete_content_warnings(mocker, has_missing_ids, has_missing_captions):
+@pytest.mark.parametrize("has_truncatable_text", [True, False])
+def test_incomplete_content_warnings(
+    mocker, has_missing_ids, has_missing_captions, has_truncatable_text
+):
     """incomplete_content_warnings should return expected warning messages"""
     website = WebsiteFactory.create()
     video_content = WebsiteContentFactory.create_batch(3, website=website)
     no_yt_ids = video_content[0:2] if has_missing_ids else []
     no_caps = video_content[1:3] if has_missing_captions else []
+    truncatable_vids = [video_content[2]] if has_truncatable_text else []
+    mocker.patch(
+        "websites.api.videos_with_truncatable_text", return_value=truncatable_vids
+    )
     mocker.patch(
-        "websites.api.unassigned_youtube_ids",
+        "websites.api.videos_with_unassigned_youtube_ids",
         return_value=no_yt_ids,
     )
     mocker.patch(
         "websites.api.videos_missing_captions",
         return_value=no_caps,
     )
     warnings = incomplete_content_warnings(website)
+    warnings_len = 0
     if has_missing_ids:
+        warnings_len += 1
         for content in no_yt_ids:
             assert content.title in warnings[0]
     if has_missing_captions:
+        warnings_len += 1
         for content in no_caps:
             assert content.title in warnings[1 if has_missing_ids else 0]
-    if not has_missing_ids and not has_missing_captions:
+    if has_truncatable_text:
+        warnings_len += 1
+        assert len(warnings) == warnings_len
+        assert video_content[2].title in warnings[warnings_len - 1]
+    if not has_missing_ids and not has_missing_captions and not has_truncatable_text:
         assert warnings == []