class Command(WebsiteFilterCommand):
    """
    Sync GDrive files with DB resources.

    Usage Examples

    ./manage.py sync_gdrive_files --filter course-id
    ./manage.py sync_gdrive_files --filter course-id --filename filename1
    """

    # Show the docstring as the command's help text.
    help = __doc__

    def add_arguments(self, parser):
        """
        Register the command line options.

        The inherited website options are added with ``--filter`` marked as
        required, and ``--filename`` is added to optionally restrict the run
        to specific GDrive file names.
        """
        super().add_arguments(parser, is_filter_required=True)

        parser.add_argument(
            "--filename",
            dest="filename",
            default="",
            help="If specified, only trigger processing files whose names are in this comma-delimited list",  # noqa: E501
        )

    @staticmethod
    def _parse_filenames(raw_filenames):
        """
        Split the comma-delimited ``--filename`` value into a set of names.

        Each entry is stripped *before* blank entries are dropped, so input
        such as ``"a, ,b"`` or a trailing ``", "`` never produces an empty
        name. An empty set means "do not filter by file name".
        """
        stripped_names = (name.strip() for name in raw_filenames.split(","))
        return {name for name in stripped_names if name}

    def _sync_file(self, gdrive_file, website):
        """
        Process a single GDrive file entry for ``website``.

        ``gdrive_file`` is indexed by ``"name"`` and ``"id"``. A failure is
        reported on stderr together with its cause and does not stop the run,
        so the remaining files are still processed.
        """
        try:
            self.stdout.write(
                f"Processing GDrive file {gdrive_file['name']} for {website.short_id}"  # noqa: E501
            )

            # Add/Update Drivefile objects and perform necessary operations
            process_file_result(gdrive_file, website)

            # Upload to S3 and transcoding operations if video
            # NOTE(review): presumably a Celery task executed in-process via
            # .apply(); confirm whether its failures propagate to this caller.
            process_drive_file.apply((gdrive_file["id"],))

            # Get the related drive file and update status
            drive_file = DriveFile.objects.get(file_id=gdrive_file["id"])
            create_gdrive_resource_content(drive_file)

        # Deliberately best-effort per file, but catch Exception rather than
        # everything: a bare except would also swallow KeyboardInterrupt and
        # SystemExit, making the command impossible to abort.
        except Exception as exc:  # pylint:disable=broad-except # noqa: BLE001
            self.stderr.write(
                f"Error processing GDrive file {gdrive_file['name']} for {website.short_id}: {exc}"  # noqa: E501
            )

    def handle(self, *args, **options):
        """
        Process the GDrive files of every selected website.

        For each website returned by ``filter_websites``, every listed GDrive
        file (optionally restricted by ``--filename``) is processed, and the
        website's content is then synced to its backend.
        """
        super().handle(*args, **options)

        filenames = self._parse_filenames(options["filename"])

        website_queryset = self.filter_websites(websites=Website.objects.all())

        for website in website_queryset:
            # Only the first element of the returned pair is used here: a
            # mapping whose values are lists of GDrive file entries.
            gdrive_subfolder_files, _ = _get_gdrive_files(website)

            for gdrive_files in gdrive_subfolder_files.values():
                for gdrive_file in gdrive_files:
                    if filenames and gdrive_file["name"] not in filenames:
                        continue

                    self._sync_file(gdrive_file, website)

            # Runs once per website, even when some of its files failed.
            backend = get_sync_backend(website)
            backend.sync_all_content_to_backend()

        self.stdout.write("Done")
step of range (0 - 4) + +IS_FILTER_REQUIRED = False diff --git a/main/management/commands/filter.py b/main/management/commands/filter.py index 491810988..c406702dd 100644 --- a/main/management/commands/filter.py +++ b/main/management/commands/filter.py @@ -7,6 +7,7 @@ from content_sync.constants import VERSION_DRAFT from content_sync.models import ContentSyncStateQuerySet +from main.constants import IS_FILTER_REQUIRED from videos.models import VideoQuerySet from websites.models import WebsiteContentQuerySet, WebsiteQuerySet @@ -17,7 +18,7 @@ class WebsiteFilterCommand(BaseCommand): filter_list = None exclude_list = None - def add_arguments(self, parser): + def add_arguments(self, parser, is_filter_required=IS_FILTER_REQUIRED): parser.add_argument( "--filter-json", dest="filter_json", @@ -29,6 +30,7 @@ def add_arguments(self, parser): "--filter", dest="filter", default="", + required=is_filter_required, help="If specified, only trigger website pipelines whose names are in this comma-delimited list", # noqa: E501 ) parser.add_argument( From 98f351d448a4d93e6d4a735a13e223ed7aa45bcf Mon Sep 17 00:00:00 2001 From: Doof Date: Wed, 31 Jul 2024 15:36:27 +0000 Subject: [PATCH 2/2] Release 0.124.0 --- RELEASE.rst | 5 +++++ main/settings.py | 2 +- 2 files changed, 6 insertions(+), 1 deletion(-) diff --git a/RELEASE.rst b/RELEASE.rst index ed8073028..01ad6610d 100644 --- a/RELEASE.rst +++ b/RELEASE.rst @@ -1,6 +1,11 @@ Release Notes ============= +Version 0.124.0 +--------------- + +- Management command for gdrive file sync (#2257) + Version 0.123.0 (Released July 30, 2024) --------------- diff --git a/main/settings.py b/main/settings.py index 4d40e03fe..613726558 100644 --- a/main/settings.py +++ b/main/settings.py @@ -26,7 +26,7 @@ # pylint: disable=too-many-lines -VERSION = "0.123.0" +VERSION = "0.124.0" SITE_ID = get_int( name="OCW_STUDIO_SITE_ID",