diff --git a/lms/djangoapps/certificates/admin.py b/lms/djangoapps/certificates/admin.py index 3facf6f6b53..ffa22532120 100644 --- a/lms/djangoapps/certificates/admin.py +++ b/lms/djangoapps/certificates/admin.py @@ -22,6 +22,7 @@ CertificateTemplateAsset, GeneratedCertificate, ModifiedCertificateTemplateCommandConfiguration, + PurgeReferencestoPDFCertificatesCommandConfiguration, ) @@ -103,6 +104,11 @@ class CertificateGenerationCommandConfigurationAdmin(ConfigurationModelAdmin): pass +@admin.register(PurgeReferencestoPDFCertificatesCommandConfiguration) +class PurgeReferencestoPDFCertificatesCommandConfigurationAdmin(ConfigurationModelAdmin): + pass + + class CertificateDateOverrideAdmin(admin.ModelAdmin): """ # Django admin customizations for CertificateDateOverride model diff --git a/lms/djangoapps/certificates/management/commands/purge_references_to_pdf_certificates.py b/lms/djangoapps/certificates/management/commands/purge_references_to_pdf_certificates.py new file mode 100644 index 00000000000..384e58b3083 --- /dev/null +++ b/lms/djangoapps/certificates/management/commands/purge_references_to_pdf_certificates.py @@ -0,0 +1,104 @@ +""" +A management command designed to be part of the retirement pipeline for any Open EdX operators +with users who still have legacy PDF certificates. + +Once an external process has run to remove the four files comprising a legacy PDF certificate, +this management command will remove the reference to the file from the certificate record. + +Note: it is important to retain the reference in the certificate table until +the files have been deleted, because that reference is the files' identifying descriptor. +""" + +import logging +import shlex + +from django.contrib.auth import get_user_model +from django.core.management.base import BaseCommand, CommandError + +from lms.djangoapps.certificates.models import ( + GeneratedCertificate, + PurgeReferencestoPDFCertificatesCommandConfiguration, +) + +User = get_user_model() +log = logging.getLogger(__name__) + + +class Command(BaseCommand): + """ + Doesn't invoke the custom save() function defined as part of the `GeneratedCertificate` + model; perforce will emit no Django signals. This is desired behavior. We are + using this management command to purge information that was never sent to any + other systems, so we don't need to propagate updates. + + Example usage: + + # Dry Run (preview changes): + $ ./manage.py lms purge_references_to_pdf_certificates --dry-run + + # Purge data: + $ ./manage.py lms purge_references_to_pdf_certificates + """ + + help = """Purges references to PDF certificates. Intended to be run after the files have been deleted.""" + + def add_arguments(self, parser): + parser.add_argument( + "--dry-run", + action="store_true", + help="Shows a preview of what users would be affected by running this management command.", + ) + parser.add_argument( + "--certificate_ids", + nargs="+", + dest="certificate_ids", + help="space-separated list of GeneratedCertificate IDs to clean up", + ) + parser.add_argument( + "--args-from-database", + action="store_true", + help=( + "Use arguments from the PurgeReferencesToPDFCertificatesCommandConfiguration " + "model instead of the command line" + ), + ) + + def get_args_from_database(self): + """ + Returns an options dictionary from the current CertificateGenerationCommandConfiguration model. + """ + config = PurgeReferencestoPDFCertificatesCommandConfiguration.current() + if not config.enabled: + raise CommandError( + "PurgeReferencestoPDFCertificatesCommandConfiguration is disabled, " + "but --args-from-database was requested" + ) + + args = shlex.split(config.arguments) + parser = self.create_parser("manage.py", "purge_references_to_pdf_certificates") + + return vars(parser.parse_args(args)) + + def handle(self, *args, **options): + # database args will override cmd line args + if options["args_from_database"]: + options = self.get_args_from_database() + + if options["dry_run"]: + dry_run_string = "[DRY RUN] " + else: + dry_run_string = "" + + certificate_ids = options.get("certificate_ids") + if not certificate_ids: + raise CommandError("You must specify one or more certificate IDs") + + log.info( + f"{dry_run_string}Purging download_url and download_uri " + f"from the following certificate records: {certificate_ids}" + ) + if not options["dry_run"]: + GeneratedCertificate.objects.filter(id__in=certificate_ids).update( + download_url="", + download_uuid="", + ) diff --git a/lms/djangoapps/certificates/management/commands/tests/test_purge_references_to_pdf_certificates.py b/lms/djangoapps/certificates/management/commands/tests/test_purge_references_to_pdf_certificates.py new file mode 100644 index 00000000000..b831aa813fd --- /dev/null +++ b/lms/djangoapps/certificates/management/commands/tests/test_purge_references_to_pdf_certificates.py @@ -0,0 +1,101 @@ +""" +Tests for the `purge_references_to_pdf_certificates` management command. +""" + +import uuid + +from django.core.management import CommandError, call_command +from testfixtures import LogCapture + +from common.djangoapps.student.tests.factories import UserFactory +from lms.djangoapps.certificates.models import GeneratedCertificate +from lms.djangoapps.certificates.tests.factories import GeneratedCertificateFactory +from xmodule.modulestore.tests.django_utils import ModuleStoreTestCase +from xmodule.modulestore.tests.factories import CourseFactory + + +class PurgeReferencesToPDFCertificatesTests(ModuleStoreTestCase): + """ + Tests for the `purge_references_to_pdf_certificates` management command. + """ + + def setUp(self): + super().setUp() + self.user = UserFactory() + self.course_run_1 = CourseFactory() + self.course_run_2 = CourseFactory() + self.course_run_3 = CourseFactory() + self.cert_1 = GeneratedCertificateFactory( + user=self.user, + course_id=self.course_run_1.id, + download_url="http://example.com/1", + download_uuid=uuid.uuid4(), + grade=1.00, + ) + self.cert_2 = GeneratedCertificateFactory( + user=self.user, + course_id=self.course_run_2.id, + download_url="http://example.com/2", + download_uuid=uuid.uuid4(), + grade=2.00, + ) + self.cert_3 = GeneratedCertificateFactory( + user=self.user, + course_id=self.course_run_3.id, + download_url="http://example.com/3", + download_uuid=uuid.uuid4(), + grade=3.00, + ) + + def test_command_with_missing_certificate_ids(self): + """ + Verify command with a missing certificate_ids param. + """ + with self.assertRaises(CommandError): + call_command("purge_references_to_pdf_certificates") + + def test_management_command(self): + """ + Verify the management command purges expected data from only the certs requested. + """ + call_command( + "purge_references_to_pdf_certificates", + "--certificate_ids", + self.cert_2.id, + self.cert_3.id, + ) + + cert1_post = GeneratedCertificate.objects.get(id=self.cert_1.id) + cert2_post = GeneratedCertificate.objects.get(id=self.cert_2.id) + cert3_post = GeneratedCertificate.objects.get(id=self.cert_3.id) + self.assertEqual(cert1_post.download_url, "http://example.com/1") + self.assertNotEqual(cert1_post.download_uuid, "") + + self.assertEqual(cert2_post.download_url, "") + self.assertEqual(cert2_post.download_uuid, "") + + self.assertEqual(cert3_post.download_url, "") + self.assertEqual(cert3_post.download_uuid, "") + + def test_management_command_dry_run(self): + """ + Verify that the management command does not purge any data when invoked with the `--dry-run` flag + """ + expected_log_msg = ( + "[DRY RUN] Purging download_url and download_uri " + f"from the following certificate records: {list(str(self.cert_3.id))}" + ) + + with LogCapture() as logger: + call_command( + "purge_references_to_pdf_certificates", + "--dry-run", + "--certificate_ids", + self.cert_3.id, + ) + + cert3_post = GeneratedCertificate.objects.get(id=self.cert_3.id) + self.assertEqual(cert3_post.download_url, "http://example.com/3") + self.assertNotEqual(cert3_post.download_uuid, "") + + assert logger.records[0].msg == expected_log_msg diff --git a/lms/djangoapps/certificates/migrations/0038_pdf_certificate_purge_data_management.py b/lms/djangoapps/certificates/migrations/0038_pdf_certificate_purge_data_management.py new file mode 100644 index 00000000000..6816719975e --- /dev/null +++ b/lms/djangoapps/certificates/migrations/0038_pdf_certificate_purge_data_management.py @@ -0,0 +1,29 @@ +# Generated by Django 4.2.18 on 2025-02-20 22:36 + +from django.conf import settings +from django.db import migrations, models +import django.db.models.deletion + + +class Migration(migrations.Migration): + + dependencies = [ + migrations.swappable_dependency(settings.AUTH_USER_MODEL), + ('certificates', '0037_fix_legacy_broken_invalid_certs'), + ] + + operations = [ + migrations.CreateModel( + name='PurgeReferencestoPDFCertificatesCommandConfiguration', + fields=[ + ('id', models.AutoField(auto_created=True, primary_key=True, serialize=False, verbose_name='ID')), + ('change_date', models.DateTimeField(auto_now_add=True, verbose_name='Change date')), + ('enabled', models.BooleanField(default=False, verbose_name='Enabled')), + ('arguments', models.TextField(blank=True, default='', help_text="Arguments for the 'purge_references_to_pdf_certificates' management command. Specify like '--certificate_ids '")), + ('changed_by', models.ForeignKey(editable=False, null=True, on_delete=django.db.models.deletion.PROTECT, to=settings.AUTH_USER_MODEL, verbose_name='Changed by')), + ], + options={ + 'verbose_name': 'purge_references_to_pdf_certificates argument', + }, + ), + ] diff --git a/lms/djangoapps/certificates/models.py b/lms/djangoapps/certificates/models.py index 9c58327b99a..61ce095aecd 100644 --- a/lms/djangoapps/certificates/models.py +++ b/lms/djangoapps/certificates/models.py @@ -1289,6 +1289,30 @@ def __str__(self): return str(self.arguments) +class PurgeReferencestoPDFCertificatesCommandConfiguration(ConfigurationModel): + """ + Manages configuration for a run of the purge_references_to_pdf_certificates management command. + + .. no_pii: + """ + + class Meta: + app_label = "certificates" + verbose_name = "purge_references_to_pdf_certificates argument" + + arguments = models.TextField( + blank=True, + help_text=( + "Arguments for the 'purge_references_to_pdf_certificates' management command. " + "Specify like '--certificate_ids '" + ), + default="", + ) + + def __str__(self): + return str(self.arguments) + + class CertificateDateOverride(TimeStampedModel): """ Model to manually override a given certificate date with the given date.