Skip to content

Commit

Permalink
feat: management command to purge information about web certificates …
Browse files Browse the repository at this point in the history
…from retired users

for Open edX  operators who still have users with legacy PDF
certificates, retirement requires first extracting information from
the user's GeneratedCertificate record in order to delete the 4
associated files for each PDF certificate, and then removing the links
to the relevant files.  this creates a management command to do that
work.

FIXES: APER-3889
  • Loading branch information
deborahgu committed Feb 20, 2025
1 parent 7dd4a09 commit e7bd728
Show file tree
Hide file tree
Showing 5 changed files with 276 additions and 0 deletions.
6 changes: 6 additions & 0 deletions lms/djangoapps/certificates/admin.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@
CertificateTemplateAsset,
GeneratedCertificate,
ModifiedCertificateTemplateCommandConfiguration,
PurgeReferencestoPDFCertificatesCommandConfiguration,
)


Expand Down Expand Up @@ -103,6 +104,11 @@ class CertificateGenerationCommandConfigurationAdmin(ConfigurationModelAdmin):
pass


@admin.register(PurgeReferencestoPDFCertificatesCommandConfiguration)
class PurgeReferencestoPDFCertificatesCommandConfigurationAdmin(ConfigurationModelAdmin):
pass


class CertificateDateOverrideAdmin(admin.ModelAdmin):
"""
# Django admin customizations for CertificateDateOverride model
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,108 @@
"""
A management command designed to be part of the retirement pipeline for any Open EdX operators
with users who still have legacy PDF certificates.
Once an external process has run to remove the four files comprising a legacy PDF certificate,
this management command will remove the reference to the file from the certificate record.
Note: it is important to retain the reference in the certificate table until
the files have been deleted, because that reference is the files' identifying descriptor.
"""

import logging
import shlex

from django.contrib.auth import get_user_model
from django.core.management.base import BaseCommand, CommandError

from lms.djangoapps.certificates.data import CertificateStatuses
from lms.djangoapps.certificates.models import (
GeneratedCertificate,
PurgeReferencestoPDFCertificatesCommandConfiguration,
)
from openedx.core.djangoapps.user_api.api import get_retired_user_ids

User = get_user_model()
log = logging.getLogger(__name__)


class Command(BaseCommand):
"""
Invokes the custom save() function defined as part of the `GeneratedCertificate`
model; perforce will emit no Django signals. This is desired behavior. We are
using this management command to purge information that was never sent to any
other systems, so we don't need to propagate updates.
Example usage:
# Dry Run (preview changes):
$ ./manage.py lms purge_references_to_pdf_certificates --dry-run
# Purge data:
$ ./manage.py lms purge_references_to_pdf_certificates
"""

help = """Purges references to PDF certificates. Intended to be run after the files have been deleted."""

def add_arguments(self, parser):
parser.add_argument(
"--dry-run",
action="store_true",
help="Shows a preview of what users would be affected by running this management command.",
)
parser.add_argument(
"--certificate_ids",
nargs="+",
dest="certificate_ids",
help="space-separated list of GeneratedCertificate IDs to clean up",
)
parser.add_argument(
"--args-from-database",
action="store_true",
help=(
"Use arguments from the PurgeReferencesToPDFCertificatesCommandConfiguration "
"model instead of the command line"
),
)

def get_args_from_database(self):
"""
Returns an options dictionary from the current CertificateGenerationCommandConfiguration model.
"""
config = PurgeReferencestoPDFCertificatesCommandConfiguration.current()
if not config.enabled:
raise CommandError(
"PurgeReferencestoPDFCertificatesCommandConfiguration is disabled, "
"but --args-from-database was requested"
)

args = shlex.split(config.arguments)
parser = self.create_parser("manage.py", "purge_references_to_pdf_certificates")

return vars(parser.parse_args(args))

def handle(self, *args, **options):
# database args will override cmd line args
if options["args_from_database"]:
options = self.get_args_from_database()

if options["dry_run"]:
dry_run_string = "[DRY RUN] "
else:
dry_run_string = ""

certificate_ids = options.get("certificate_ids")
if not certificate_ids:
raise CommandError("You must specify one or more certificate IDs")

log.info(
f"{dry_run_string}Purging download_url and download_uri, and "
f"resetting status, from the following certificate records: {certificate_ids}"
)
if not options["dry_run"]:
x = GeneratedCertificate.objects.filter(id__in=certificate_ids).update(
download_url="",
download_uuid="",
status=CertificateStatuses.unavailable,
)
print(f"XX {x}")
Original file line number Diff line number Diff line change
@@ -0,0 +1,109 @@
"""
Tests for the `purge_references_to_pdf_certificates` management command.
"""

import uuid

from django.core.management import CommandError, call_command
from testfixtures import LogCapture

from common.djangoapps.student.tests.factories import UserFactory
from lms.djangoapps.certificates.data import CertificateStatuses
from lms.djangoapps.certificates.models import GeneratedCertificate
from lms.djangoapps.certificates.tests.factories import GeneratedCertificateFactory
from xmodule.modulestore.tests.django_utils import ModuleStoreTestCase
from xmodule.modulestore.tests.factories import CourseFactory


class PurgeReferencesToPDFCertificatesTests(ModuleStoreTestCase):
"""
Tests for the `purge_references_to_pdf_certificates` management command.
"""

def setUp(self):
super().setUp()
self.user = UserFactory()
self.course_run_1 = CourseFactory()
self.course_run_2 = CourseFactory()
self.course_run_3 = CourseFactory()
self.cert_1 = GeneratedCertificateFactory(
user=self.user,
course_id=self.course_run_1.id,
status=CertificateStatuses.downloadable,
download_url="http://example.com/1",
download_uuid=uuid.uuid4(),
grade=1.00,
)
self.cert_2 = GeneratedCertificateFactory(
user=self.user,
course_id=self.course_run_2.id,
status=CertificateStatuses.downloadable,
download_url="http://example.com/2",
download_uuid=uuid.uuid4(),
grade=2.00,
)
self.cert_3 = GeneratedCertificateFactory(
user=self.user,
course_id=self.course_run_3.id,
status=CertificateStatuses.downloadable,
download_url="http://example.com/3",
download_uuid=uuid.uuid4(),
grade=3.00,
)

def test_command_with_missing_certificate_ids(self):
"""
Verify command with a missing certificate_ids param.
"""
with self.assertRaises(CommandError):
call_command("purge_references_to_pdf_certificates")

def test_management_command(self):
"""
Verify the management command purges expected data from only the certs requested.
"""
call_command(
"purge_references_to_pdf_certificates",
"--certificate_ids",
self.cert_2.id,
self.cert_3.id,
)

cert1_post = GeneratedCertificate.objects.get(id=self.cert_1.id)
cert2_post = GeneratedCertificate.objects.get(id=self.cert_2.id)
cert3_post = GeneratedCertificate.objects.get(id=self.cert_3.id)
self.assertEqual(cert1_post.download_url, "http://example.com/1")
self.assertNotEqual(cert1_post.download_uuid, "")
self.assertEqual(cert1_post.status, CertificateStatuses.downloadable)

self.assertEqual(cert2_post.download_url, "")
self.assertEqual(cert2_post.download_uuid, "")
self.assertEqual(cert2_post.status, CertificateStatuses.unavailable)

self.assertEqual(cert3_post.download_url, "")
self.assertEqual(cert3_post.download_uuid, "")
self.assertEqual(cert3_post.status, CertificateStatuses.unavailable)

def test_management_command_dry_run(self):
"""
Verify that the management command does not purge any data when invoked with the `--dry-run` flag
"""
expected_log_msg = (
"[DRY RUN] Purging download_url and download_uri, and resetting status, "
f"from the following certificate records: {list(str(self.cert_3.id))}"
)

with LogCapture() as logger:
call_command(
"purge_references_to_pdf_certificates",
"--dry-run",
"--certificate_ids",
self.cert_3.id,
)

cert3_post = GeneratedCertificate.objects.get(id=self.cert_3.id)
self.assertEqual(cert3_post.download_url, "http://example.com/3")
self.assertNotEqual(cert3_post.download_uuid, "")
self.assertEqual(cert3_post.status, CertificateStatuses.downloadable)

assert logger.records[0].msg == expected_log_msg
Original file line number Diff line number Diff line change
@@ -0,0 +1,29 @@
# Generated by Django 4.2.18 on 2025-02-20 22:36

from django.conf import settings
from django.db import migrations, models
import django.db.models.deletion


class Migration(migrations.Migration):

dependencies = [
migrations.swappable_dependency(settings.AUTH_USER_MODEL),
('certificates', '0037_fix_legacy_broken_invalid_certs'),
]

operations = [
migrations.CreateModel(
name='PurgeReferencestoPDFCertificatesCommandConfiguration',
fields=[
('id', models.AutoField(auto_created=True, primary_key=True, serialize=False, verbose_name='ID')),
('change_date', models.DateTimeField(auto_now_add=True, verbose_name='Change date')),
('enabled', models.BooleanField(default=False, verbose_name='Enabled')),
('arguments', models.TextField(blank=True, default='', help_text="Arguments for the 'purge_references_to_pdf_certificates' management command. Specify like '--certificate_ids <id1> <id2>'")),
('changed_by', models.ForeignKey(editable=False, null=True, on_delete=django.db.models.deletion.PROTECT, to=settings.AUTH_USER_MODEL, verbose_name='Changed by')),
],
options={
'verbose_name': 'purge_references_to_pdf_certificates argument',
},
),
]
24 changes: 24 additions & 0 deletions lms/djangoapps/certificates/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -1289,6 +1289,30 @@ def __str__(self):
return str(self.arguments)


class PurgeReferencestoPDFCertificatesCommandConfiguration(ConfigurationModel):
"""
Manages configuration for a run of the purge_references_to_pdf_certificates management command.
.. no_pii:
"""

class Meta:
app_label = "certificates"
verbose_name = "purge_references_to_pdf_certificates argument"

arguments = models.TextField(
blank=True,
help_text=(
"Arguments for the 'purge_references_to_pdf_certificates' management command. "
"Specify like '--certificate_ids <id1> <id2>'"
),
default="",
)

def __str__(self):
return str(self.arguments)


class CertificateDateOverride(TimeStampedModel):
"""
Model to manually override a given certificate date with the given date.
Expand Down

0 comments on commit e7bd728

Please sign in to comment.