-
Notifications
You must be signed in to change notification settings - Fork 19
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
#3299: Script to update suborg values - [ZA] #3322
base: main
Are you sure you want to change the base?
Changes from all commits
5e5626a
14b6382
ae2e4c5
1b6667a
f170ab9
028d033
db75d31
bff9899
2fa8366
bf9824a
25cf2fe
e60d1db
104b1f8
0ddb3a2
e58d68b
5e697c3
9a887d8
19e4dff
ac08b17
2ca7cd4
2d509cf
51105eb
48f4708
e95bcb4
bf8cdc5
2fad00d
e184149
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,133 @@ | ||
import logging | ||
from django.core.management import BaseCommand | ||
from registrar.models import Suborganization, DomainRequest, DomainInformation | ||
from registrar.management.commands.utility.terminal_helper import TerminalColors, TerminalHelper | ||
from registrar.models.utility.generic_helper import count_capitals, normalize_string | ||
|
||
|
||
logger = logging.getLogger(__name__) | ||
|
||
|
||
class Command(BaseCommand): | ||
help = "Clean up duplicate suborganizations that differ only by spaces and capitalization" | ||
|
||
def handle(self, **kwargs): | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I really like the us of help text here! |
||
"""Process manual deletions and find/remove duplicates. Shows preview | ||
and updates DomainInformation / DomainRequest sub_organization references before deletion.""" | ||
|
||
# First: get a preset list of records we want to delete. | ||
# For extra_records_to_prune: the key gets deleted, the value gets kept. | ||
extra_records_to_prune = { | ||
normalize_string("Assistant Secretary for Preparedness and Response Office of the Secretary"): { | ||
"replace_with": "Assistant Secretary for Preparedness and Response, Office of the Secretary" | ||
}, | ||
normalize_string("US Geological Survey"): {"replace_with": "U.S. Geological Survey"}, | ||
normalize_string("USDA/OC"): {"replace_with": "USDA, Office of Communications"}, | ||
normalize_string("GSA, IC, OGP WebPortfolio"): {"replace_with": "GSA, IC, OGP Web Portfolio"}, | ||
normalize_string("USDA/ARS/NAL"): {"replace_with": "USDA, ARS, NAL"}, | ||
} | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Curious as to where these values came from? Are they supposed to be hardcorded? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. They come from this spreadsheet. The create_federal_portfolio script assumes that the org names the user provides are always correct, but some domains misspelt their org name. Since its just a misspelling, these corner cases are hard coded. Data cleanup basically |
||
|
||
# Second: loop through every Suborganization and return a dict of what to keep, and what to delete | ||
# for each duplicate or "incorrect" record. We do this by pruning records with extra spaces or bad caps | ||
# Note that "extra_records_to_prune" is just a manual mapping. | ||
records_to_prune = self.get_records_to_prune(extra_records_to_prune) | ||
if len(records_to_prune) == 0: | ||
TerminalHelper.colorful_logger(logger.error, TerminalColors.FAIL, "No suborganizations to delete.") | ||
return | ||
|
||
# Third: Build a preview of the changes | ||
total_records_to_remove = 0 | ||
preview_lines = ["The following records will be removed:"] | ||
for data in records_to_prune.values(): | ||
keep = data.get("keep") | ||
delete = data.get("delete") | ||
if keep: | ||
preview_lines.append(f"Keeping: '{keep.name}' (id: {keep.id})") | ||
|
||
for duplicate in delete: | ||
preview_lines.append(f"Removing: '{duplicate.name}' (id: {duplicate.id})") | ||
total_records_to_remove += 1 | ||
preview_lines.append("") | ||
preview = "\n".join(preview_lines) | ||
|
||
# Fourth: Get user confirmation and delete | ||
if TerminalHelper.prompt_for_execution( | ||
system_exit_on_terminate=True, | ||
prompt_message=preview, | ||
prompt_title=f"Remove {total_records_to_remove} suborganizations?", | ||
verify_message="*** WARNING: This will replace the record on DomainInformation and DomainRequest! ***", | ||
): | ||
try: | ||
# Update all references to point to the right suborg before deletion | ||
all_suborgs_to_remove = set() | ||
for record in records_to_prune.values(): | ||
best_record = record["keep"] | ||
suborgs_to_remove = {dupe.id for dupe in record["delete"]} | ||
DomainRequest.objects.filter(sub_organization_id__in=suborgs_to_remove).update( | ||
sub_organization=best_record | ||
) | ||
DomainInformation.objects.filter(sub_organization_id__in=suborgs_to_remove).update( | ||
sub_organization=best_record | ||
) | ||
all_suborgs_to_remove.update(suborgs_to_remove) | ||
# Delete the suborgs | ||
delete_count, _ = Suborganization.objects.filter(id__in=all_suborgs_to_remove).delete() | ||
TerminalHelper.colorful_logger( | ||
logger.info, TerminalColors.MAGENTA, f"Successfully deleted {delete_count} suborganizations." | ||
) | ||
except Exception as e: | ||
TerminalHelper.colorful_logger( | ||
logger.error, TerminalColors.FAIL, f"Failed to delete suborganizations: {str(e)}" | ||
) | ||
|
||
def get_records_to_prune(self, extra_records_to_prune): | ||
"""Maps all suborgs into a dictionary with a record to keep, and an array of records to delete.""" | ||
# First: Group all suborganization names by their "normalized" names (finding duplicates). | ||
# Returns a dict that looks like this: | ||
# { | ||
# "amtrak": [<Suborganization: AMTRAK>, <Suborganization: aMtRaK>, <Suborganization: AMTRAK >], | ||
# "usda/oc": [<Suborganization: USDA/OC>], | ||
# ...etc | ||
# } | ||
# | ||
name_groups = {} | ||
for suborg in Suborganization.objects.all(): | ||
normalized_name = normalize_string(suborg.name) | ||
name_groups.setdefault(normalized_name, []).append(suborg) | ||
|
||
# Second: find the record we should keep, and the records we should delete | ||
# Returns a dict that looks like this: | ||
# { | ||
# "amtrak": { | ||
# "keep": <Suborganization: AMTRAK> | ||
# "delete": [<Suborganization: aMtRaK>, <Suborganization: AMTRAK >] | ||
# }, | ||
# "usda/oc": { | ||
# "keep": <Suborganization: USDA, Office of Communications>, | ||
# "delete": [<Suborganization: USDA/OC>] | ||
# }, | ||
# ...etc | ||
# } | ||
records_to_prune = {} | ||
for normalized_name, duplicate_suborgs in name_groups.items(): | ||
# Delete data from our preset list | ||
if normalized_name in extra_records_to_prune: | ||
# The 'keep' field expects a Suborganization but we just pass in a string, so this is just a workaround. | ||
# This assumes that there is only one item in the name_group array (see usda/oc example). | ||
# But this should be fine, given our data. | ||
hardcoded_record_name = extra_records_to_prune[normalized_name]["replace_with"] | ||
name_group = name_groups.get(normalize_string(hardcoded_record_name)) | ||
keep = name_group[0] if name_group else None | ||
records_to_prune[normalized_name] = {"keep": keep, "delete": duplicate_suborgs} | ||
# Delete duplicates (extra spaces or casing differences) | ||
elif len(duplicate_suborgs) > 1: | ||
# Pick the best record (fewest spaces, most leading capitals) | ||
best_record = max( | ||
duplicate_suborgs, | ||
key=lambda suborg: (-suborg.name.count(" "), count_capitals(suborg.name, leading_only=True)), | ||
) | ||
records_to_prune[normalized_name] = { | ||
"keep": best_record, | ||
"delete": [s for s in duplicate_suborgs if s != best_record], | ||
} | ||
return records_to_prune |
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -401,16 +401,15 @@ def prompt_for_execution( | |
# Allow the user to inspect the command string | ||
# and ask if they wish to proceed | ||
proceed_execution = TerminalHelper.query_yes_no_exit( | ||
f"""{TerminalColors.OKCYAN} | ||
===================================================== | ||
{prompt_title} | ||
===================================================== | ||
{verify_message} | ||
|
||
{prompt_message} | ||
{TerminalColors.FAIL} | ||
Proceed? (Y = proceed, N = {action_description_for_selecting_no}) | ||
{TerminalColors.ENDC}""" | ||
f"\n{TerminalColors.OKCYAN}" | ||
"=====================================================" | ||
f"\n{prompt_title}\n" | ||
"=====================================================" | ||
f"\n{verify_message}\n" | ||
f"\n{prompt_message}\n" | ||
f"{TerminalColors.FAIL}" | ||
f"Proceed? (Y = proceed, N = {action_description_for_selecting_no})" | ||
f"{TerminalColors.ENDC}" | ||
) | ||
Comment on lines
403
to
413
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. This fixes the spacing issue that has been present on this helper function, so now it is "left aligned" (in a sense) |
||
|
||
# If the user decided to proceed return true. | ||
|
@@ -443,13 +442,14 @@ def print_to_file_conditional(print_condition: bool, filename: str, file_directo | |
f.write(file_contents) | ||
|
||
@staticmethod | ||
def colorful_logger(log_level, color, message): | ||
def colorful_logger(log_level, color, message, exc_info=True): | ||
"""Adds some color to your log output. | ||
|
||
Args: | ||
log_level: str | Logger.method -> Desired log level. ex: logger.info or "INFO" | ||
color: str | TerminalColors -> Output color. ex: TerminalColors.YELLOW or "YELLOW" | ||
message: str -> Message to display. | ||
exc_info: bool -> Whether the log should print exc_info or not | ||
""" | ||
|
||
if isinstance(log_level, str) and hasattr(logger, log_level.lower()): | ||
|
@@ -463,4 +463,4 @@ def colorful_logger(log_level, color, message): | |
terminal_color = color | ||
|
||
colored_message = f"{terminal_color}{message}{TerminalColors.ENDC}" | ||
log_method(colored_message) | ||
log_method(colored_message, exc_info=exc_info) |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Why was the comment
Returns the portfolio for the given federal_agency
removed?There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Good question! I just removed that because this function doesn't actually return anything - so I think it was a typo