Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat(clp-package): Rework del_archives script to also find and delete archives by ID. #665

Open
wants to merge 32 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from 16 commits
Commits
Show all changes
32 commits
Select commit Hold shift + click to select a range
88acd44
Rework del_archives into archive manager
Eden-D-Zhang Jan 13, 2025
afd71d4
Implement find and dry-run
Eden-D-Zhang Jan 13, 2025
370e78c
Fix del_archives reference
Eden-D-Zhang Jan 13, 2025
209b883
fix check for end_ts
Eden-D-Zhang Jan 13, 2025
757d358
Fix argument passing between scripts
Eden-D-Zhang Jan 14, 2025
5d49d95
Rename variable for consistency
Eden-D-Zhang Jan 15, 2025
497d2a4
Fix find filter not working
Eden-D-Zhang Jan 15, 2025
f48ea7c
Improve formatting
Eden-D-Zhang Jan 15, 2025
5f880af
Remove trailing whitespace
Eden-D-Zhang Jan 15, 2025
c210992
Linter reformats
Eden-D-Zhang Jan 15, 2025
812ce45
Address review comments
Eden-D-Zhang Jan 16, 2025
08b8f80
Fix lint
Eden-D-Zhang Jan 16, 2025
59a910a
Fix Coderabbit suggestions
Eden-D-Zhang Jan 16, 2025
2b841fa
Implement review suggestions
Eden-D-Zhang Jan 22, 2025
89299ca
Lint fixes
Eden-D-Zhang Jan 22, 2025
f2b5542
Replace "ids" with BY_IDS_COMMAND
Eden-D-Zhang Jan 22, 2025
f9f4f8e
Add delete handler class
Eden-D-Zhang Jan 23, 2025
63775f5
Fix typo
Eden-D-Zhang Jan 23, 2025
1a5b0d2
PR review suggestions
Eden-D-Zhang Jan 24, 2025
7d60b99
Replace BY_IDS_COMMAND with DEL_BY_IDS_SUBCOMMAND
Eden-D-Zhang Jan 27, 2025
a1781da
Add line after fmt
Eden-D-Zhang Jan 27, 2025
4591b1b
Add type annotation to variables
Eden-D-Zhang Jan 27, 2025
4f9e165
Add comments on unclear types
Eden-D-Zhang Jan 27, 2025
1b925ce
Fix lint
Eden-D-Zhang Jan 27, 2025
a9e12af
Remove unsure type comments
Eden-D-Zhang Jan 27, 2025
8680126
Change list annotation to typing.List
Eden-D-Zhang Jan 29, 2025
7edbf77
Fix list annotation
Eden-D-Zhang Jan 29, 2025
a0eb7e2
Fix method call
Eden-D-Zhang Jan 29, 2025
3241f61
Add asserts for begin_timestamp, tweak typing
Eden-D-Zhang Jan 30, 2025
d69684a
Fix ids_list_string
Eden-D-Zhang Jan 30, 2025
325778f
Add executable permissions
Eden-D-Zhang Jan 30, 2025
c09bfbe
Add return
Eden-D-Zhang Jan 30, 2025
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Eden-D-Zhang marked this conversation as resolved.
Show resolved Hide resolved
Original file line number Diff line number Diff line change
@@ -0,0 +1,227 @@
import argparse
import logging
import subprocess
import sys
from pathlib import Path

from clp_py_utils.clp_config import StorageType

from clp_package_utils.general import (
CLP_DEFAULT_CONFIG_FILE_RELATIVE_PATH,
dump_container_config,
generate_container_config,
generate_container_name,
generate_container_start_cmd,
get_clp_home,
load_config_file,
validate_and_load_db_credentials_file,
)

# Command/Argument Constants
from clp_package_utils.scripts.native.archive_manager import (
BEGIN_TS_ARG,
BY_FILTER_COMMAND,
BY_IDS_COMMAND,
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

We should apply the renaming to BY_IDS_COMMAND as well, right?

DEL_COMMAND,
DRY_RUN_ARG,
END_TS_ARG,
FIND_COMMAND,
)

logger = logging.getLogger(__file__)


def _validate_timestamps(begin_ts, end_ts):
if begin_ts is not None and begin_ts < 0:
logger.error("begin-ts must be non-negative.")
return False
if end_ts is not None and end_ts < 0:
logger.error("end-ts must be non-negative.")
return False
if begin_ts is not None and end_ts is not None and begin_ts > end_ts:
logger.error("begin-ts must be <= end-ts")
return False
return True


def main(argv):
Eden-D-Zhang marked this conversation as resolved.
Show resolved Hide resolved
clp_home = get_clp_home()
default_config_file_path = clp_home / CLP_DEFAULT_CONFIG_FILE_RELATIVE_PATH

# Top-level parser and options
args_parser = argparse.ArgumentParser(description="View or delete archives.")
Eden-D-Zhang marked this conversation as resolved.
Show resolved Hide resolved
args_parser.add_argument(
"--config",
"-c",
default=str(default_config_file_path),
help="CLP package configuration file.",
)

# Top-level commands
subparsers = args_parser.add_subparsers(
dest="subcommand",
required=True,
)
find_parser = subparsers.add_parser(
FIND_COMMAND,
help="List IDs of archives.",
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
help="List IDs of archives.",
help="Lists IDs of compressed archives.",

)
del_parser = subparsers.add_parser(
DEL_COMMAND,
help="Delete archives.",
)

# Options for find subcommand
find_parser.add_argument(
BEGIN_TS_ARG,
dest="begin_ts",
type=int,
default=0,
help="Time-range lower-bound (inclusive) as milliseconds from the UNIX epoch.",
)
find_parser.add_argument(
END_TS_ARG,
dest="end_ts",
type=int,
help="Time-range upper-bound (include) as milliseconds from the UNIX epoch.",
Eden-D-Zhang marked this conversation as resolved.
Show resolved Hide resolved
)

# Options for delete subcommand
del_parser.add_argument(
DRY_RUN_ARG,
dest="dry_run",
action="store_true",
help="Only prints the archives to be deleted, without actually deleting them.",
)

# Subcommands for delete subcommand
del_subparsers = del_parser.add_subparsers(
dest="del_subcommand",
required=True,
)

# Delete by ID subcommand
del_id_parser = del_subparsers.add_parser(
BY_IDS_COMMAND,
help="Delete archives by ID.",
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
help="Delete archives by ID.",
help="Deletes archives by ID.",

)

# Delete by ID arguments
del_id_parser.add_argument(
haiqi96 marked this conversation as resolved.
Show resolved Hide resolved
"ids",
nargs="+",
help="List of archive IDs to delete",
)

# Delete by filter subcommand
del_filter_parser = del_subparsers.add_parser(
BY_FILTER_COMMAND,
help="Delete archives that fall within the specified time range.",
)

# Delete by filter arguments
del_filter_parser.add_argument(
BEGIN_TS_ARG,
type=int,
default=0,
help="Time-range lower-bound (inclusive) as milliseconds from the UNIX epoch.",
)
del_filter_parser.add_argument(
END_TS_ARG,
type=int,
required=True,
help="Time-range upper-bound (include) as milliseconds from the UNIX epoch.",
Eden-D-Zhang marked this conversation as resolved.
Show resolved Hide resolved
)

parsed_args = args_parser.parse_args(argv[1:])

begin_timestamp = None
end_timestamp = None
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Nit:

Suggested change
begin_timestamp = None
end_timestamp = None
begin_timestamp: int
end_timestamp: int

They should get properly None initialized.

subcommand = parsed_args.subcommand

# Validate and load config file
try:
config_file_path = Path(parsed_args.config)
clp_config = load_config_file(config_file_path, default_config_file_path, clp_home)
clp_config.validate_logs_dir()

# Validate and load necessary credentials
validate_and_load_db_credentials_file(clp_config, clp_home, False)
except:
logger.exception("Failed to load config.")
return -1
haiqi96 marked this conversation as resolved.
Show resolved Hide resolved

storage_type = clp_config.archive_output.storage.type
if StorageType.FS != storage_type:
logger.error(f"Archive deletion is not supported for storage type: {storage_type}.")
return -1

# Validate input depending on subcommands
if DEL_COMMAND == subcommand and BY_FILTER_COMMAND == parsed_args.del_subcommand:
begin_timestamp = parsed_args.begin_ts
end_timestamp = parsed_args.end_ts

# Validate the input timestamp
if not _validate_timestamps(begin_timestamp, end_timestamp):
return -1

elif FIND_COMMAND == subcommand:
begin_timestamp = parsed_args.begin_ts
end_timestamp = parsed_args.end_ts

if begin_timestamp is not None or end_timestamp is not None:
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Looking at the code again, begin_timestamp has a default value, so it should never gets a None value?

the _validate_timestamps is also written to handle None input, in this case, do we still need this if begin_timestamp is not None or end_timestamp is not None: statement?

If we can remove this if statement, maybe we can also combine the two if-clause for ts validation


# Validate the input timestamp
if not _validate_timestamps(begin_timestamp, end_timestamp):
return -1

container_name = generate_container_name("archive-manager")

container_clp_config, mounts = generate_container_config(clp_config, clp_home)
generated_config_path_on_container, generated_config_path_on_host = dump_container_config(
container_clp_config, clp_config, container_name
)

necessary_mounts = [mounts.clp_home, mounts.logs_dir, mounts.archives_output_dir]
container_start_cmd = generate_container_start_cmd(
container_name, necessary_mounts, clp_config.execution_container
)

# fmt: off
archive_manager_cmd = [
"python3",
"-m", "clp_package_utils.scripts.native.archive_manager",
"--config", str(generated_config_path_on_container),
str(subcommand),
]
# fmt : on
# Add subcommand-specific arguments
haiqi96 marked this conversation as resolved.
Show resolved Hide resolved
if DEL_COMMAND == subcommand:
if parsed_args.dry_run:
archive_manager_cmd.append(DRY_RUN_ARG)
if BY_IDS_COMMAND == parsed_args.del_subcommand:
archive_manager_cmd.append(BY_IDS_COMMAND)
haiqi96 marked this conversation as resolved.
Show resolved Hide resolved
archive_manager_cmd.extend(parsed_args.ids)
elif BY_FILTER_COMMAND == parsed_args.del_subcommand:
archive_manager_cmd.extend([
BY_FILTER_COMMAND,
str(begin_timestamp),
str(end_timestamp)
])
elif FIND_COMMAND == subcommand:
archive_manager_cmd.extend([BEGIN_TS_ARG, str(begin_timestamp)])
Eden-D-Zhang marked this conversation as resolved.
Show resolved Hide resolved
if end_timestamp is not None:
Eden-D-Zhang marked this conversation as resolved.
Show resolved Hide resolved
archive_manager_cmd.extend([END_TS_ARG, str(end_timestamp)])

haiqi96 marked this conversation as resolved.
Show resolved Hide resolved
cmd = container_start_cmd + archive_manager_cmd

subprocess.run(cmd, check=True)

# Remove generated files
generated_config_path_on_host.unlink()

return 0


if "__main__" == __name__:
sys.exit(main(sys.argv))
110 changes: 0 additions & 110 deletions components/clp-package-utils/clp_package_utils/scripts/del_archives.py

This file was deleted.

Loading
Loading