From a4656a9615a70cd05a8416b4fcae2eadb497d40a Mon Sep 17 00:00:00 2001 From: Kent Huang Date: Wed, 4 Dec 2024 16:18:42 +0900 Subject: [PATCH] [Update] Rename the command upload/download artifact to artifacts - Add new command `download-base-artifacts` for the users who develop dbt projects. The branch will be the default branch and the target path will be `target-base` Signed-off-by: Kent Huang --- recce/artifact.py | 88 ++++++++++++++++--------------- recce/cli.py | 130 +++++++++++++++++++++++++++++++--------------- 2 files changed, 134 insertions(+), 84 deletions(-) diff --git a/recce/artifact.py b/recce/artifact.py index e1f42fbc..60f47f05 100644 --- a/recce/artifact.py +++ b/recce/artifact.py @@ -14,12 +14,12 @@ from recce.util.recce_cloud import RecceCloud, PresignedUrlMethod -def verify_artifact_path(target_path: str) -> bool: +def verify_artifacts_path(target_path: str) -> bool: """ - Verify if the target path is a valid artifact path. + Verify if the target path is a valid artifacts path. 
:param target_path: the target path to check - :return: True if the target path is a valid artifact path, False otherwise + :return: True if the target path is a valid artifacts path, False otherwise """ if not target_path: return False @@ -30,12 +30,12 @@ def verify_artifact_path(target_path: str) -> bool: if not os.path.isdir(target_path): return False - required_artifact_files = [ + required_artifacts_files = [ 'manifest.json', 'catalog.json' ] - if all(f in os.listdir(target_path) for f in required_artifact_files): + if all(f in os.listdir(target_path) for f in required_artifacts_files): # Check if the required files are present in the target path return True @@ -50,8 +50,8 @@ def parse_dbt_version(file_path: str) -> str: return dbt_version -def archive_artifact(target_path: str) -> (str, str): - if verify_artifact_path(target_path) is False: +def archive_artifacts(target_path: str) -> (str, str): + if verify_artifacts_path(target_path) is False: raise Exception(f'Invalid target path: {target_path}') manifest_path = os.path.join(target_path, 'manifest.json') @@ -61,43 +61,43 @@ def archive_artifact(target_path: str) -> (str, str): if dbt_version is None: raise Exception('Failed to parse dbt version from manifest.json') - # prepare the temporary artifact path + # prepare the temporary artifacts path tmp_dir = tempfile.mkdtemp() - artifact_tar_path = os.path.join(tmp_dir, 'dbt_artifact.tar') - artifact_tar_gz_path = artifact_tar_path + '.gz' + artifacts_tar_path = os.path.join(tmp_dir, 'dbt_artifacts.tar') + artifacts_tar_gz_path = artifacts_tar_path + '.gz' - with tarfile.open(artifact_tar_path, 'w') as tar: + with tarfile.open(artifacts_tar_path, 'w') as tar: tar.add(manifest_path, arcname='manifest.json') tar.add(catalog_path, arcname='catalog.json') # Compress the tar file - with open(artifact_tar_path, 'rb') as f_in, gzip.open(artifact_tar_gz_path, 'wb') as f_out: + with open(artifacts_tar_path, 'rb') as f_in, gzip.open(artifacts_tar_gz_path, 'wb') as f_out: 
f_out.writelines(f_in) # Clean up the temporary directory try: - os.remove(artifact_tar_path) + os.remove(artifacts_tar_path) except FileNotFoundError: pass - return artifact_tar_gz_path, dbt_version + return artifacts_tar_gz_path, dbt_version -def upload_dbt_artifact(target_path: str, branch: str, token: str, password: str, debug: bool = False): +def upload_dbt_artifacts(target_path: str, branch: str, token: str, password: str, debug: bool = False): console = Console() - if verify_artifact_path(target_path) is False: + if verify_artifacts_path(target_path) is False: console.print(f"[[red]Error[/red]] Invalid target path: {target_path}") console.print("Please provide a valid target path containing manifest.json and catalog.json.") return 1 if branch != current_branch(): console.print( - f"[[yellow]Warning[/yellow]] You are uploading the dbt artifact as branch '{branch}'. " + f"[[yellow]Warning[/yellow]] You are uploading the dbt artifacts as branch '{branch}'. " f"However, the current branch is '{current_branch()}'." 
) - console.print("Please make sure you are uploading the dbt artifact to the correct branch.") + console.print("Please make sure you are uploading the dbt artifacts to the correct branch.") - compress_file_path, dbt_version = archive_artifact(target_path) + compress_file_path, dbt_version = archive_artifacts(target_path) repo = hosting_repo() sha = commit_hash_from_branch(branch) metadata = { @@ -105,11 +105,11 @@ def upload_dbt_artifact(target_path: str, branch: str, token: str, password: str 'dbt_version': dbt_version } - # Get the presigned URL for uploading the artifact + # Get the presigned URL for uploading the artifacts presigned_url = RecceCloud(token).get_presigned_url( method=PresignedUrlMethod.UPLOAD, repository=repo, - artifact_name='dbt_artifact.tar.gz', + artifact_name='dbt_artifacts.tar.gz', branch=branch, metadata=metadata ) @@ -122,8 +122,9 @@ def upload_dbt_artifact(target_path: str, branch: str, token: str, password: str console.print(f'Artifact path: {compress_file_path}') console.print(f'DBT version: {dbt_version}') console.print(f'Presigned URL: {presigned_url}') + console.print(f'Uploading the dbt artifacts from path "{target_path}" to branch "{branch}"') - # Upload the compressed artifact + # Upload the compressed artifacts headers = s3_sse_c_headers(password) if metadata: @@ -132,9 +133,9 @@ def upload_dbt_artifact(target_path: str, branch: str, token: str, password: str if response.status_code != 200: raise Exception({response.text}) - # Clean up the compressed artifact + # Clean up the compressed artifacts try: - # Remove the compressed artifact + # Remove the compressed artifacts os.remove(compress_file_path) # Clean up the temporary directory os.rmdir(os.path.dirname(compress_file_path)) @@ -142,28 +143,22 @@ def upload_dbt_artifact(target_path: str, branch: str, token: str, password: str pass -def download_dbt_artifact(target_path: str, branch: str, token: str, password: str, - force: bool = False, - debug: bool = False): +def 
download_dbt_artifacts(target_path: str, branch: str, token: str, password: str, + force: bool = False, + debug: bool = False): console = Console() repo = hosting_repo() - - if os.path.exists(target_path): - if not force: - raise Exception( - f'Path {target_path} already exists. Please provide a new path or use \'--force\' option to overwrite the existing folder.') - console.print(f'[[yellow]Warning[/yellow]] Overwrite existing path: {target_path}') - shutil.rmtree(target_path) - - os.mkdir(target_path) + sha = None + dbt_version = None presigned_url, tags = RecceCloud(token).get_download_presigned_url_with_tags( repository=repo, - artifact_name='dbt_artifact.tar.gz', + artifact_name='dbt_artifacts.tar.gz', branch=branch, ) - sha = tags.get('commit') - dbt_version = tags.get('dbt_version') + if tags: + sha = tags.get('commit') + dbt_version = tags.get('dbt_version') if debug: console.rule('Debug information', style='blue') @@ -171,6 +166,7 @@ def download_dbt_artifact(target_path: str, branch: str, token: str, password: s console.print(f'Git Commit: {sha}') console.print(f'GitHub repository: {repo}') console.print(f'DBT version: {dbt_version}') + console.print(f'Downloading from branch: "{branch}" and extracting to "{target_path}"') headers = s3_sse_c_headers(password) response = requests.get(presigned_url, headers=headers) @@ -178,16 +174,24 @@ def download_dbt_artifact(target_path: str, branch: str, token: str, password: s if response.status_code != 200: raise Exception(response.text) - tar_gz_file = os.path.join(target_path, 'dbt_artifact.tar.gz') + if os.path.exists(target_path): + if not force: + raise Exception( + f'Path {target_path} already exists. 
Please provide a new path or use \'--force\' option to overwrite the existing folder.') + console.print(f'[[yellow]Warning[/yellow]] Overwrite existing path: {target_path}') + shutil.rmtree(target_path) + os.mkdir(target_path) + + tar_gz_file = os.path.join(target_path, 'dbt_artifacts.tar.gz') with open(tar_gz_file, 'wb') as f: f.write(response.content) with tarfile.open(tar_gz_file, 'r') as tar: tar.extractall(path=target_path) - # Clean up the compressed artifact + # Clean up the compressed artifacts try: - # Remove the compressed artifact + # Remove the compressed artifacts os.remove(tar_gz_file) except FileNotFoundError: pass diff --git a/recce/cli.py b/recce/cli.py index 96f1eb57..446e58c3 100644 --- a/recce/cli.py +++ b/recce/cli.py @@ -7,7 +7,7 @@ import uvicorn from recce import event -from recce.artifact import upload_dbt_artifact, download_dbt_artifact +from recce.artifact import upload_dbt_artifacts, download_dbt_artifacts from recce.config import RecceConfig, RECCE_CONFIG_FILE, RECCE_ERROR_LOG_FILE from recce.git import current_branch, current_default_branch from recce.run import cli_run, check_github_ci_env @@ -567,15 +567,22 @@ def download(**kwargs): @cloud.command(cls=TrackCommand) @click.option('--cloud-token', help='The token used by Recce Cloud.', type=click.STRING, envvar='GITHUB_TOKEN') -@click.option('--branch', '-b', help='The branch of the provided artifact.', type=click.STRING, - envvar='GITHUB_HEAD_REF', default=current_branch()) -@click.option('--target-path', help='dbt artifacts directory for your artifact.', type=click.STRING, default='target') -@click.option('--password', '-p', help='The password to encrypt the dbt artifact in cloud.', type=click.STRING, +@click.option('--branch', '-b', help='The branch of the provided artifacts.', type=click.STRING, + envvar='GITHUB_HEAD_REF', default=current_branch(), show_default=True) +@click.option('--target-path', help='dbt artifacts directory for your artifacts.', type=click.STRING, 
default='target', + show_default=True) +@click.option('--password', '-p', help='The password to encrypt the dbt artifacts in cloud.', type=click.STRING, envvar='RECCE_STATE_PASSWORD', required=True) @add_options(recce_options) -def upload_artifact(**kwargs): +def upload_artifacts(**kwargs): """ - Upload the dbt artifact to cloud + Upload the dbt artifacts to cloud + + Upload the dbt artifacts (metadata.json, catalog.json) to Recce Cloud for the given branch. + The password is used to encrypt the dbt artifacts in the cloud. You will need the password to download the dbt artifacts. + + By default, the artifacts are uploaded to the current branch. You can specify the branch using the --branch option. + The target path is set to 'target' by default. You can specify the target path using the --target-path option. """ from rich.console import Console console = Console() @@ -585,67 +592,106 @@ def upload_artifact(**kwargs): branch = kwargs.get('branch') try: - rc = upload_dbt_artifact(target_path, branch=branch, - token=cloud_token, password=password, - debug=kwargs.get('debug', False)) + rc = upload_dbt_artifacts(target_path, branch=branch, + token=cloud_token, password=password, + debug=kwargs.get('debug', False)) console.rule('Uploaded Successfully') console.print( - f'Uploaded dbt artifact to Recce Cloud for branch "{branch}" from "{os.path.abspath(target_path)}"') + f'Uploaded dbt artifacts to Recce Cloud for branch "{branch}" from "{os.path.abspath(target_path)}"') except Exception as e: console.rule('Failed to Upload', style='red') - console.print("[[red]Error[/red]] Failed to upload the dbt artifact to cloud.") + console.print("[[red]Error[/red]] Failed to upload the dbt artifacts to cloud.") console.print(f"Reason: {e}") rc = 1 return rc -@cloud.command(cls=TrackCommand) -@click.option('--cloud-token', help='The token used by Recce Cloud.', type=click.STRING, - envvar='GITHUB_TOKEN') -@click.option('--branch', '-b', help='The branch of the selected artifact.', 
type=click.STRING, - envvar='GITHUB_BASE_REF', default=current_default_branch()) -@click.option('--target-path', help='The dbt artifacts directory for your artifact.', type=click.STRING, - default='target-base') -@click.option('--password', '-p', help='The password to encrypt the dbt artifact in cloud.', type=click.STRING, - envvar='RECCE_STATE_PASSWORD', required=True) -@click.option('--force', '-f', help='Bypasses the confirmation prompt. Download the artifact directly.', - is_flag=True) -@add_options(recce_options) -def download_artifact(**kwargs): - """ - Download the dbt artifact to cloud - """ - from rich.console import Console - console = Console() - cloud_token = kwargs.get('cloud_token') - password = kwargs.get('password') - target_path = kwargs.get('target_path') - branch = kwargs.get('branch') +def _download_artifacts(branch, cloud_token, console, kwargs, password, target_path): try: - rc = download_dbt_artifact(target_path, branch=branch, token=cloud_token, password=password, - force=kwargs.get('force', False), - debug=kwargs.get('debug', False)) + rc = download_dbt_artifacts(target_path, branch=branch, token=cloud_token, password=password, + force=kwargs.get('force', False), + debug=kwargs.get('debug', False)) console.rule('Downloaded Successfully') console.print( - f'Downloaded dbt artifact from Recce Cloud for branch "{branch}" to "{os.path.abspath(target_path)}"') + f'Downloaded dbt artifacts from Recce Cloud for branch "{branch}" to "{os.path.abspath(target_path)}"') except Exception as e: console.rule('Failed to Download', style='red') - console.print("[[red]Error[/red]] Failed to download the dbt artifact from cloud.") + console.print("[[red]Error[/red]] Failed to download the dbt artifacts from cloud.") reason = str(e) if 'Requests specifying Server Side Encryption with Customer provided keys must provide the correct secret key' in reason: console.print("Reason: Decryption failed due to incorrect password.") console.print( - "Please provide the 
correct password to decrypt the dbt artifact. Or re-upload the dbt artifact with a new password.") + "Please provide the correct password to decrypt the dbt artifacts. Or re-upload the dbt artifacts with a new password.") elif 'The specified key does not exist' in reason: - console.print("Reason: The dbt artifact is not found in the cloud.") - console.print("Please upload the dbt artifact to the cloud before downloading it.") + console.print("Reason: The dbt artifacts is not found in the cloud.") + console.print("Please upload the dbt artifacts to the cloud before downloading it.") else: console.print(f"Reason: {reason}") rc = 1 return rc +@cloud.command(cls=TrackCommand) +@click.option('--cloud-token', help='The token used by Recce Cloud.', type=click.STRING, + envvar='GITHUB_TOKEN') +@click.option('--branch', '-b', help='The branch of the selected artifacts.', type=click.STRING, + envvar='GITHUB_BASE_REF', default=current_branch(), show_default=True) +@click.option('--target-path', help='The dbt artifacts directory for your artifacts.', type=click.STRING, + default='target', show_default=True) +@click.option('--password', '-p', help='The password to decrypt the dbt artifacts in cloud.', type=click.STRING, + envvar='RECCE_STATE_PASSWORD', required=True) +@click.option('--force', '-f', help='Bypasses the confirmation prompt. Download the artifacts directly.', + is_flag=True) +@add_options(recce_options) +def download_artifacts(**kwargs): + """ + Download the dbt artifacts from cloud + + Download the dbt artifacts (metadata.json, catalog.json) from Recce Cloud for the given branch. + The password is used to decrypt the dbt artifacts in the cloud. + + By default, the artifacts are downloaded from the current branch. You can specify the branch using the --branch option. + The target path is set to 'target' by default. You can specify the target path using the --target-path option. 
+ """ + from rich.console import Console + console = Console() + cloud_token = kwargs.get('cloud_token') + password = kwargs.get('password') + target_path = kwargs.get('target_path') + branch = kwargs.get('branch') + return _download_artifacts(branch, cloud_token, console, kwargs, password, target_path) + + +@cloud.command(cls=TrackCommand) +@click.option('--cloud-token', help='The token used by Recce Cloud.', type=click.STRING, + envvar='GITHUB_TOKEN') +@click.option('--branch', '-b', help='The branch of the selected artifacts.', type=click.STRING, + envvar='GITHUB_BASE_REF', default=current_default_branch(), show_default=True) +@click.option('--target-path', help='The dbt artifacts directory for your artifacts.', type=click.STRING, + default='target-base', show_default=True) +@click.option('--password', '-p', help='The password to decrypt the dbt artifacts in cloud.', type=click.STRING, + envvar='RECCE_STATE_PASSWORD', required=True) +@click.option('--force', '-f', help='Bypasses the confirmation prompt. Download the artifacts directly.', + is_flag=True) +def download_base_artifacts(**kwargs): + """ + Download the base dbt artifacts from cloud + + Download the base dbt artifacts (metadata.json, catalog.json) from Recce Cloud. + This is useful when you start to set up the base dbt artifacts for the first time. + + Please make sure you have uploaded the dbt artifacts before downloading them. + """ + from rich.console import Console + console = Console() + cloud_token = kwargs.get('cloud_token') + password = kwargs.get('password') + target_path = kwargs.get('target_path') + branch = kwargs.get('branch') + return _download_artifacts(branch, cloud_token, console, kwargs, password, target_path) + + @cli.group('github', short_help='GitHub related commands', hidden=True) def github(**kwargs): pass