Skip to content

Commit

Permalink
[Update] Rename the command upload/download artifact to artifacts
Browse files Browse the repository at this point in the history
- Add new command `download-base-artifacts` for users who develop
  dbt projects. The branch defaults to the default branch and the
  target path defaults to `target-base`

Signed-off-by: Kent Huang <[email protected]>
  • Loading branch information
kentwelcome committed Dec 5, 2024
1 parent ff25493 commit a4656a9
Show file tree
Hide file tree
Showing 2 changed files with 134 additions and 84 deletions.
88 changes: 46 additions & 42 deletions recce/artifact.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,12 +14,12 @@
from recce.util.recce_cloud import RecceCloud, PresignedUrlMethod


def verify_artifact_path(target_path: str) -> bool:
def verify_artifacts_path(target_path: str) -> bool:
"""
Verify if the target path is a valid artifact path.
Verify if the target path is a valid artifacts path.
:param target_path: the target path to check
:return: True if the target path is a valid artifact path, False otherwise
:return: True if the target path is a valid artifacts path, False otherwise
"""
if not target_path:
return False
Expand All @@ -30,12 +30,12 @@ def verify_artifact_path(target_path: str) -> bool:
if not os.path.isdir(target_path):
return False

required_artifact_files = [
required_artifacts_files = [
'manifest.json',
'catalog.json'
]

if all(f in os.listdir(target_path) for f in required_artifact_files):
if all(f in os.listdir(target_path) for f in required_artifacts_files):
# Check if the required files are present in the target path
return True

Expand All @@ -50,8 +50,8 @@ def parse_dbt_version(file_path: str) -> str:
return dbt_version


def archive_artifact(target_path: str) -> (str, str):
if verify_artifact_path(target_path) is False:
def archive_artifacts(target_path: str) -> (str, str):
if verify_artifacts_path(target_path) is False:
raise Exception(f'Invalid target path: {target_path}')

manifest_path = os.path.join(target_path, 'manifest.json')
Expand All @@ -61,55 +61,55 @@ def archive_artifact(target_path: str) -> (str, str):
if dbt_version is None:
raise Exception('Failed to parse dbt version from manifest.json')

# prepare the temporary artifact path
# prepare the temporary artifacts path
tmp_dir = tempfile.mkdtemp()
artifact_tar_path = os.path.join(tmp_dir, 'dbt_artifact.tar')
artifact_tar_gz_path = artifact_tar_path + '.gz'
artifacts_tar_path = os.path.join(tmp_dir, 'dbt_artifacts.tar')
artifacts_tar_gz_path = artifacts_tar_path + '.gz'

with tarfile.open(artifact_tar_path, 'w') as tar:
with tarfile.open(artifacts_tar_path, 'w') as tar:
tar.add(manifest_path, arcname='manifest.json')
tar.add(catalog_path, arcname='catalog.json')

# Compress the tar file
with open(artifact_tar_path, 'rb') as f_in, gzip.open(artifact_tar_gz_path, 'wb') as f_out:
with open(artifacts_tar_path, 'rb') as f_in, gzip.open(artifacts_tar_gz_path, 'wb') as f_out:
f_out.writelines(f_in)

# Clean up the temporary directory
try:
os.remove(artifact_tar_path)
os.remove(artifacts_tar_path)
except FileNotFoundError:
pass

return artifact_tar_gz_path, dbt_version
return artifacts_tar_gz_path, dbt_version


def upload_dbt_artifact(target_path: str, branch: str, token: str, password: str, debug: bool = False):
def upload_dbt_artifacts(target_path: str, branch: str, token: str, password: str, debug: bool = False):
console = Console()
if verify_artifact_path(target_path) is False:
if verify_artifacts_path(target_path) is False:
console.print(f"[[red]Error[/red]] Invalid target path: {target_path}")
console.print("Please provide a valid target path containing manifest.json and catalog.json.")
return 1

if branch != current_branch():
console.print(
f"[[yellow]Warning[/yellow]] You are uploading the dbt artifact as branch '{branch}'. "
f"[[yellow]Warning[/yellow]] You are uploading the dbt artifacts as branch '{branch}'. "
f"However, the current branch is '{current_branch()}'."
)
console.print("Please make sure you are uploading the dbt artifact to the correct branch.")
console.print("Please make sure you are uploading the dbt artifacts to the correct branch.")

compress_file_path, dbt_version = archive_artifact(target_path)
compress_file_path, dbt_version = archive_artifacts(target_path)
repo = hosting_repo()
sha = commit_hash_from_branch(branch)
metadata = {
'commit': sha,
'dbt_version': dbt_version
}

# Get the presigned URL for uploading the artifact
# Get the presigned URL for uploading the artifacts
presigned_url = RecceCloud(token).get_presigned_url(
method=PresignedUrlMethod.UPLOAD,
repository=repo,
artifact_name='dbt_artifact.tar.gz',
artifact_name='dbt_artifacts.tar.gz',
branch=branch,
metadata=metadata
)
Expand All @@ -122,8 +122,9 @@ def upload_dbt_artifact(target_path: str, branch: str, token: str, password: str
console.print(f'Artifact path: {compress_file_path}')
console.print(f'DBT version: {dbt_version}')
console.print(f'Presigned URL: {presigned_url}')
console.print(f'Uploading the dbt artifacts from path "{target_path}" to branch "{branch}"')

# Upload the compressed artifact
# Upload the compressed artifacts

headers = s3_sse_c_headers(password)
if metadata:
Expand All @@ -132,62 +133,65 @@ def upload_dbt_artifact(target_path: str, branch: str, token: str, password: str
if response.status_code != 200:
raise Exception({response.text})

# Clean up the compressed artifact
# Clean up the compressed artifacts
try:
# Remove the compressed artifact
# Remove the compressed artifacts
os.remove(compress_file_path)
# Clean up the temporary directory
os.rmdir(os.path.dirname(compress_file_path))
except FileNotFoundError:
pass


def download_dbt_artifact(target_path: str, branch: str, token: str, password: str,
force: bool = False,
debug: bool = False):
def download_dbt_artifacts(target_path: str, branch: str, token: str, password: str,
force: bool = False,
debug: bool = False):
console = Console()
repo = hosting_repo()

if os.path.exists(target_path):
if not force:
raise Exception(
f'Path {target_path} already exists. Please provide a new path or use \'--force\' option to overwrite the existing folder.')
console.print(f'[[yellow]Warning[/yellow]] Overwrite existing path: {target_path}')
shutil.rmtree(target_path)

os.mkdir(target_path)
sha = None
dbt_version = None

presigned_url, tags = RecceCloud(token).get_download_presigned_url_with_tags(
repository=repo,
artifact_name='dbt_artifact.tar.gz',
artifact_name='dbt_artifacts.tar.gz',
branch=branch,
)
sha = tags.get('commit')
dbt_version = tags.get('dbt_version')
if tags:
sha = tags.get('commit')
dbt_version = tags.get('dbt_version')

if debug:
console.rule('Debug information', style='blue')
console.print(f'Git Branch: {branch}')
console.print(f'Git Commit: {sha}')
console.print(f'GitHub repository: {repo}')
console.print(f'DBT version: {dbt_version}')
console.print(f'Downloading from branch: "{branch}" and extracting to "{target_path}"')

headers = s3_sse_c_headers(password)
response = requests.get(presigned_url, headers=headers)

if response.status_code != 200:
raise Exception(response.text)

tar_gz_file = os.path.join(target_path, 'dbt_artifact.tar.gz')
if os.path.exists(target_path):
if not force:
raise Exception(
f'Path {target_path} already exists. Please provide a new path or use \'--force\' option to overwrite the existing folder.')
console.print(f'[[yellow]Warning[/yellow]] Overwrite existing path: {target_path}')
shutil.rmtree(target_path)
os.mkdir(target_path)

tar_gz_file = os.path.join(target_path, 'dbt_artifacts.tar.gz')
with open(tar_gz_file, 'wb') as f:
f.write(response.content)

with tarfile.open(tar_gz_file, 'r') as tar:
tar.extractall(path=target_path)

# Clean up the compressed artifact
# Clean up the compressed artifacts
try:
# Remove the compressed artifact
# Remove the compressed artifacts
os.remove(tar_gz_file)
except FileNotFoundError:
pass
Expand Down
130 changes: 88 additions & 42 deletions recce/cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@
import uvicorn

from recce import event
from recce.artifact import upload_dbt_artifact, download_dbt_artifact
from recce.artifact import upload_dbt_artifacts, download_dbt_artifacts
from recce.config import RecceConfig, RECCE_CONFIG_FILE, RECCE_ERROR_LOG_FILE
from recce.git import current_branch, current_default_branch
from recce.run import cli_run, check_github_ci_env
Expand Down Expand Up @@ -567,15 +567,22 @@ def download(**kwargs):
@cloud.command(cls=TrackCommand)
@click.option('--cloud-token', help='The token used by Recce Cloud.', type=click.STRING,
envvar='GITHUB_TOKEN')
@click.option('--branch', '-b', help='The branch of the provided artifact.', type=click.STRING,
envvar='GITHUB_HEAD_REF', default=current_branch())
@click.option('--target-path', help='dbt artifacts directory for your artifact.', type=click.STRING, default='target')
@click.option('--password', '-p', help='The password to encrypt the dbt artifact in cloud.', type=click.STRING,
@click.option('--branch', '-b', help='The branch of the provided artifacts.', type=click.STRING,
envvar='GITHUB_HEAD_REF', default=current_branch(), show_default=True)
@click.option('--target-path', help='dbt artifacts directory for your artifacts.', type=click.STRING, default='target',
show_default=True)
@click.option('--password', '-p', help='The password to encrypt the dbt artifacts in cloud.', type=click.STRING,
envvar='RECCE_STATE_PASSWORD', required=True)
@add_options(recce_options)
def upload_artifact(**kwargs):
def upload_artifacts(**kwargs):
"""
Upload the dbt artifact to cloud
Upload the dbt artifacts to cloud
Upload the dbt artifacts (metadata.json, catalog.json) to Recce Cloud for the given branch.
The password is used to encrypt the dbt artifacts in the cloud. You will need the password to download the dbt artifacts.
By default, the artifacts are uploaded to the current branch. You can specify the branch using the --branch option.
The target path is set to 'target' by default. You can specify the target path using the --target-path option.
"""
from rich.console import Console
console = Console()
Expand All @@ -585,67 +592,106 @@ def upload_artifact(**kwargs):
branch = kwargs.get('branch')

try:
rc = upload_dbt_artifact(target_path, branch=branch,
token=cloud_token, password=password,
debug=kwargs.get('debug', False))
rc = upload_dbt_artifacts(target_path, branch=branch,
token=cloud_token, password=password,
debug=kwargs.get('debug', False))
console.rule('Uploaded Successfully')
console.print(
f'Uploaded dbt artifact to Recce Cloud for branch "{branch}" from "{os.path.abspath(target_path)}"')
f'Uploaded dbt artifacts to Recce Cloud for branch "{branch}" from "{os.path.abspath(target_path)}"')
except Exception as e:
console.rule('Failed to Upload', style='red')
console.print("[[red]Error[/red]] Failed to upload the dbt artifact to cloud.")
console.print("[[red]Error[/red]] Failed to upload the dbt artifacts to cloud.")
console.print(f"Reason: {e}")
rc = 1
return rc


@cloud.command(cls=TrackCommand)
@click.option('--cloud-token', help='The token used by Recce Cloud.', type=click.STRING,
envvar='GITHUB_TOKEN')
@click.option('--branch', '-b', help='The branch of the selected artifact.', type=click.STRING,
envvar='GITHUB_BASE_REF', default=current_default_branch())
@click.option('--target-path', help='The dbt artifacts directory for your artifact.', type=click.STRING,
default='target-base')
@click.option('--password', '-p', help='The password to encrypt the dbt artifact in cloud.', type=click.STRING,
envvar='RECCE_STATE_PASSWORD', required=True)
@click.option('--force', '-f', help='Bypasses the confirmation prompt. Download the artifact directly.',
is_flag=True)
@add_options(recce_options)
def download_artifact(**kwargs):
"""
Download the dbt artifact to cloud
"""
from rich.console import Console
console = Console()
cloud_token = kwargs.get('cloud_token')
password = kwargs.get('password')
target_path = kwargs.get('target_path')
branch = kwargs.get('branch')
def _download_artifacts(branch, cloud_token, console, kwargs, password, target_path):
try:
rc = download_dbt_artifact(target_path, branch=branch, token=cloud_token, password=password,
force=kwargs.get('force', False),
debug=kwargs.get('debug', False))
rc = download_dbt_artifacts(target_path, branch=branch, token=cloud_token, password=password,
force=kwargs.get('force', False),
debug=kwargs.get('debug', False))
console.rule('Downloaded Successfully')
console.print(
f'Downloaded dbt artifact from Recce Cloud for branch "{branch}" to "{os.path.abspath(target_path)}"')
f'Downloaded dbt artifacts from Recce Cloud for branch "{branch}" to "{os.path.abspath(target_path)}"')
except Exception as e:
console.rule('Failed to Download', style='red')
console.print("[[red]Error[/red]] Failed to download the dbt artifact from cloud.")
console.print("[[red]Error[/red]] Failed to download the dbt artifacts from cloud.")
reason = str(e)

if 'Requests specifying Server Side Encryption with Customer provided keys must provide the correct secret key' in reason:
console.print("Reason: Decryption failed due to incorrect password.")
console.print(
"Please provide the correct password to decrypt the dbt artifact. Or re-upload the dbt artifact with a new password.")
"Please provide the correct password to decrypt the dbt artifacts. Or re-upload the dbt artifacts with a new password.")
elif 'The specified key does not exist' in reason:
console.print("Reason: The dbt artifact is not found in the cloud.")
console.print("Please upload the dbt artifact to the cloud before downloading it.")
console.print("Reason: The dbt artifacts is not found in the cloud.")
console.print("Please upload the dbt artifacts to the cloud before downloading it.")
else:
console.print(f"Reason: {reason}")
rc = 1
return rc


@cloud.command(cls=TrackCommand)
@click.option('--cloud-token', help='The token used by Recce Cloud.', type=click.STRING,
envvar='GITHUB_TOKEN')
@click.option('--branch', '-b', help='The branch of the selected artifacts.', type=click.STRING,
envvar='GITHUB_BASE_REF', default=current_branch(), show_default=True)
@click.option('--target-path', help='The dbt artifacts directory for your artifacts.', type=click.STRING,
default='target', show_default=True)
@click.option('--password', '-p', help='The password to decrypt the dbt artifacts in cloud.', type=click.STRING,
envvar='RECCE_STATE_PASSWORD', required=True)
@click.option('--force', '-f', help='Bypasses the confirmation prompt. Download the artifacts directly.',
is_flag=True)
@add_options(recce_options)
def download_artifacts(**kwargs):
"""
Download the dbt artifacts from cloud
Download the dbt artifacts (metadata.json, catalog.json) from Recce Cloud for the given branch.
The password is used to decrypt the dbt artifacts in the cloud.
By default, the artifacts are downloaded from the current branch. You can specify the branch using the --branch option.
The target path is set to 'target' by default. You can specify the target path using the --target-path option.
"""
from rich.console import Console
console = Console()
cloud_token = kwargs.get('cloud_token')
password = kwargs.get('password')
target_path = kwargs.get('target_path')
branch = kwargs.get('branch')
return _download_artifacts(branch, cloud_token, console, kwargs, password, target_path)


@cloud.command(cls=TrackCommand)
@click.option('--cloud-token', help='The token used by Recce Cloud.', type=click.STRING,
envvar='GITHUB_TOKEN')
@click.option('--branch', '-b', help='The branch of the selected artifacts.', type=click.STRING,
envvar='GITHUB_BASE_REF', default=current_default_branch(), show_default=True)
@click.option('--target-path', help='The dbt artifacts directory for your artifacts.', type=click.STRING,
default='target-base', show_default=True)
@click.option('--password', '-p', help='The password to decrypt the dbt artifacts in cloud.', type=click.STRING,
envvar='RECCE_STATE_PASSWORD', required=True)
@click.option('--force', '-f', help='Bypasses the confirmation prompt. Download the artifacts directly.',
is_flag=True)
def download_base_artifacts(**kwargs):
"""
Download the base dbt artifacts from cloud
Download the base dbt artifacts (metadata.json, catalog.json) from Recce Cloud.
This is useful when you start to set up the base dbt artifacts for the first time.
Please make sure you have uploaded the dbt artifacts before downloading them.
"""
from rich.console import Console
console = Console()
cloud_token = kwargs.get('cloud_token')
password = kwargs.get('password')
target_path = kwargs.get('target_path')
branch = kwargs.get('branch')
return _download_artifacts(branch, cloud_token, console, kwargs, password, target_path)


@cli.group('github', short_help='GitHub related commands', hidden=True)
def github(**kwargs):
pass
Expand Down

0 comments on commit a4656a9

Please sign in to comment.