Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Support alternative S3 endpoints #598

Open
wants to merge 4 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 2 additions & 1 deletion WDL/runtime/config_templates/default.cfg
Original file line number Diff line number Diff line change
Expand Up @@ -187,7 +187,8 @@ docker = ghcr.io/miniwdl-ext/miniwdl-tools:Id_sha256_73733b619d46f8a3f75229fb6d0
host_credentials = false
# see: https://github.com/chanzuckerberg/miniwdl/tree/main/tools_image
docker = ghcr.io/miniwdl-ext/miniwdl-tools:Id_sha256_73733b619d46f8a3f75229fb6d0b3f61868d159be5fb0ddc3a04613feab9ad18

# Set endpoint to the URL of an S3-compatible (non-AWS) service; it is passed to the AWS CLI as --endpoint-url
#endpoint = <alternative endpoint>

[download_gsutil]
# current version from https://github.com/GoogleCloudPlatform/cloud-sdk-docker/releases
Expand Down
25 changes: 19 additions & 6 deletions WDL/runtime/download.py
Original file line number Diff line number Diff line change
Expand Up @@ -202,7 +202,11 @@ def aria2c_downloader(
def awscli_downloader(
cfg: config.Loader, logger: logging.Logger, uri: str, **kwargs
) -> Generator[Dict[str, Any], Dict[str, Any], None]:
inputs: Dict[str, Any] = {"uri": uri, "docker": cfg["download_awscli"]["docker"]}
inputs: Dict[str, Any] = {
"uri": uri,
"docker": cfg["download_awscli"]["docker"],
"endpoint": cfg["download_awscli"].get("endpoint"),
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This get() throws unless we either provide a non-None default (second argument) or keep endpoint defined (not commented out) in default.cfg. The default.cfg could just have endpoint= to define it as an empty string (but then the empty string has to be detected in the tasks' WDL).

}
with ExitStack() as cleanup:
inputs["aws_credentials"] = prepare_aws_credentials(cfg, logger, cleanup)

Expand All @@ -211,9 +215,12 @@ def awscli_downloader(
input {
String uri
String docker
String? endpoint
File? aws_credentials
}

String endpoint_arg = if defined(endpoint) then "--endpoint-url ~{select_first([endpoint])}" else ""

command <<<
set -euo pipefail
if [ -n "~{aws_credentials}" ]; then
Expand All @@ -223,15 +230,15 @@ def awscli_downloader(
export AWS_MAX_ATTEMPTS=5
set -x
mkdir __out
if ! aws s3 cp "~{uri}" __out/ ; then
if ! aws ~{endpoint_arg} s3 cp "~{uri}" __out/ ; then
# Retry with --no-sign-request in case the object is public. Without this flag,
# the previous invocation could have failed either because (i) no AWS
# credentials are available or (ii) the IAM policy restricts accessible S3
# buckets regardless of whether the desired object is public.
rm -f __out/*
>&2 echo 'Retrying with --no-sign-request in case the object is public.' \
' If the overall operation fails, the real error may precede this message.'
aws s3 cp --no-sign-request "~{uri}" __out/
aws ~{endpoint_arg} s3 cp --no-sign-request "~{uri}" __out/
fi
>>>

Expand All @@ -251,7 +258,11 @@ def awscli_downloader(
def awscli_directory_downloader(
cfg: config.Loader, logger: logging.Logger, uri: str, **kwargs
) -> Generator[Dict[str, Any], Dict[str, Any], None]:
inputs: Dict[str, Any] = {"uri": uri, "docker": cfg["download_awscli"]["docker"]}
inputs: Dict[str, Any] = {
"uri": uri,
"docker": cfg["download_awscli"]["docker"],
"endpoint": cfg["download_awscli"].get("endpoint"),
}
with ExitStack() as cleanup:
inputs["aws_credentials"] = prepare_aws_credentials(cfg, logger, cleanup)

Expand All @@ -260,10 +271,12 @@ def awscli_directory_downloader(
input {
String uri
String docker
String? endpoint
File? aws_credentials
}

String dnm = basename(uri, "/")
String endpoint_arg = if defined(endpoint) then "--endpoint-url ~{select_first([endpoint])}" else ""

command <<<
set -euo pipefail
Expand All @@ -274,15 +287,15 @@ def awscli_directory_downloader(
export AWS_MAX_ATTEMPTS=5
set -x
mkdir -p "__out/~{dnm}/"
if ! aws s3 cp --recursive "~{uri}" "__out/~{dnm}/" ; then
if ! aws ~{endpoint_arg} s3 cp --recursive "~{uri}" "__out/~{dnm}/" ; then
# Retry with --no-sign-request in case the object is public. Without this flag,
# the previous invocation could have failed either because (i) no AWS
# credentials are available or (ii) the IAM policy restricts accessible S3
# buckets regardless of whether the desired object is public.
rm -f "__out/~{dnm}/*"
>&2 echo 'Retrying with --no-sign-request in case the folder is public.' \
' If the overall operation fails, the real error may precede this message.'
aws s3 cp --recursive --no-sign-request "~{uri}" "__out/~{dnm}/"
aws ~{endpoint_arg} s3 cp --recursive --no-sign-request "~{uri}" "__out/~{dnm}/"
fi
>>>

Expand Down