Skip to content

Commit

Permalink
Implement Docker image cache cross-environment support
Browse files Browse the repository at this point in the history
      - Name Docker images based on the branch hash value (shasum)
      - Add setup.py PEP-440 workaround for common package
      - Reorganize GHA jobs
      - Use the Docker registry cache to share image cache layers
      - Use separate Docker images (_cache) for caching
      - Optimize Docker image sizes (foreman, salmon)
      - Add remote builder support (used by default in GHA jobs)
      - Set umask explicitly to avoid cache misses caused by permission differences
      - Introduce API base image, derive api local/production from it
      - Reformat affected files for better readability
      - Use better names for scripts/variables
      - Consolidate renv related scripts location (use common/ everywhere)
      - Retire -d option of `prepare_image.sh`
      - Delete the affymetrix_local image as it is no longer used
        (the affymetrix cache boost satisfies our test execution time
        requirements)
      - Sort .dockerignore entries
      - Update .pre-commit-config.yaml
      - Update README
  • Loading branch information
arkid15r committed Jun 13, 2023
1 parent 529080f commit 6071787
Show file tree
Hide file tree
Showing 69 changed files with 1,869 additions and 1,545 deletions.
21 changes: 10 additions & 11 deletions .dockerignore
Original file line number Diff line number Diff line change
@@ -1,16 +1,15 @@
volume
*/volume
test_volume
*/test_volume
volume
.git
*/batch-job-specs
*/test_volume
*/volume
config/externally_supplied_metadata/metasra/*.tab
config/externally_supplied_metadata/metasra/metasra_keywords.json
config/externally_supplied_metadata/metasra/metasra_translated.json
config/externally_supplied_metadata/metasra/SRAmetadb.sqlite
dr_env
volumes_postgres/
env/
infrastructure/
terraform/
*/batch-job-specs
config/externally_supplied_metadata/metasra/metasra_translated.json
config/externally_supplied_metadata/metasra/metasra_keywords.json
config/externally_supplied_metadata/metasra/*.tab
config/externally_supplied_metadata/metasra/SRAmetadb.sqlite
test_volume
volume
volumes_postgres/
9 changes: 8 additions & 1 deletion .github/scripts/cleanup_instance.sh
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,14 @@ sudo apt-get remove -y '^ghc-8.*'
sudo apt-get remove -y '^dotnet-.*'
sudo apt-get remove -y '^llvm-.*'
sudo apt-get remove -y 'php.*'
sudo apt-get remove -y azure-cli google-cloud-sdk hhvm google-chrome-stable firefox powershell mono-devel
sudo apt-get remove -y \
azure-cli \
firefox \
google-chrome-stable \
google-cloud-sdk \
hhvm \
mono-devel \
powershell
sudo apt-get autoremove -y
sudo apt-get clean

Expand Down
6 changes: 3 additions & 3 deletions .github/scripts/filter_tests.sh
Original file line number Diff line number Diff line change
@@ -1,14 +1,14 @@
#!/bin/bash

# Exit on failure
# Exit on failure.
set -e

git log --format=oneline -n 1 "$GITHUB_SHA"
if [[ $(git log --format=oneline -n 1 "$GITHUB_SHA") = *"noslow"* ]]; then
echo "Skipping slow tests..";
echo "Skipping slow tests..."
./workers/run_tests.sh --exclude-tag=slow "$@"
else
echo "Running all tests..";
echo "Running all tests..."
./workers/run_tests.sh "$@"
fi

Expand Down
3 changes: 2 additions & 1 deletion .github/scripts/post_deploy_cleanup.sh
Original file line number Diff line number Diff line change
Expand Up @@ -8,4 +8,5 @@
ssh -o StrictHostKeyChecking=no \
-o ServerAliveInterval=15 \
-i infrastructure/data-refinery-key.pem \
ubuntu@"${DEPLOY_IP_ADDRESS}" "cd refinebio && git clean -f"
"ubuntu@${DEPLOY_IP_ADDRESS}" \
"cd refinebio && git clean -f"
5 changes: 3 additions & 2 deletions .github/scripts/pull_docker_images.sh
Original file line number Diff line number Diff line change
Expand Up @@ -2,14 +2,15 @@

set -e

REPO=$(echo "ghcr.io/$GITHUB_REPOSITORY" | tr '[:upper:]' '[:lower:]')
if [ -z "$IMAGES" ]; then
echo "Error: must put images to pull in \$IMAGES" >&2
exit 1
fi

REPO=$(echo "ghcr.io/$GITHUB_REPOSITORY" | tr '[:upper:]' '[:lower:]')

for image in $IMAGES; do
PACKAGE="$REPO/dr_$image"
# Only pull the package if it already exists
# Only pull the package if it already exists.
(docker pull "$PACKAGE" && docker tag "$PACKAGE" "ccdlstaging/dr_$image") || true
done
5 changes: 4 additions & 1 deletion .github/scripts/push_docker_images.sh
Original file line number Diff line number Diff line change
@@ -1,11 +1,14 @@
#!/bin/sh

REPO=$(echo "ghcr.io/$GITHUB_REPOSITORY" | tr '[:upper:]' '[:lower:]')
set -e

if [ -z "$IMAGES" ]; then
echo "Error: must put images to pull in \$IMAGES" >&2
exit 1
fi

REPO=$(echo "ghcr.io/$GITHUB_REPOSITORY" | tr '[:upper:]' '[:lower:]')

for image in $IMAGES; do
PACKAGE="$REPO/dr_$image"
docker tag "ccdlstaging/dr_$image" "$PACKAGE"
Expand Down
21 changes: 10 additions & 11 deletions .github/scripts/remote_deploy.sh
Original file line number Diff line number Diff line change
Expand Up @@ -18,11 +18,10 @@
# - AWS_ACCESS_KEY_ID -- The AWS key id to use when interacting with AWS.
# - AWS_SECRET_ACCESS_KEY -- The AWS secret key to use when interacting with AWS.


echo "$INSTANCE_SSH_KEY" > infrastructure/data-refinery-key.pem
echo "$INSTANCE_SSH_KEY" >infrastructure/data-refinery-key.pem
chmod 600 infrastructure/data-refinery-key.pem

run_on_deploy_box () {
run_on_deploy_box() {
# shellcheck disable=SC2029
ssh -o StrictHostKeyChecking=no \
-o ServerAliveInterval=15 \
Expand All @@ -32,7 +31,7 @@ run_on_deploy_box () {

# Create file containing local env vars that are needed for deploy.
rm -f env_vars
cat >> env_vars <<EOF
cat >>env_vars <<EOF
export CI_TAG='$CI_TAG'
export DOCKER_ID='$DOCKER_ID'
export DOCKER_PASSWD='$DOCKER_PASSWD'
Expand Down Expand Up @@ -69,18 +68,18 @@ echo "Building new images"
run_on_deploy_box "sudo touch /var/log/docker_update_$CI_TAG.log"
run_on_deploy_box "sudo chown ubuntu:ubuntu /var/log/docker_update_$CI_TAG.log"
run_on_deploy_box "source env_vars && echo -e '######\nBuilding new images for $CI_TAG\n######' 2>&1 | tee -a /var/log/docker_update_$CI_TAG.log"
run_on_deploy_box "source env_vars && ./.github/scripts/update_docker_img.sh 2>&1 | tee -a /var/log/docker_update_$CI_TAG.log"
run_on_deploy_box "source env_vars && ./.github/scripts/update_docker_image.sh 2>&1 | tee -a /var/log/docker_update_$CI_TAG.log"
run_on_deploy_box "source env_vars && echo -e '######\nFinished building new images for $CI_TAG\n######' 2>&1 | tee -a /var/log/docker_update_$CI_TAG.log"

# Load docker_img_exists function and $ALL_CCDL_IMAGES
source scripts/common.sh
# Load docker_image_exists function and $ALL_IMAGES.
. ./scripts/common.sh

if [[ "$MASTER_OR_DEV" == "master" ]]; then
DOCKERHUB_REPO=ccdl
elif [[ "$MASTER_OR_DEV" == "dev" ]]; then
DOCKERHUB_REPO=ccdlstaging
else
echo "Why in the world was remote_deploy.sh called from a branch other than dev or master?!?!?"
echo "Why in the world was remote_deploy.sh called from a branch other than dev or master?!"
exit 1
fi

Expand All @@ -89,10 +88,10 @@ fi
# https://github.com/AlexsLemonade/refinebio/issues/784
# Since it's not clear how that happened, the safest thing is to add
# an explicit check that the Docker images were successfully updated.
for IMAGE in $ALL_CCDL_IMAGES; do
for IMAGE in $ALL_IMAGES; do
image_name="$DOCKERHUB_REPO/dr_$IMAGE"
if ! docker_img_exists "$image_name" "$CI_TAG"; then
echo "Docker image $image_name:$CI_TAG doesn't exist after running update_docker_img.sh!"
if ! docker_image_exists "$image_name" "$CI_TAG"; then
echo "Docker image $image_name:$CI_TAG doesn't exist after running update_docker_image.sh!"
echo "This is generally caused by a temporary error, please try the 'Rerun workflow' button."
exit 1
fi
Expand Down
12 changes: 5 additions & 7 deletions .github/scripts/run_terraform.sh
Original file line number Diff line number Diff line change
@@ -1,17 +1,15 @@
#!/bin/bash -e

# Import Hashicorps' Key.
# Import HashiCorp's key.
curl https://keybase.io/hashicorp/pgp_keys.asc | gpg --import


# Install terraform and nomad
# Install Terraform.
cd
TERRAFORM_VERSION=0.13.5
wget -N https://releases.hashicorp.com/terraform/$TERRAFORM_VERSION/terraform_${TERRAFORM_VERSION}_linux_amd64.zip
wget -N https://releases.hashicorp.com/terraform/$TERRAFORM_VERSION/terraform_${TERRAFORM_VERSION}_SHA256SUMS
wget -N https://releases.hashicorp.com/terraform/$TERRAFORM_VERSION/terraform_${TERRAFORM_VERSION}_SHA256SUMS.sig


# Verify the signature file is untampered.
gpg_ok=$(gpg --verify terraform_${TERRAFORM_VERSION}_SHA256SUMS.sig terraform_${TERRAFORM_VERSION}_SHA256SUMS |& grep Good)
if [[ "$gpg_ok" == "" ]]; then
Expand All @@ -32,17 +30,17 @@ sudo mv terraform /usr/local/bin/
cd ~/refinebio/infrastructure

# GitHub Actions won't set the branch name for us, so do it ourselves.
source ~/refinebio/scripts/common.sh
branch=$(get_master_or_dev "$CI_TAG")
. ~/refinebio/scripts/common.sh

branch=$(get_master_or_dev "$CI_TAG")
if [[ $branch == "master" ]]; then
ENVIRONMENT=prod
BATCH_USE_ON_DEMAND_INSTANCES="false"
elif [[ $branch == "dev" ]]; then
ENVIRONMENT=staging
BATCH_USE_ON_DEMAND_INSTANCES="true"
else
echo "Why in the world was run_terraform.sh called from a branch other than dev or master?!?!?"
echo "Why in the world was run_terraform.sh called from a branch other than dev or master?!"
exit 1
fi

Expand Down
19 changes: 8 additions & 11 deletions .github/scripts/slackpost_deploy.sh
Original file line number Diff line number Diff line change
@@ -1,26 +1,23 @@
#!/bin/bash

if [[ $ENGAGEMENTBOT_WEBHOOK == "" ]]
then
if [[ $ENGAGEMENTBOT_WEBHOOK == "" ]]; then
echo "No webhook url. Set ENGAGEMENTBOT_WEBHOOK in the environment variables if you want to be notified of deploys on slack"
exit 0
fi

# ------------
channel=$1
if [[ $channel == "" ]]
then
echo "No channel specified"
exit 1
if [[ $channel == "" ]]; then
echo "No channel specified"
exit 1
fi

# ------------
shift
username=$1
if [[ $username == "" ]]
then
echo "No username specified"
exit 1
if [[ $username == "" ]]; then
echo "No username specified"
exit 1
fi

# ------------
Expand All @@ -35,7 +32,7 @@ fi

text="New deployment! Woo! $CI_USERNAME: $CI_BRANCH $CI_TAG"

escapedText=$(echo "$text" | sed 's/"/\"/g' | sed "s/'/\'/g" )
escapedText=$(echo "$text" | sed 's/"/\"/g' | sed "s/'/\'/g")

json="{\"channel\": \"$channel\", \"username\":\"$username\", \"icon_emoji\":\":tada:\", \"attachments\":[{\"color\":\"danger\" , \"text\": \"$escapedText\"}]}"

Expand Down
19 changes: 8 additions & 11 deletions .github/scripts/slackpost_end_to_end.sh
Original file line number Diff line number Diff line change
@@ -1,31 +1,28 @@
#!/bin/bash

if [[ $ENGAGEMENTBOT_WEBHOOK == "" ]]
then
if [[ $ENGAGEMENTBOT_WEBHOOK == "" ]]; then
echo "No webhook url. Set ENGAGEMENTBOT_WEBHOOK in the environment variables if you want to be notified of deploys on slack"
exit 0
fi

# ------------
channel=$1
if [[ $channel == "" ]]
then
echo "No channel specified"
exit 1
if [[ $channel == "" ]]; then
echo "No channel specified"
exit 1
fi

# ------------
shift
username=$1
if [[ $username == "" ]]
then
echo "No username specified"
exit 1
if [[ $username == "" ]]; then
echo "No username specified"
exit 1
fi

text="The end-to-end tests passed in the staging stack!!!"

escapedText=$(echo "$text" | sed 's/"/\"/g' | sed "s/'/\'/g" )
escapedText=$(echo "$text" | sed 's/"/\"/g' | sed "s/'/\'/g")

json="{\"channel\": \"$channel\", \"username\":\"$username\", \"icon_emoji\":\":tada:\", \"attachments\":[{\"color\":\"danger\" , \"text\": \"$escapedText\"}]}"

Expand Down
Original file line number Diff line number Diff line change
@@ -1,8 +1,8 @@
#!/bin/bash
set -e

# Load docker_img_exists function and $CCDL_WORKER_IMAGES
source ~/refinebio/scripts/common.sh
# Load docker_image_exists function and $WORKER_IMAGES.
. ~/refinebio/scripts/common.sh

# GitHub won't set the branch name for us, so do it ourselves.
branch=$(get_master_or_dev "$CI_TAG")
Expand All @@ -12,71 +12,74 @@ if [[ "$branch" == "master" ]]; then
elif [[ "$branch" == "dev" ]]; then
DOCKERHUB_REPO=ccdlstaging
else
echo "Why in the world was update_docker_img.sh called from a branch other than dev or master?!?!?"
echo "Why in the world was update_docker_image.sh called from a branch other than dev or master!?"
exit 1
fi

echo "$CI_TAG" > ~/refinebio/common/version
echo "$CI_TAG" >~/refinebio/common/version

# Create ~/refinebio/common/dist/data-refinery-common-*.tar.gz, which is
# required by the workers and data_refinery_foreman images.
## Remove old common distributions if they exist
## Remove old common distributions if they exist.
rm -f ~/refinebio/common/dist/*
cd ~/refinebio/common && python3 setup.py sdist

# Log into DockerHub
# Log into DockerHub.
docker login -u "$DOCKER_ID" -p "$DOCKER_PASSWD"

cd ~/refinebio
for IMAGE in $CCDL_WORKER_IMAGES; do
for IMAGE in $WORKER_IMAGES; do
image_name="$DOCKERHUB_REPO/dr_$IMAGE"
if docker_img_exists "$image_name" "$CI_TAG"; then
if docker_image_exists "$image_name" "$CI_TAG"; then
echo "Docker image exists, skipping: $image_name:$CI_TAG"
else
echo "Building docker image: $image_name:$CI_TAG"
# Build and push image. We use the CI_TAG as the system version.
docker build \
-t "$image_name:$CI_TAG" \
-f "workers/dockerfiles/Dockerfile.$IMAGE" \
--build-arg SYSTEM_VERSION="$CI_TAG" .
--build-arg SYSTEM_VERSION="$CI_TAG" \
--file "workers/dockerfiles/Dockerfile.$IMAGE" \
--tag "$image_name:$CI_TAG" \
.
docker push "$image_name:$CI_TAG"
# Update latest version
# Update latest version.
docker tag "$image_name:$CI_TAG" "$image_name:latest"
docker push "$image_name:latest"

# Save some space when we're through
# Save some space when we're through.
docker rmi "$image_name:$CI_TAG"
fi
done

# Build and push foreman image
# Build and push foreman image.
FOREMAN_DOCKER_IMAGE="$DOCKERHUB_REPO/dr_foreman"
if docker_img_exists "$FOREMAN_DOCKER_IMAGE" "$CI_TAG"; then
if docker_image_exists "$FOREMAN_DOCKER_IMAGE" "$CI_TAG"; then
echo "Docker image exists, skipping: $FOREMAN_DOCKER_IMAGE:$CI_TAG"
else
# Build and push image. We use the CI_TAG as the system version.
docker build \
-t "$FOREMAN_DOCKER_IMAGE:$CI_TAG" \
-f foreman/dockerfiles/Dockerfile.foreman \
--build-arg SYSTEM_VERSION="$CI_TAG" .
--build-arg SYSTEM_VERSION="$CI_TAG" \
--file foreman/dockerfiles/Dockerfile.foreman \
--tag "$FOREMAN_DOCKER_IMAGE:$CI_TAG" \
.
docker push "$FOREMAN_DOCKER_IMAGE:$CI_TAG"
# Update latest version
# Update latest version.
docker tag "$FOREMAN_DOCKER_IMAGE:$CI_TAG" "$FOREMAN_DOCKER_IMAGE:latest"
docker push "$FOREMAN_DOCKER_IMAGE:latest"
fi

# Build and push API image
# Build and push API image.
API_DOCKER_IMAGE="$DOCKERHUB_REPO/dr_api"
if docker_img_exists "$API_DOCKER_IMAGE" "$CI_TAG"; then
if docker_image_exists "$API_DOCKER_IMAGE" "$CI_TAG"; then
echo "Docker image exists, skipping: $API_DOCKER_IMAGE:$CI_TAG"
else
# Build and push image. We use the CI_TAG as the system version.
docker build \
-t "$API_DOCKER_IMAGE:$CI_TAG" \
-f api/dockerfiles/Dockerfile.api_production \
--build-arg SYSTEM_VERSION="$CI_TAG" .
--build-arg SYSTEM_VERSION="$CI_TAG" \
--file api/dockerfiles/Dockerfile.api_production \
--tag "$API_DOCKER_IMAGE:$CI_TAG" \
.
docker push "$API_DOCKER_IMAGE:$CI_TAG"
# Update latest version
# Update latest version.
docker tag "$API_DOCKER_IMAGE:$CI_TAG" "$API_DOCKER_IMAGE:latest"
docker push "$API_DOCKER_IMAGE:latest"
fi
Loading

0 comments on commit 6071787

Please sign in to comment.