diff --git a/.dockerignore b/.dockerignore index cfd8a7fed..c2205b57e 100644 --- a/.dockerignore +++ b/.dockerignore @@ -1,16 +1,15 @@ -volume -*/volume -test_volume -*/test_volume -volume .git +*/batch-job-specs +*/test_volume +*/volume +config/externally_supplied_metadata/metasra/*.tab +config/externally_supplied_metadata/metasra/metasra_keywords.json +config/externally_supplied_metadata/metasra/metasra_translated.json +config/externally_supplied_metadata/metasra/SRAmetadb.sqlite dr_env -volumes_postgres/ env/ infrastructure/ terraform/ -*/batch-job-specs -config/externally_supplied_metadata/metasra/metasra_translated.json -config/externally_supplied_metadata/metasra/metasra_keywords.json -config/externally_supplied_metadata/metasra/*.tab -config/externally_supplied_metadata/metasra/SRAmetadb.sqlite +test_volume +volume +volumes_postgres/ diff --git a/.github/scripts/cleanup_instance.sh b/.github/scripts/cleanup_instance.sh index d270e1582..d359f6966 100755 --- a/.github/scripts/cleanup_instance.sh +++ b/.github/scripts/cleanup_instance.sh @@ -13,7 +13,14 @@ sudo apt-get remove -y '^ghc-8.*' sudo apt-get remove -y '^dotnet-.*' sudo apt-get remove -y '^llvm-.*' sudo apt-get remove -y 'php.*' -sudo apt-get remove -y azure-cli google-cloud-sdk hhvm google-chrome-stable firefox powershell mono-devel +sudo apt-get remove -y \ + azure-cli \ + firefox \ + google-chrome-stable \ + google-cloud-sdk \ + hhvm \ + mono-devel \ + powershell sudo apt-get autoremove -y sudo apt-get clean diff --git a/.github/scripts/filter_tests.sh b/.github/scripts/filter_tests.sh index fca0caec3..88a3dfff5 100755 --- a/.github/scripts/filter_tests.sh +++ b/.github/scripts/filter_tests.sh @@ -1,14 +1,14 @@ #!/bin/bash -# Exit on failure +# Exit on failure. set -e git log --format=oneline -n 1 "$GITHUB_SHA" if [[ $(git log --format=oneline -n 1 "$GITHUB_SHA") = *"noslow"* ]]; then - echo "Skipping slow tests.."; + echo "Skipping slow tests..." ./workers/run_tests.sh --exclude-tag=slow "$@" else - echo "Running all tests.."; + echo "Running all tests..." ./workers/run_tests.sh "$@" fi diff --git a/.github/scripts/post_deploy_cleanup.sh b/.github/scripts/post_deploy_cleanup.sh index 467cbc50e..9901a3b26 100755 --- a/.github/scripts/post_deploy_cleanup.sh +++ b/.github/scripts/post_deploy_cleanup.sh @@ -8,4 +8,5 @@ ssh -o StrictHostKeyChecking=no \ -o ServerAliveInterval=15 \ -i infrastructure/data-refinery-key.pem \ - ubuntu@"${DEPLOY_IP_ADDRESS}" "cd refinebio && git clean -f" + "ubuntu@${DEPLOY_IP_ADDRESS}" \ + "cd refinebio && git clean -f" diff --git a/.github/scripts/pull_docker_images.sh b/.github/scripts/pull_docker_images.sh index c691a0b41..532d0ccab 100755 --- a/.github/scripts/pull_docker_images.sh +++ b/.github/scripts/pull_docker_images.sh @@ -2,14 +2,15 @@ set -e -REPO=$(echo "ghcr.io/$GITHUB_REPOSITORY" | tr '[:upper:]' '[:lower:]') if [ -z "$IMAGES" ]; then echo "Error: must put images to pull in \$IMAGES" >&2 exit 1 fi +REPO=$(echo "ghcr.io/$GITHUB_REPOSITORY" | tr '[:upper:]' '[:lower:]') + for image in $IMAGES; do PACKAGE="$REPO/dr_$image" - # Only pull the package if it already exists + # Only pull the package if it already exists. 
(docker pull "$PACKAGE" && docker tag "$PACKAGE" "ccdlstaging/dr_$image") || true done diff --git a/.github/scripts/push_docker_images.sh b/.github/scripts/push_docker_images.sh index 738290c95..ccd79d424 100755 --- a/.github/scripts/push_docker_images.sh +++ b/.github/scripts/push_docker_images.sh @@ -1,11 +1,14 @@ #!/bin/sh -REPO=$(echo "ghcr.io/$GITHUB_REPOSITORY" | tr '[:upper:]' '[:lower:]') +set -e + if [ -z "$IMAGES" ]; then echo "Error: must put images to pull in \$IMAGES" >&2 exit 1 fi +REPO=$(echo "ghcr.io/$GITHUB_REPOSITORY" | tr '[:upper:]' '[:lower:]') + for image in $IMAGES; do PACKAGE="$REPO/dr_$image" docker tag "ccdlstaging/dr_$image" "$PACKAGE" diff --git a/.github/scripts/remote_deploy.sh b/.github/scripts/remote_deploy.sh index 318aaac0c..aabfd0b3b 100755 --- a/.github/scripts/remote_deploy.sh +++ b/.github/scripts/remote_deploy.sh @@ -18,11 +18,10 @@ # - AWS_ACCESS_KEY_ID -- The AWS key id to use when interacting with AWS. # - AWS_SECRET_ACCESS_KEY -- The AWS secret key to use when interacting with AWS. - -echo "$INSTANCE_SSH_KEY" > infrastructure/data-refinery-key.pem +echo "$INSTANCE_SSH_KEY" >infrastructure/data-refinery-key.pem chmod 600 infrastructure/data-refinery-key.pem -run_on_deploy_box () { +run_on_deploy_box() { # shellcheck disable=SC2029 ssh -o StrictHostKeyChecking=no \ -o ServerAliveInterval=15 \ @@ -32,7 +31,7 @@ run_on_deploy_box () { # Create file containing local env vars that are needed for deploy. rm -f env_vars -cat >> env_vars <>env_vars <&1 | tee -a /var/log/docker_update_$CI_TAG.log" -run_on_deploy_box "source env_vars && ./.github/scripts/update_docker_img.sh 2>&1 | tee -a /var/log/docker_update_$CI_TAG.log" +run_on_deploy_box "source env_vars && ./.github/scripts/update_docker_image.sh 2>&1 | tee -a /var/log/docker_update_$CI_TAG.log" run_on_deploy_box "source env_vars && echo -e '######\nFinished building new images for $CI_TAG\n######' 2>&1 | tee -a /var/log/docker_update_$CI_TAG.log" -# Load docker_img_exists function and $ALL_CCDL_IMAGES -source scripts/common.sh +# Load docker_image_exists function and $ALL_IMAGES. +. ./scripts/common.sh if [[ "$MASTER_OR_DEV" == "master" ]]; then DOCKERHUB_REPO=ccdl elif [[ "$MASTER_OR_DEV" == "dev" ]]; then DOCKERHUB_REPO=ccdlstaging else - echo "Why in the world was remote_deploy.sh called from a branch other than dev or master?!?!?" + echo "Why in the world was remote_deploy.sh called from a branch other than dev or master?!" exit 1 fi @@ -89,10 +88,10 @@ fi # https://github.com/AlexsLemonade/refinebio/issues/784 # Since it's not clear how that happened, the safest thing is to add # an explicit check that the Docker images were successfully updated. -for IMAGE in $ALL_CCDL_IMAGES; do +for IMAGE in $ALL_IMAGES; do image_name="$DOCKERHUB_REPO/dr_$IMAGE" - if ! docker_img_exists "$image_name" "$CI_TAG"; then - echo "Docker image $image_name:$CI_TAG doesn't exist after running update_docker_img.sh!" + if ! docker_image_exists "$image_name" "$CI_TAG"; then + echo "Docker image $image_name:$CI_TAG doesn't exist after running update_docker_image.sh!" echo "This is generally caused by a temporary error, please try the 'Rerun workflow' button." exit 1 fi diff --git a/.github/scripts/run_terraform.sh b/.github/scripts/run_terraform.sh index 72402e520..547b8c208 100755 --- a/.github/scripts/run_terraform.sh +++ b/.github/scripts/run_terraform.sh @@ -1,17 +1,15 @@ #!/bin/bash -e -# Import Hashicorps' Key. +# Import Hashicorps' key. 
curl https://keybase.io/hashicorp/pgp_keys.asc | gpg --import - -# Install terraform and nomad +# Install Terraform. cd TERRAFORM_VERSION=0.13.5 wget -N https://releases.hashicorp.com/terraform/$TERRAFORM_VERSION/terraform_${TERRAFORM_VERSION}_linux_amd64.zip wget -N https://releases.hashicorp.com/terraform/$TERRAFORM_VERSION/terraform_${TERRAFORM_VERSION}_SHA256SUMS wget -N https://releases.hashicorp.com/terraform/$TERRAFORM_VERSION/terraform_${TERRAFORM_VERSION}_SHA256SUMS.sig - # Verify the signature file is untampered. gpg_ok=$(gpg --verify terraform_${TERRAFORM_VERSION}_SHA256SUMS.sig terraform_${TERRAFORM_VERSION}_SHA256SUMS |& grep Good) if [[ "$gpg_ok" == "" ]]; then @@ -32,9 +30,9 @@ sudo mv terraform /usr/local/bin/ cd ~/refinebio/infrastructure # Circle won't set the branch name for us, so do it ourselves. -source ~/refinebio/scripts/common.sh -branch=$(get_master_or_dev "$CI_TAG") +. ~/refinebio/scripts/common.sh +branch=$(get_master_or_dev "$CI_TAG") if [[ $branch == "master" ]]; then ENVIRONMENT=prod BATCH_USE_ON_DEMAND_INSTANCES="false" @@ -42,7 +40,7 @@ elif [[ $branch == "dev" ]]; then ENVIRONMENT=staging BATCH_USE_ON_DEMAND_INSTANCES="true" else - echo "Why in the world was run_terraform.sh called from a branch other than dev or master?!?!?" + echo "Why in the world was run_terraform.sh called from a branch other than dev or master?!" exit 1 fi diff --git a/.github/scripts/slackpost_deploy.sh b/.github/scripts/slackpost_deploy.sh index 9d9b501fb..707de3e17 100755 --- a/.github/scripts/slackpost_deploy.sh +++ b/.github/scripts/slackpost_deploy.sh @@ -1,26 +1,23 @@ #!/bin/bash -if [[ $ENGAGEMENTBOT_WEBHOOK == "" ]] -then +if [[ $ENGAGEMENTBOT_WEBHOOK == "" ]]; then echo "No webhook url. Set ENGAGEMENTBOT_WEBHOOK in the environment variables if you want to be notified of deploys on slack" exit 0 fi # ------------ channel=$1 -if [[ $channel == "" ]] -then - echo "No channel specified" - exit 1 +if [[ $channel == "" ]]; then + echo "No channel specified" + exit 1 fi # ------------ shift username=$1 -if [[ $username == "" ]] -then - echo "No username specified" - exit 1 +if [[ $username == "" ]]; then + echo "No username specified" + exit 1 fi # ------------ @@ -35,7 +32,7 @@ fi text="New deployment! Woo! $CI_USERNAME: $CI_BRANCH $CI_TAG" -escapedText=$(echo "$text" | sed 's/"/\"/g' | sed "s/'/\'/g" ) +escapedText=$(echo "$text" | sed 's/"/\"/g' | sed "s/'/\'/g") json="{\"channel\": \"$channel\", \"username\":\"$username\", \"icon_emoji\":\":tada:\", \"attachments\":[{\"color\":\"danger\" , \"text\": \"$escapedText\"}]}" diff --git a/.github/scripts/slackpost_end_to_end.sh b/.github/scripts/slackpost_end_to_end.sh index 7fa2e71fc..821213b6e 100755 --- a/.github/scripts/slackpost_end_to_end.sh +++ b/.github/scripts/slackpost_end_to_end.sh @@ -1,31 +1,28 @@ #!/bin/bash -if [[ $ENGAGEMENTBOT_WEBHOOK == "" ]] -then +if [[ $ENGAGEMENTBOT_WEBHOOK == "" ]]; then echo "No webhook url. Set ENGAGEMENTBOT_WEBHOOK in the environment variables if you want to be notified of deploys on slack" exit 0 fi # ------------ channel=$1 -if [[ $channel == "" ]] -then - echo "No channel specified" - exit 1 +if [[ $channel == "" ]]; then + echo "No channel specified" + exit 1 fi # ------------ shift username=$1 -if [[ $username == "" ]] -then - echo "No username specified" - exit 1 +if [[ $username == "" ]]; then + echo "No username specified" + exit 1 fi text="The end-to-end tests passed in the staging stack!!!" 
-escapedText=$(echo "$text" | sed 's/"/\"/g' | sed "s/'/\'/g" ) +escapedText=$(echo "$text" | sed 's/"/\"/g' | sed "s/'/\'/g") json="{\"channel\": \"$channel\", \"username\":\"$username\", \"icon_emoji\":\":tada:\", \"attachments\":[{\"color\":\"danger\" , \"text\": \"$escapedText\"}]}" diff --git a/.github/scripts/update_docker_img.sh b/.github/scripts/update_docker_image.sh similarity index 59% rename from .github/scripts/update_docker_img.sh rename to .github/scripts/update_docker_image.sh index 620149463..a7aa97747 100755 --- a/.github/scripts/update_docker_img.sh +++ b/.github/scripts/update_docker_image.sh @@ -1,8 +1,8 @@ #!/bin/bash set -e -# Load docker_img_exists function and $CCDL_WORKER_IMAGES -source ~/refinebio/scripts/common.sh +# Load docker_image_exists function and $WORKER_IMAGES. +. ~/refinebio/scripts/common.sh # Github won't set the branch name for us, so do it ourselves. branch=$(get_master_or_dev "$CI_TAG") @@ -12,71 +12,74 @@ if [[ "$branch" == "master" ]]; then elif [[ "$branch" == "dev" ]]; then DOCKERHUB_REPO=ccdlstaging else - echo "Why in the world was update_docker_img.sh called from a branch other than dev or master?!?!?" + echo "Why in the world was update_docker_image.sh called from a branch other than dev or master!?" exit 1 fi -echo "$CI_TAG" > ~/refinebio/common/version +echo "$CI_TAG" >~/refinebio/common/version # Create ~/refinebio/common/dist/data-refinery-common-*.tar.gz, which is # required by the workers and data_refinery_foreman images. -## Remove old common distributions if they exist +## Remove old common distributions if they exist. rm -f ~/refinebio/common/dist/* cd ~/refinebio/common && python3 setup.py sdist -# Log into DockerHub +# Log into DockerHub. docker login -u "$DOCKER_ID" -p "$DOCKER_PASSWD" cd ~/refinebio -for IMAGE in $CCDL_WORKER_IMAGES; do +for IMAGE in $WORKER_IMAGES; do image_name="$DOCKERHUB_REPO/dr_$IMAGE" - if docker_img_exists "$image_name" "$CI_TAG"; then + if docker_image_exists "$image_name" "$CI_TAG"; then echo "Docker image exists, skipping: $image_name:$CI_TAG" else echo "Building docker image: $image_name:$CI_TAG" # Build and push image. We use the CI_TAG as the system version. docker build \ - -t "$image_name:$CI_TAG" \ - -f "workers/dockerfiles/Dockerfile.$IMAGE" \ - --build-arg SYSTEM_VERSION="$CI_TAG" . + --build-arg SYSTEM_VERSION="$CI_TAG" \ + --file "workers/dockerfiles/Dockerfile.$IMAGE" \ + --tag "$image_name:$CI_TAG" \ + . docker push "$image_name:$CI_TAG" - # Update latest version + # Update latest version. docker tag "$image_name:$CI_TAG" "$image_name:latest" docker push "$image_name:latest" - # Save some space when we're through + # Save some space when we're through. docker rmi "$image_name:$CI_TAG" fi done -# Build and push foreman image +# Build and push foreman image. FOREMAN_DOCKER_IMAGE="$DOCKERHUB_REPO/dr_foreman" -if docker_img_exists "$FOREMAN_DOCKER_IMAGE" "$CI_TAG"; then +if docker_image_exists "$FOREMAN_DOCKER_IMAGE" "$CI_TAG"; then echo "Docker image exists, skipping: $FOREMAN_DOCKER_IMAGE:$CI_TAG" else # Build and push image. We use the CI_TAG as the system version. docker build \ - -t "$FOREMAN_DOCKER_IMAGE:$CI_TAG" \ - -f foreman/dockerfiles/Dockerfile.foreman \ - --build-arg SYSTEM_VERSION="$CI_TAG" . + --build-arg SYSTEM_VERSION="$CI_TAG" \ + --file foreman/dockerfiles/Dockerfile.foreman \ + --tag "$FOREMAN_DOCKER_IMAGE:$CI_TAG" \ + . docker push "$FOREMAN_DOCKER_IMAGE:$CI_TAG" - # Update latest version + # Update latest version. 
docker tag "$FOREMAN_DOCKER_IMAGE:$CI_TAG" "$FOREMAN_DOCKER_IMAGE:latest" docker push "$FOREMAN_DOCKER_IMAGE:latest" fi -# Build and push API image +# Build and push API image. API_DOCKER_IMAGE="$DOCKERHUB_REPO/dr_api" -if docker_img_exists "$API_DOCKER_IMAGE" "$CI_TAG"; then +if docker_image_exists "$API_DOCKER_IMAGE" "$CI_TAG"; then echo "Docker image exists, skipping: $API_DOCKER_IMAGE:$CI_TAG" else # Build and push image. We use the CI_TAG as the system version. docker build \ - -t "$API_DOCKER_IMAGE:$CI_TAG" \ - -f api/dockerfiles/Dockerfile.api_production \ - --build-arg SYSTEM_VERSION="$CI_TAG" . + --build-arg SYSTEM_VERSION="$CI_TAG" \ + --file api/dockerfiles/Dockerfile.api_production \ + --tag "$API_DOCKER_IMAGE:$CI_TAG" \ + . docker push "$API_DOCKER_IMAGE:$CI_TAG" - # Update latest version + # Update latest version. docker tag "$API_DOCKER_IMAGE:$CI_TAG" "$API_DOCKER_IMAGE:latest" docker push "$API_DOCKER_IMAGE:latest" fi diff --git a/.github/scripts/upload_test_coverage.sh b/.github/scripts/upload_test_coverage.sh index 44a009cee..12068f485 100755 --- a/.github/scripts/upload_test_coverage.sh +++ b/.github/scripts/upload_test_coverage.sh @@ -3,14 +3,12 @@ # Script to upload code coverage project=$1 -if [[ $project == "" ]] -then +if [[ $project == "" ]]; then echo "No project specified" exit 1 fi -if [[ $project == "workers" ]] -then +if [[ $project == "workers" ]]; then # the workers project uses it's own test_volume directory test_volume="workers/test_volume" else @@ -19,8 +17,7 @@ fi coverage_file="${test_volume}/coverage.xml" -if [[ ! -f $coverage_file ]] -then +if [[ ! -f $coverage_file ]]; then echo "Coverage file wasn't found, were the tests run before?" exit 0 # exit this script but don't fail the tests for this. fi @@ -30,7 +27,7 @@ output_file="${test_volume}/${project}_coverage.xml" # In the test coverage report, all file paths are relative to each project # folder. We need to be relative to the repo's root directory. That's why we # append the project folder name to each file path in coverage.xml -sed "s/filename=\"/filename=\"$project\//g" "$coverage_file" > "$output_file" +sed "s/filename=\"/filename=\"$project\//g" "$coverage_file" >"$output_file" # codecov.sh is located at https://codecov.io/bash # we downloaded it for convenience diff --git a/.github/workflows/config.yml b/.github/workflows/config.yml index 1c2dc860f..9bbddd958 100644 --- a/.github/workflows/config.yml +++ b/.github/workflows/config.yml @@ -1,513 +1,552 @@ -name: test-and-deploy +name: refine.bio CI/CD + +env: + CODECOV_TOKEN: ${{ secrets.CODECOV_TOKEN }} + DEPLOY_IP_ADDRESS: ${{ secrets.DEPLOY_IP_ADDRESS }} + DOCKER_BUILDKIT: 1 + DOCKER_ID: ${{ secrets.DOCKER_ID }} + DOCKER_PASSWD: ${{ secrets.DOCKER_PASSWD }} + DOCKER_IO_USERNAME: ${{ secrets.DOCKER_IO_USERNAME }} + DOCKER_IO_PASSWORD: ${{ secrets.DOCKER_IO_PASSWORD }} + INSTANCE_SSH_KEY: ${{ secrets.INSTANCE_SSH_KEY }} on: push jobs: - syntax_test: - # ubuntu-latest is ubuntu 18.04 which has a really outdated version of shellcheck - runs-on: ubuntu-20.04 + # Code quality check jobs. 
+ check_syntax: + name: Check Syntax env: AWS_ACCESS_KEY_ID: ${{ secrets.AWS_ACCESS_KEY_ID }} AWS_SECRET_ACCESS_KEY: ${{ secrets.AWS_SECRET_ACCESS_KEY }} + runs-on: ubuntu-latest steps: - uses: actions/checkout@v3 - + - uses: actions/setup-python@v3 - uses: hashicorp/setup-terraform@v2 with: terraform_version: 0.13.5 - - name: Install build & test dependencies - run: | - python -m pip install --upgrade pip - pip install pre-commit - - - name: Init terraform in the infrastructure folder for validation + - name: Init Terraform in the Infrastructure Folder for Validation run: cd infrastructure && TF_VAR_user=ci TF_VAR_stage=dev ./init_terraform.sh - - name: Init terraform in the AMI folder for validation + - name: Init Terraform in the AMI Folder for Validation run: cd ami && terraform init - - name: Run pre-commit hooks on all files - run: PATH="$PATH:$HOME/.local/bin" pre-commit run --all-files --show-diff-on-failure + - uses: pre-commit/action@v3.0.0 - test_affy_agilent: - runs-on: ubuntu-latest + # Test jobs. + test_affymetrix: + name: Test Affymetrix env: IMAGES: migrations affymetrix - DOCKER_BUILDKIT: 1 - CODECOV_TOKEN: ${{ secrets.CODECOV_TOKEN }} + needs: + - test_base + runs-on: ubuntu-latest-m steps: - uses: actions/checkout@v3 - - name: Free up space - uses: jlumbroso/free-disk-space@main - with: - # this might remove tools that are actually needed, - # if set to "true" but frees about 6 GB - tool-cache: false - - - name: Login to Packages Container registry + - name: Login to Packages Container Registry uses: docker/login-action@v2 with: - registry: ghcr.io - username: ${{ github.actor }} - password: ${{ secrets.GITHUB_TOKEN }} + username: ${{ secrets.DOCKER_IO_USERNAME }} + password: ${{ secrets.DOCKER_IO_PASSWORD }} - name: Start Postgres - run: ./scripts/run_postgres.sh + run: scripts/run_postgres.sh - - name: Pause for initialization - run: sleep 30 + - name: Pause for Initialization + run: sleep 15 - - name: Install the database - run: ./scripts/install_db_docker.sh + - name: Install Database + run: scripts/install_db_docker.sh - - name: Update the models - run: | - sudo chmod -R a+wr common - ./scripts/update_models.sh + - name: Update Models + run: scripts/update_models.sh - - name: Run affymetrix tests + - name: Run Affymetrix Tests run: | sudo chmod -R a+rw workers/test_volume .github/scripts/filter_tests.sh -t affymetrix - - name: Run agilent tests - run: | - sudo -E chown -R $USER:$USER workers/test_volume - .github/scripts/filter_tests.sh -t agilent - - # This runs downloader and NO_OP tests. 
- downloader_no_op_tests: - runs-on: ubuntu-latest + test_agilent: + name: Test Agilent env: - # We have issues when downloading the no_op image, so for now let's just not pull it - # IMAGES: migrations api_local downloaders no_op foreman - IMAGES: migrations api_local downloaders foreman - DOCKER_BUILDKIT: 1 - CODECOV_TOKEN: ${{ secrets.CODECOV_TOKEN }} + IMAGES: migrations affymetrix + needs: + - test_base + runs-on: ubuntu-latest-m steps: - uses: actions/checkout@v3 - - name: Free up space - uses: jlumbroso/free-disk-space@main - with: - # this might remove tools that are actually needed, - # if set to "true" but frees about 6 GB - tool-cache: false - - - name: Login to Packages Container registry + - name: Login to Packages Container Registry uses: docker/login-action@v2 with: - registry: ghcr.io - username: ${{ github.actor }} - password: ${{ secrets.GITHUB_TOKEN }} - - - name: Pull cached docker images - run: ./.github/scripts/pull_docker_images.sh + username: ${{ secrets.DOCKER_IO_USERNAME }} + password: ${{ secrets.DOCKER_IO_PASSWORD }} - name: Start Postgres - run: ./scripts/run_postgres.sh - - - name: Pause for initialization - run: sleep 30 + run: scripts/run_postgres.sh - - name: Install the database - run: ./scripts/install_db_docker.sh + - name: Pause for Initialization + run: sleep 15 - - name: Update the models - run: | - sudo chmod -R a+wr common - ./scripts/update_models.sh + - name: Install Database + run: scripts/install_db_docker.sh - - name: Run downloader tests - run: | - .github/scripts/filter_tests.sh -t downloaders + - name: Update Models + run: scripts/update_models.sh - - name: Run NO_OP tests + - name: Run Agilent Tests run: | sudo -E chown -R $USER:$USER workers/test_volume - .github/scripts/filter_tests.sh -t no_op + .github/scripts/filter_tests.sh -t agilent test_api: - runs-on: ubuntu-latest + name: Test API env: - IMAGES: migrations api_local - DOCKER_BUILDKIT: 1 - CODECOV_TOKEN: ${{ secrets.CODECOV_TOKEN }} + IMAGES: migrations api_base api_local + needs: + - check_syntax + - test_api_base + runs-on: ubuntu-latest steps: - uses: actions/checkout@v3 - - name: Free up space - uses: jlumbroso/free-disk-space@main - with: - # this might remove tools that are actually needed, - # if set to "true" but frees about 6 GB - tool-cache: false - - - name: Login to Packages Container registry + - name: Login to Packages Container Registry uses: docker/login-action@v2 with: - registry: ghcr.io - username: ${{ github.actor }} - password: ${{ secrets.GITHUB_TOKEN }} - - - name: Pull cached docker images - run: ./.github/scripts/pull_docker_images.sh + username: ${{ secrets.DOCKER_IO_USERNAME }} + password: ${{ secrets.DOCKER_IO_PASSWORD }} - name: Start Postgres - run: ./scripts/run_postgres.sh + run: scripts/run_postgres.sh - name: Start Elasticsearch - run: ./scripts/run_es.sh + run: scripts/run_es.sh - - name: Pause for initialization - run: sleep 30 + - name: Pause for Initialization + run: sleep 15 - - name: Install the database - run: ./scripts/install_db_docker.sh + - name: Install Database + run: scripts/install_db_docker.sh - - name: Update the models - run: | - sudo chmod -R a+wr common - ./scripts/update_models.sh + - name: Update Models + run: scripts/update_models.sh - - name: Rebuild the Elasticsearch index - run: ./scripts/rebuild_es_index.sh + - name: Rebuild the Elasticsearch Index + run: scripts/rebuild_es_index.sh - - name: Run API tests + - name: Run API Tests run: | mkdir -p test_volume sudo chmod -R a+rw test_volume - ./api/run_tests.sh + 
api/run_tests.sh .github/scripts/upload_test_coverage.sh api - - name: Push built docker images - run: ./.github/scripts/push_docker_images.sh - - test_common: + test_api_base: + name: Test API Base + env: + IMAGES: migrations api_base + needs: + - check_syntax runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v3 + + - name: Login to Packages Container Registry + uses: docker/login-action@v2 + with: + username: ${{ secrets.DOCKER_IO_USERNAME }} + password: ${{ secrets.DOCKER_IO_PASSWORD }} + + - name: Start Postgres + run: scripts/run_postgres.sh + + - name: Start Elasticsearch + run: scripts/run_es.sh + + - name: Pause for Initialization + run: sleep 15 + + - name: Install Database + run: scripts/install_db_docker.sh + + - name: Update Models + run: scripts/update_models.sh + + - name: Build the Docker Image + run: scripts/prepare_image.sh -i api_base -s api + + test_base: + name: Test Base env: - IMAGES: migrations common_tests - DOCKER_BUILDKIT: 1 - CODECOV_TOKEN: ${{ secrets.CODECOV_TOKEN }} + IMAGES: base + needs: + - check_syntax + runs-on: ubuntu-latest steps: - uses: actions/checkout@v3 - - name: Login to Packages Container registry + - name: Login to Packages Container Registry + uses: docker/login-action@v2 + with: + username: ${{ secrets.DOCKER_IO_USERNAME }} + password: ${{ secrets.DOCKER_IO_PASSWORD }} + + - name: Build the Docker Image + run: scripts/prepare_image.sh -i base -s common + + - name: Login to Packages Container Registry uses: docker/login-action@v2 with: registry: ghcr.io username: ${{ github.actor }} password: ${{ secrets.GITHUB_TOKEN }} - - name: Pull cached docker images - run: ./.github/scripts/pull_docker_images.sh + - name: Push Built Docker Images + run: .github/scripts/push_docker_images.sh + + test_common: + name: Test Common + env: + IMAGES: migrations common_tests + needs: + - test_base + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v3 + + - name: Login to Packages Container Registry + uses: docker/login-action@v2 + with: + username: ${{ secrets.DOCKER_IO_USERNAME }} + password: ${{ secrets.DOCKER_IO_PASSWORD }} - name: Start Postgres - run: ./scripts/run_postgres.sh + run: scripts/run_postgres.sh - name: Start Elasticsearch - run: ./scripts/run_es.sh + run: scripts/run_es.sh - - name: Pause for initialization - run: sleep 30 + - name: Pause for Initialization + run: sleep 15 - - name: Install the database - run: ./scripts/install_db_docker.sh + - name: Install Database + run: scripts/install_db_docker.sh - - name: Update the models - run: | - sudo chmod -R a+wr common - ./scripts/update_models.sh + - name: Update Models + run: scripts/update_models.sh - - name: Rebuild the Elasticsearch index - run: ./scripts/rebuild_es_index.sh + - name: Rebuild the Elasticsearch Index + run: scripts/rebuild_es_index.sh - - name: Run Common tests + - name: Run Common Tests run: | mkdir -p workers/test_volume sudo chmod -R a+rw workers/test_volume - ./common/run_tests.sh + common/run_tests.sh .github/scripts/upload_test_coverage.sh common - - name: Push built docker images - run: ./.github/scripts/push_docker_images.sh + - name: Login to Packages Container Registry + uses: docker/login-action@v2 + with: + registry: ghcr.io + username: ${{ github.actor }} + password: ${{ secrets.GITHUB_TOKEN }} + + - name: Push Built Docker Images + run: .github/scripts/push_docker_images.sh - test_foreman: - runs-on: ubuntu-latest + test_downloaders_no_op: + name: Test Downloaders and No-Op env: - IMAGES: migrations foreman - DOCKER_BUILDKIT: 1 - CODECOV_TOKEN: 
${{ secrets.CODECOV_TOKEN }} + IMAGES: migrations api_base api_local downloaders foreman + needs: + - test_base + runs-on: ubuntu-latest steps: - uses: actions/checkout@v3 - - name: Free up space - uses: jlumbroso/free-disk-space@main + - name: Login to Packages Container Registry + uses: docker/login-action@v2 with: - # this might remove tools that are actually needed, - # if set to "true" but frees about 6 GB - tool-cache: false + username: ${{ secrets.DOCKER_IO_USERNAME }} + password: ${{ secrets.DOCKER_IO_PASSWORD }} - - name: Login to Packages Container registry + - name: Start Postgres + run: scripts/run_postgres.sh + + - name: Pause for Initialization + run: sleep 15 + + - name: Install Database + run: scripts/install_db_docker.sh + + - name: Update Models + run: scripts/update_models.sh + + - name: Run Downloader Tests + run: | + .github/scripts/filter_tests.sh -t downloaders + + - name: Run No-Op Tests + run: | + sudo -E chown -R $USER:$USER workers/test_volume + .github/scripts/filter_tests.sh -t no_op + + test_foreman: + name: Test Foreman + env: + IMAGES: migrations foreman + needs: + - test_base + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v3 + + - name: Login to Packages Container Registry uses: docker/login-action@v2 with: - registry: ghcr.io - username: ${{ github.actor }} - password: ${{ secrets.GITHUB_TOKEN }} - - - name: Pull cached docker images - run: ./.github/scripts/pull_docker_images.sh + username: ${{ secrets.DOCKER_IO_USERNAME }} + password: ${{ secrets.DOCKER_IO_PASSWORD }} - name: Start Postgres - run: ./scripts/run_postgres.sh + run: scripts/run_postgres.sh - name: Start Elasticsearch - run: ./scripts/run_es.sh + run: scripts/run_es.sh - - name: Pause for initialization - run: sleep 30 + - name: Pause for Initialization + run: sleep 15 - - name: Install the database - run: ./scripts/install_db_docker.sh + - name: Install Database + run: scripts/install_db_docker.sh - - name: Update the models - run: | - sudo chmod -R a+wr common - ./scripts/update_models.sh + - name: Update Models + run: scripts/update_models.sh - - name: Rebuild the Elasticsearch index - run: ./scripts/rebuild_es_index.sh + - name: Rebuild the Elasticsearch Index + run: scripts/rebuild_es_index.sh - - name: Run Foreman tests + - name: Run Foreman Tests run: | mkdir -p workers/test_volume sudo chmod -R a+rw workers/test_volume - ./foreman/run_tests.sh --exclude-tag=end_to_end + foreman/run_tests.sh --exclude-tag=end_to_end .github/scripts/upload_test_coverage.sh foreman - - name: Push built docker images - run: ./.github/scripts/push_docker_images.sh + - name: Login to Packages Container Registry + uses: docker/login-action@v2 + with: + registry: ghcr.io + username: ${{ github.actor }} + password: ${{ secrets.GITHUB_TOKEN }} + + - name: Push Built Docker Images + run: .github/scripts/push_docker_images.sh test_illumina: - runs-on: ubuntu-latest + name: Test Illumina env: IMAGES: migrations illumina - DOCKER_BUILDKIT: 1 - CODECOV_TOKEN: ${{ secrets.CODECOV_TOKEN }} + needs: + - test_base + runs-on: ubuntu-latest steps: - uses: actions/checkout@v3 - - name: Login to Packages Container registry + - name: Login to Packages Container Registry uses: docker/login-action@v2 with: - registry: ghcr.io - username: ${{ github.actor }} - password: ${{ secrets.GITHUB_TOKEN }} - - - name: Pull cached docker images - run: ./.github/scripts/pull_docker_images.sh + username: ${{ secrets.DOCKER_IO_USERNAME }} + password: ${{ secrets.DOCKER_IO_PASSWORD }} - name: Start Postgres - run: 
./scripts/run_postgres.sh + run: scripts/run_postgres.sh - - name: Pause for initialization - run: sleep 30 + - name: Pause for Initialization + run: sleep 15 - - name: Install the database - run: ./scripts/install_db_docker.sh + - name: Install Database + run: scripts/install_db_docker.sh - - name: Update the models - run: | - sudo chmod -R a+wr common - ./scripts/update_models.sh + - name: Update Models + run: scripts/update_models.sh - - name: Run Illumina tests + - name: Run Illumina Tests run: | mkdir -p workers/test_volume sudo chmod -R a+rw workers/test_volume .github/scripts/filter_tests.sh -t illumina .github/scripts/upload_test_coverage.sh workers - - name: Push built docker images - run: ./.github/scripts/push_docker_images.sh + - name: Login to Packages Container Registry + uses: docker/login-action@v2 + with: + registry: ghcr.io + username: ${{ github.actor }} + password: ${{ secrets.GITHUB_TOKEN }} + + - name: Push Built Docker Images + run: .github/scripts/push_docker_images.sh test_salmon: - runs-on: ubuntu-latest + name: Test Salmon env: IMAGES: migrations salmon - DOCKER_BUILDKIT: 1 - CODECOV_TOKEN: ${{ secrets.CODECOV_TOKEN }} + needs: + - test_base + runs-on: ubuntu-latest steps: - uses: actions/checkout@v3 - - name: Login to Packages Container registry + - name: Login to Packages Container Registry uses: docker/login-action@v2 with: - registry: ghcr.io - username: ${{ github.actor }} - password: ${{ secrets.GITHUB_TOKEN }} - - - name: Pull cached docker images - run: ./.github/scripts/pull_docker_images.sh + username: ${{ secrets.DOCKER_IO_USERNAME }} + password: ${{ secrets.DOCKER_IO_PASSWORD }} - name: Start Postgres - run: ./scripts/run_postgres.sh + run: scripts/run_postgres.sh - - name: Pause for initialization - run: sleep 30 + - name: Pause for Initialization + run: sleep 15 - - name: Install the database - run: ./scripts/install_db_docker.sh + - name: Install Database + run: scripts/install_db_docker.sh - - name: Update the models - run: | - sudo chmod -R a+wr common - ./scripts/update_models.sh + - name: Update Models + run: scripts/update_models.sh - - name: Run Salmon tests + - name: Run Salmon Tests run: | sudo chmod -R a+rw workers/test_volume .github/scripts/filter_tests.sh -t salmon .github/scripts/upload_test_coverage.sh workers - - name: Push built docker images - run: ./.github/scripts/push_docker_images.sh + - name: Login to Packages Container Registry + uses: docker/login-action@v2 + with: + registry: ghcr.io + username: ${{ github.actor }} + password: ${{ secrets.GITHUB_TOKEN }} + + - name: Push Built Docker Images + run: .github/scripts/push_docker_images.sh test_smasher: - runs-on: ubuntu-latest + name: Test Smasher env: IMAGES: migrations smasher - DOCKER_BUILDKIT: 1 - CODECOV_TOKEN: ${{ secrets.CODECOV_TOKEN }} + needs: + - test_base + runs-on: ubuntu-latest steps: - uses: actions/checkout@v3 - - name: Free up space - uses: jlumbroso/free-disk-space@main - with: - # this might remove tools that are actually needed, - # if set to "true" but frees about 6 GB - tool-cache: false - - - name: Login to Packages Container registry + - name: Login to Packages Container Registry uses: docker/login-action@v2 with: - registry: ghcr.io - username: ${{ github.actor }} - password: ${{ secrets.GITHUB_TOKEN }} - - - name: Pull cached docker images - run: ./.github/scripts/pull_docker_images.sh + username: ${{ secrets.DOCKER_IO_USERNAME }} + password: ${{ secrets.DOCKER_IO_PASSWORD }} - name: Start Postgres - run: ./scripts/run_postgres.sh + run: 
scripts/run_postgres.sh - - name: Pause for initialization - run: sleep 30 + - name: Pause for Initialization + run: sleep 15 - - name: Install the database - run: ./scripts/install_db_docker.sh + - name: Install Database + run: scripts/install_db_docker.sh - - name: Update the models - run: | - sudo chmod -R a+wr common - ./scripts/update_models.sh + - name: Update Models + run: scripts/update_models.sh - - name: Run smasher tests + - name: Run Smasher Tests run: | mkdir -p workers/test_volume sudo chmod -R a+rw workers/test_volume .github/scripts/filter_tests.sh -t smasher .github/scripts/upload_test_coverage.sh workers - - name: Run janitor tests + - name: Run Janitor Tests run: | sudo -E chown -R $USER:$USER workers/test_volume .github/scripts/filter_tests.sh -t janitor .github/scripts/upload_test_coverage.sh workers - - name: Run compendia tests + - name: Run Compendia Tests run: | sudo -E chown -R $USER:$USER workers/test_volume .github/scripts/filter_tests.sh -t compendia .github/scripts/upload_test_coverage.sh workers - - name: Run QN tests + - name: Run QN Tests run: | sudo -E chown -R $USER:$USER workers/test_volume .github/scripts/filter_tests.sh -t qn .github/scripts/upload_test_coverage.sh workers - - name: Push built docker images - run: ./.github/scripts/push_docker_images.sh + - name: Login to Packages Container Registry + uses: docker/login-action@v2 + with: + registry: ghcr.io + username: ${{ github.actor }} + password: ${{ secrets.GITHUB_TOKEN }} + + - name: Push Built Docker Images + run: .github/scripts/push_docker_images.sh test_transcriptome: - runs-on: ubuntu-latest + name: Test Transcriptome env: - IMAGES: migrations transcriptome DOCKER_BUILDKIT: 1 - CODECOV_TOKEN: ${{ secrets.CODECOV_TOKEN }} + IMAGES: migrations transcriptome + needs: + - test_base + runs-on: ubuntu-latest steps: - uses: actions/checkout@v3 - - name: Login to Packages Container registry + - name: Login to Packages Container Registry uses: docker/login-action@v2 with: - registry: ghcr.io - username: ${{ github.actor }} - password: ${{ secrets.GITHUB_TOKEN }} - - - name: Pull cached docker images - run: ./.github/scripts/pull_docker_images.sh + username: ${{ secrets.DOCKER_IO_USERNAME }} + password: ${{ secrets.DOCKER_IO_PASSWORD }} - name: Start Postgres - run: ./scripts/run_postgres.sh + run: scripts/run_postgres.sh - - name: Pause for initialization - run: sleep 30 + - name: Pause for Initialization + run: sleep 15 - - name: Install the database - run: ./scripts/install_db_docker.sh + - name: Install Database + run: scripts/install_db_docker.sh - - name: Update the models - run: | - sudo chmod -R a+wr common - ./scripts/update_models.sh + - name: Update Models + run: scripts/update_models.sh - - name: Run Transcriptome tests + - name: Run Transcriptome Tests run: | mkdir -p workers/test_volume sudo chmod -R a+rw workers/test_volume .github/scripts/filter_tests.sh -t transcriptome .github/scripts/upload_test_coverage.sh workers - - name: Push built docker images - run: ./.github/scripts/push_docker_images.sh - - determine_branch: - # As far as I can tell, this is the only way to use the output of - # a script in a github conditional. 
- if: startsWith(github.ref, 'refs/tags/v') - runs-on: ubuntu-latest - outputs: - branch: ${{ steps.set_branch.outputs.branch }} - steps: - - uses: actions/checkout@v3 + - name: Login to Packages Container Registry + uses: docker/login-action@v2 with: - fetch-depth: 0 + registry: ghcr.io + username: ${{ github.actor }} + password: ${{ secrets.GITHUB_TOKEN }} - - id: set_branch - name: Set the $CI_TAG environment variable - run: | - source scripts/common.sh - echo "branch=$(get_master_or_dev ${GITHUB_REF#refs/tags/})" >> $GITHUB_OUTPUT + - name: Push Built Docker Images + run: .github/scripts/push_docker_images.sh + # Deploy jobs. deploy: - if: startsWith(github.ref, 'refs/tags/v') && ! endsWith(github.ref, '-hotfix') - runs-on: ubuntu-latest - timeout-minutes: 1440 + name: Deploy env: DEPLOY_IP_ADDRESS: ${{ secrets.DEPLOY_IP_ADDRESS }} DOCKER_ID: ${{ secrets.DOCKER_ID }} @@ -526,31 +565,33 @@ jobs: PROD_DJANGO_SECRET_KEY: ${{ secrets.PROD_DJANGO_SECRET_KEY }} PROD_RAVEN_DSN: ${{ secrets.PROD_RAVEN_DSN }} PROD_RAVEN_DSN_API: ${{ secrets.PROD_RAVEN_DSN_API }} + if: startsWith(github.ref, 'refs/tags/v') && ! endsWith(github.ref, '-hotfix') needs: - determine_branch - - downloader_no_op_tests - - syntax_test - - test_affy_agilent + - test_affymetrix + - test_agilent - test_api - test_common + - test_downloaders_no_op - test_foreman - test_illumina - test_salmon - test_smasher - test_transcriptome + runs-on: ubuntu-latest steps: - uses: actions/checkout@v3 with: fetch-depth: 0 - - name: Set the $MASTER_OR_DEV and $CI_TAG environment variables + - name: Set the $MASTER_OR_DEV and $CI_TAG Environment Variables run: | source scripts/common.sh echo "MASTER_OR_DEV=$(get_master_or_dev ${GITHUB_REF#refs/tags/})" >> $GITHUB_ENV # Remove /ref/tags/ from the beginning of the tag name echo "CI_TAG=${GITHUB_REF#refs/tags/}" >> $GITHUB_ENV - - name: Set staging specific environment variables + - name: Set Staging Specific Environment Variables if: ${{needs.determine_branch.outputs.branch == 'dev'}} run: | echo "DATABASE_PASSWORD=${STAGING_DATABASE_PASSWORD}" >> $GITHUB_ENV @@ -560,7 +601,7 @@ jobs: # Staging doesn't notify the engagementbot, so overwrite this to be blank. 
echo "ENGAGEMENTBOT_WEBHOOK=" >> $GITHUB_ENV - - name: Set prod specific environment variables + - name: Set Production Specific Environment Variables if: ${{needs.determine_branch.outputs.branch == 'master'}} run: | echo "DATABASE_PASSWORD=${PROD_DATABASE_PASSWORD}" >> $GITHUB_ENV @@ -570,15 +611,14 @@ jobs: echo "ENGAGEMENTBOT_WEBHOOK=${ENGAGEMENTBOT_WEBHOOK}" >> $GITHUB_ENV - name: Deploy - run: ./.github/scripts/remote_deploy.sh + run: .github/scripts/remote_deploy.sh - - name: Cleanup deploy - run: ./.github/scripts/post_deploy_cleanup.sh - - hotfix_deploy: - if: startsWith(github.ref, 'refs/tags/v') && endsWith(github.ref, '-hotfix') - runs-on: ubuntu-latest + - name: Cleanup Deploy + run: .github/scripts/post_deploy_cleanup.sh timeout-minutes: 1440 + + deploy_hotfix: + name: Deploy Hotfix env: DEPLOY_IP_ADDRESS: ${{ secrets.DEPLOY_IP_ADDRESS }} DOCKER_ID: ${{ secrets.DOCKER_ID }} @@ -597,21 +637,23 @@ jobs: PROD_DJANGO_SECRET_KEY: ${{ secrets.PROD_DJANGO_SECRET_KEY }} PROD_RAVEN_DSN: ${{ secrets.PROD_RAVEN_DSN }} PROD_RAVEN_DSN_API: ${{ secrets.PROD_RAVEN_DSN_API }} + if: startsWith(github.ref, 'refs/tags/v') && endsWith(github.ref, '-hotfix') needs: - determine_branch + runs-on: ubuntu-latest steps: - uses: actions/checkout@v3 with: fetch-depth: 0 - - name: Set the $MASTER_OR_DEV and $CI_TAG environment variables + - name: Set the $MASTER_OR_DEV and $CI_TAG Environment Variables run: | source scripts/common.sh echo "MASTER_OR_DEV=$(get_master_or_dev ${GITHUB_REF#refs/tags/})" >> $GITHUB_ENV # Remove /ref/tags/ from the beginning of the tag name echo "CI_TAG=${GITHUB_REF#refs/tags/}" >> $GITHUB_ENV - - name: Set staging specific environment variables + - name: Set Staging Specific Environment Variables if: ${{needs.determine_branch.outputs.branch == 'dev'}} run: | echo "DATABASE_PASSWORD=${STAGING_DATABASE_PASSWORD}" >> $GITHUB_ENV @@ -621,7 +663,7 @@ jobs: # Staging doesn't notify the engagementbot, so overwrite this to be blank. echo "ENGAGEMENTBOT_WEBHOOK=" >> $GITHUB_ENV - - name: Set prod specific environment variables + - name: Set Production Specific Environment Variables if: ${{needs.determine_branch.outputs.branch == 'master'}} run: | echo "DATABASE_PASSWORD=${PROD_DATABASE_PASSWORD}" >> $GITHUB_ENV @@ -631,7 +673,27 @@ jobs: echo "ENGAGEMENTBOT_WEBHOOK=${ENGAGEMENTBOT_WEBHOOK}" >> $GITHUB_ENV - name: Deploy - run: ./.github/scripts/remote_deploy.sh + run: .github/scripts/remote_deploy.sh + + - name: Cleanup Deploy + run: .github/scripts/post_deploy_cleanup.sh + timeout-minutes: 1440 + + determine_branch: + name: Determene Branch + # As far as I can tell, this is the only way to use the output of + # a script in a github conditional. 
+ if: startsWith(github.ref, 'refs/tags/v') + runs-on: ubuntu-latest + outputs: + branch: ${{ steps.set_branch.outputs.branch }} + steps: + - uses: actions/checkout@v3 + with: + fetch-depth: 0 - - name: Cleanup deploy - run: ./.github/scripts/post_deploy_cleanup.sh + - id: set_branch + name: Set the $CI_TAG Environment Variable + run: | + source scripts/common.sh + echo "branch=$(get_master_or_dev ${GITHUB_REF#refs/tags/})" >> $GITHUB_OUTPUT diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index b651ce24a..381082dd2 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -1,7 +1,6 @@ --- exclude: '^(.*\.pem|.*\.tfvars)$' - repos: - repo: https://github.com/pre-commit/pre-commit-hooks rev: v2.4.0 @@ -33,18 +32,10 @@ repos: - id: shell-lint # We didn't write codecov.sh, we just get it from their servers # The other one is a template. - exclude: "codecov.sh|infrastructure/workers-configuration/workers-instance-user-data.tpl.sh" + exclude: 'codecov.sh|infrastructure/workers-configuration/workers-instance-user-data.tpl.sh' args: - # SC2154: Exclude warnings about variables being used without being - # assigned, because we sometimes use lowercase environment - # variables which shellcheck does not recognize. - # SC1090,SC1091: Don't try to follow sourced files + # SC2154: Exclude warnings about variables being used without being + # assigned, because we sometimes use lowercase environment + # variables which shellcheck does not recognize. + # SC1090,SC1091: Don't try to follow sourced files - --exclude=SC2154,SC1090,SC1091 - - - repo: https://github.com/antonbabenko/pre-commit-terraform - rev: v1.45.0 - hooks: - - id: terraform_validate - # It's a small file that gets copied up temporarily during deploys. - # Because it's not where it gets run it seems to be in a module when it's not. 
- exclude: "infrastructure/deploy/ci_ingress.tf" diff --git a/README.md b/README.md index 35505fbfb..ec2ac8c4a 100644 --- a/README.md +++ b/README.md @@ -19,46 +19,49 @@ Refine.bio currently has four sub-projects contained within this repo: ## Table of Contents -- [Development](#development) - - [Git Workflow](#git-workflow) - - [Installation](#installation) - - [Automatic](#automatic) - - [Linux (Manual)](#linux-manual) - - [Mac (Manual)](#mac-manual) - - [Virtual Environment](#virtual-environment) - - [Services](#services) - - [Postgres](#postgres) - - [Common Dependecies](#common-dependecies) - - [ElasticSearch](#elasticsearch) - - [Testing](#testing) - - [API](#api) - - [Common](#common) - - [Foreman](#foreman) - - [Workers](#workers) - - [Style](#style) - - [Gotchas](#gotchas) - - [R](#r) -- [Running Locally](#running-locally) - - [API](#api-1) - - [Surveyor Jobs](#surveyor-jobs) - - [Sequence Read Archive](#sequence-read-archive) - - [Ensembl Transcriptome Indices](#ensembl-transcriptome-indices) - - [Downloader Jobs](#downloader-jobs) - - [Processor Jobs](#processor-jobs) - - [Creating Quantile Normalization Reference Targets](#creating-quantile-normalization-reference-targets) - - [Creating Compendia](#creating-compendia) - - [Running Tximport Early](#running-tximport-early) - - [Development Helpers](#development-helpers) -- [Cloud Deployment](#cloud-deployment) - - [Docker Images](#docker-images) - - [Terraform](#terraform) - - [Running Jobs](#running-jobs) - - [Log Consumption](#log-consumption) - - [Dumping and Restoring Database Backups](#dumping-and-restoring-database-backups) - - [Tearing Down](#tearing-down) -- [Support](#support) -- [Meta-README](#meta-readme) -- [License](#license) +- [Refine.bio ](#refinebio--) + - [Table of Contents](#table-of-contents) + - [Development](#development) + - [Git Workflow](#git-workflow) + - [Installation](#installation) + - [Automatic](#automatic) + - [Linux (Manual)](#linux-manual) + - [Mac (Manual)](#mac-manual) + - [Virtual Environment](#virtual-environment) + - [Services](#services) + - [Postgres](#postgres) + - [Common Dependecies](#common-dependecies) + - [ElasticSearch](#elasticsearch) + - [Testing](#testing) + - [API](#api) + - [Common](#common) + - [Foreman](#foreman) + - [Workers](#workers) + - [Style](#style) + - [Gotchas](#gotchas) + - [R](#r) + - [Running Locally](#running-locally) + - [API](#api-1) + - [Surveyor Jobs](#surveyor-jobs) + - [Sequence Read Archive](#sequence-read-archive) + - [Ensembl Transcriptome Indices](#ensembl-transcriptome-indices) + - [Downloader Jobs](#downloader-jobs) + - [Processor Jobs](#processor-jobs) + - [Creating Quantile Normalization Reference Targets](#creating-quantile-normalization-reference-targets) + - [Creating Compendia](#creating-compendia) + - [Running Tximport Early](#running-tximport-early) + - [Development Helpers](#development-helpers) + - [Cloud Deployment](#cloud-deployment) + - [Docker Images](#docker-images) + - [Terraform](#terraform) + - [AWS Batch](#aws-batch) + - [Running Jobs](#running-jobs) + - [Log Consumption](#log-consumption) + - [Dumping and Restoring Database Backups](#dumping-and-restoring-database-backups) + - [Tearing Down](#tearing-down) + - [Support](#support) + - [Meta-README](#meta-readme) + - [License](#license) @@ -630,17 +633,17 @@ Please try to keep the `dev` and `master` versions in sync for major and minor v Refine.bio uses a number of different Docker images to run different pieces of the system. 
By default, refine.bio will pull images from the Dockerhub repo `ccdlstaging`. -If you would like to use images you have built and pushed to Dockerhub yourself you can pass the `-d` option to the `deploy.sh` script. +If you would like to use images you have built and pushed to Dockerhub yourself you can pass the `-r` option to the `deploy.sh` script. -To make building and pushing your own images easier, the `scripts/update_my_docker_images.sh` has been provided. -The `-d` option will allow you to specify which repo you'd like to push to. +To make building and pushing your own images easier, the `scripts/update_docker_images.sh` has been provided. +The `-r` option will allow you to specify which repo you'd like to push to. If the Dockerhub repo requires you to be logged in, you should do so before running the script using `docker login`. The -v option allows you to specify the version, which will both end up on the Docker images you're building as the SYSTEM_VERSION environment variable and also will be the docker tag for the image. -`scripts/update_my_docker_images.sh` will not build the dr_affymetrix image, because this image requires a lot of resources and time to build. -It can instead be built with `./scripts/prepare_image.sh -i affymetrix -d `. +`scripts/update_docker_images.sh` will not build the dr_affymetrix image, because this image requires a lot of resources and time to build. +It can instead be built with `./scripts/prepare_image.sh -i affymetrix -r `. WARNING: The affymetrix image installs a lot of data-as-R-packages and needs a lot of disk space to build the image. -It's not recommended to build the image with less than 60GB of free space on the disk that Docker runs on. +It's not recommended to build the image with less than 75GB of free space on the disk that Docker runs on. ### Terraform @@ -667,7 +670,7 @@ The correct way to deploy to the cloud is by running the `deploy.sh` script. Thi configuration steps, such as setting environment variables, setting up Batch job specifications, and performing database migrations. It can be used from the `infrastructure` directory like so: ```bash -./deploy.sh -u myusername -e dev -r us-east-1 -v v1.0.0 -d my-dockerhub-repo +./deploy.sh -u myusername -e dev -d us-east-1 -v v1.0.0 -r my-dockerhub-repo ``` This will spin up the whole system. It will usually take about 15 minutes, most of which is spent waiting for the Postgres instance to start. @@ -811,7 +814,7 @@ This can take a long time (>30 minutes)! ### Tearing Down -A stack that has been spun up via `deploy.sh -u myusername -e dev` can be taken down with `destroy_terraform.sh -u myusername -e dev -r us-east-1`. +A stack that has been spun up via `deploy.sh -u myusername -e dev` can be taken down with `destroy_terraform.sh -u myusername -e dev -d us-east-1`. The same username and environment must be passed into `destroy_terraform.sh` as were used to run `deploy.sh` either via the -e and -u options or by specifying `TF_VAR_stage` or `TF_VAR_user` so that the script knows which to take down. Note that this will prompt you for confirmation before actually destroying all of your cloud resources. diff --git a/api/dockerfiles/Dockerfile.api_base b/api/dockerfiles/Dockerfile.api_base new file mode 100644 index 000000000..d1d751104 --- /dev/null +++ b/api/dockerfiles/Dockerfile.api_base @@ -0,0 +1,29 @@ +FROM python:3.8.5-buster + +# Fail in case of an error at any stage in the pipe. +SHELL ["/bin/bash", "-o", "pipefail", "-c"] + +# Install gcc and libpq-dev for psycopg2. 
+RUN apt-get update -qq && \ + apt-get install -y \ + gcc \ + libpq-dev && \ + groupadd user && \ + useradd --create-home --home-dir /home/user -g user user + +WORKDIR /home/user + +COPY api/requirements.txt . +RUN pip install --upgrade pip && \ + pip install --ignore-installed --no-cache-dir -r requirements.txt + +COPY common/dist/data-refinery-common-* common/ +RUN pip install --ignore-installed \ + common/$(ls common -1 | sort --version-sort | tail -1) + +COPY api/ . +COPY config/ config/ + +ENV SYSTEM_VERSION=$SYSTEM_VERSION + +ENTRYPOINT [] diff --git a/api/dockerfiles/Dockerfile.api_local b/api/dockerfiles/Dockerfile.api_local index 2018cd59b..38cd1e5b7 100644 --- a/api/dockerfiles/Dockerfile.api_local +++ b/api/dockerfiles/Dockerfile.api_local @@ -1,35 +1,10 @@ -FROM python:3.8.5-slim-buster +ARG DOCKERHUB_REPO +ARG SYSTEM_VERSION +FROM $DOCKERHUB_REPO/dr_api_base:$SYSTEM_VERSION # Fail in case of an error at any stage in the pipe. SHELL ["/bin/bash", "-o", "pipefail", "-c"] -# Install gcc and libpq-dev for psycopg2. -RUN apt-get update -qq && \ - apt-get install -y \ - gcc \ - libpq-dev - -RUN groupadd user && \ - useradd --create-home --home-dir /home/user -g user user -WORKDIR /home/user - -COPY api/requirements.txt . -RUN pip install --upgrade pip && \ - pip install --ignore-installed -r requirements.txt && \ - rm -r /root/.cache - -# Get the latest version from the dist directory. -COPY common/dist/data-refinery-common-* common/ -RUN pip install --ignore-installed \ - common/$(ls common -1 | sort --version-sort | tail -1) - -COPY api/ . -COPY config/ config/ -COPY setup.cfg . - -ARG SYSTEM_VERSION -ENV SYSTEM_VERSION=$SYSTEM_VERSION - USER user EXPOSE 8000 diff --git a/api/dockerfiles/Dockerfile.api_production b/api/dockerfiles/Dockerfile.api_production index 37184be03..fccc67d0f 100644 --- a/api/dockerfiles/Dockerfile.api_production +++ b/api/dockerfiles/Dockerfile.api_production @@ -1,39 +1,14 @@ -FROM python:3.8.5-buster +ARG DOCKERHUB_REPO +ARG SYSTEM_VERSION +FROM $DOCKERHUB_REPO/dr_api_base:$SYSTEM_VERSION # Fail in case of an error at any stage in the pipe. SHELL ["/bin/bash", "-o", "pipefail", "-c"] -# Install gcc and libpq-dev for psycopg2. -RUN apt-get update -qq && \ - apt-get install -y \ - gcc \ - libpq-dev - -RUN groupadd user && \ - useradd --create-home --home-dir /home/user -g user user -WORKDIR /home/user - -COPY api/requirements.txt . -RUN pip install --upgrade pip && \ - pip install --ignore-installed -r requirements.txt && \ - pip install --ignore-installed uwsgi && \ - rm -r /root/.cache - -# Get the latest version from the dist directory. -COPY common/dist/data-refinery-common-* common/ -RUN pip install --ignore-installed \ - common/$(ls common -1 | sort --version-sort | tail -1) - -COPY api/ . -COPY config/ config/ - RUN chmod +x /home/user/collect_and_run_uwsgi.sh && \ mkdir -p /tmp/www/static && \ chown user /tmp/www/static -ARG SYSTEM_VERSION -ENV SYSTEM_VERSION=$SYSTEM_VERSION - USER user # We collect Django's static files and expose them as a volume so that Nginx diff --git a/api/run_tests.sh b/api/run_tests.sh index f81309fbd..f4353c679 100755 --- a/api/run_tests.sh +++ b/api/run_tests.sh @@ -4,27 +4,31 @@ # This script should always run as if it were being called from # the directory it lives in. 
-script_directory="$(cd "$(dirname "$0")" || exit; pwd)" +script_directory="$( + cd "$(dirname "$0")" || exit + pwd +)" cd "$script_directory" || exit -# However in order to give Docker access to all the code we have to -# move up a level +# However, in order to give Docker access to all the code we have to +# move up a level. cd .. -# Ensure that postgres is running +# Ensure that Postgres is running. if ! [ "$(docker ps --filter name=drdb -q)" ]; then echo "You must start Postgres first with:" >&2 echo "./scripts/run_postgres.sh" >&2 exit 1 fi -# Ensure that elasticsearch is running + +# Ensure that ElasticSearch is running. if ! [ "$(docker ps --filter name=dres -q)" ]; then echo "You must start elasticsearchfirst with:" >&2 echo "./scripts/run_es.sh" >&2 exit 1 fi -project_root=$(pwd) # "cd .." called above +project_root=$(pwd) # "cd .." called above. volume_directory="$project_root/test_volume" if [ ! -d "$volume_directory" ]; then mkdir "$volume_directory" @@ -34,18 +38,23 @@ chmod -R a+rwX "$volume_directory" ./scripts/prepare_image.sh -i api_local -s api . ./scripts/common.sh + DB_HOST_IP=$(get_docker_db_ip_address) ES_HOST_IP=$(get_docker_es_ip_address) -# Only run interactively if we are on a TTY +# Only run interactively if we are on a TTY. if [ -t 1 ]; then - INTERACTIVE="-i" + INTERACTIVE="--interactive" fi -docker run -t $INTERACTIVE \ - --add-host=database:"$DB_HOST_IP" \ - --add-host=elasticsearch:"$ES_HOST_IP" \ - --env-file api/environments/test \ - --platform linux/amd64 \ - --volume "$volume_directory":/home/user/data_store \ - ccdlstaging/dr_api_local bash -c "$(run_tests_with_coverage "$@")" +# shellcheck disable=SC2086 +docker run \ + --add-host=database:"$DB_HOST_IP" \ + --add-host=elasticsearch:"$ES_HOST_IP" \ + --env-file api/environments/test \ + --platform linux/amd64 \ + --tty \ + --volume "$volume_directory":/home/user/data_store \ + $INTERACTIVE \ + "$DOCKERHUB_REPO/dr_api_local" \ + bash -c "$(run_tests_with_coverage "$@")" diff --git a/api/serve.sh b/api/serve.sh index 4bae59fe9..1cd32a622 100755 --- a/api/serve.sh +++ b/api/serve.sh @@ -4,22 +4,29 @@ # This script should always run as if it were being called from # the directory it lives in. -script_directory="$(cd "$(dirname "$0")" || exit; pwd)" +script_directory="$( + cd "$(dirname "$0")" || exit + pwd +)" cd "$script_directory" || exit -# However in order to give Docker access to all the code we have to -# move up a level +# However, in order to give Docker access to all the code we have to +# move up a level. cd .. ./scripts/prepare_image.sh -i api_local -s api . ./scripts/common.sh + DB_HOST_IP=$(get_docker_db_ip_address) ES_HOST_IP=$(get_docker_es_ip_address) docker run \ - --add-host=database:"$DB_HOST_IP" \ - --add-host=elasticsearch:"$ES_HOST_IP" \ - --env-file api/environments/local \ - -p 8000:8000 \ - -it ccdlstaging/dr_api_local python3 manage.py runserver 0.0.0.0:8000 "$@" + --add-host=database:"$DB_HOST_IP" \ + --add-host=elasticsearch:"$ES_HOST_IP" \ + --env-file api/environments/local \ + --interactive \ + --publish 8000:8000 \ + --tty \ + "$DOCKERHUB_REPO/dr_api_local" \ + python3 manage.py runserver 0.0.0.0:8000 "$@" diff --git a/api/serve_production.sh b/api/serve_production.sh index 530b237ca..c23382778 100755 --- a/api/serve_production.sh +++ b/api/serve_production.sh @@ -4,24 +4,31 @@ # This script should always run as if it were being called from # the directory it lives in. 
-script_directory="$(cd "$(dirname "$0")" || exit; pwd)" +script_directory="$( + cd "$(dirname "$0")" || exit + pwd +)" cd "$script_directory" || exit -# However in order to give Docker access to all the code we have to -# move up a level +# However, in order to give Docker access to all the code we have to +# move up a level. cd .. ./scripts/prepare_image.sh -i api_production -s api . ./scripts/common.sh -DB_HOST_IP=$(get_docker_db_ip_address) +DB_HOST_IP=$(get_docker_db_ip_address) STATIC_VOLUMES=/tmp/volumes_static docker run \ - --add-host=database:"$DB_HOST_IP" \ - --env-file api/environments/local \ - --link drdb:postgres \ - -v "$STATIC_VOLUMES":/tmp/www/static \ - -p 8081:8081 \ - -it -d ccdlstaging/dr_api_production /bin/sh -c "/home/user/collect_and_run_uwsgi.sh" + --add-host=database:"$DB_HOST_IP" \ + --detach \ + --env-file api/environments/local \ + --interactive \ + --link drdb:postgres \ + --publish 8081:8081 \ + --tty \ + --volume "$STATIC_VOLUMES":/tmp/www/static \ + "$DOCKERHUB_REPO/dr_api_production" \ + /bin/sh -c "/home/user/collect_and_run_uwsgi.sh" diff --git a/common/R/renv_load.R b/common/R/renv_load.R index abaf13935..dd5caa080 100644 --- a/common/R/renv_load.R +++ b/common/R/renv_load.R @@ -1,13 +1,16 @@ options(warn = 2) options(repos = structure(c( - Bioconductor = "https://bioconductor.org/packages/3.11/bioc/", - BioconductorAnnotation = "https://bioconductor.org/packages/3.11/data/annotation/", - BioconductorExperiment = "https://bioconductor.org/packages/3.11/data/experiment", - CRAN = "https://cloud.r-project.org/" + Bioconductor = "https://bioconductor.org/packages/3.6/bioc", + BioconductorAnnotation = "https://bioconductor.org/packages/3.6/data/annotation", + BioconductorExperiment = "https://bioconductor.org/packages/3.6/data/experiment", + CRAN = "https://cloud.r-project.org" ))) options(Ncpus = parallel::detectCores()) +options(renv.r.version = "3.4.4") +options(renv.settings.use.cache = FALSE) -install.packages("renv") +install.packages("BiocInstaller") +install.packages("https://cran.r-project.org/src/contrib/Archive/renv/renv_0.16.0.tar.gz") renv::consent(provided = TRUE) -renv::restore(prompt = FALSE) +renv::restore(prompt = FALSE, rebuild = TRUE) diff --git a/workers/R/renv_save.R b/common/R/renv_save.R similarity index 100% rename from workers/R/renv_save.R rename to common/R/renv_save.R diff --git a/common/dockerfiles/Dockerfile.common_tests b/common/dockerfiles/Dockerfile.common_tests index 2a31940f4..26b2be946 100644 --- a/common/dockerfiles/Dockerfile.common_tests +++ b/common/dockerfiles/Dockerfile.common_tests @@ -1,4 +1,6 @@ -FROM ccdlstaging/dr_base:latest +ARG DOCKERHUB_REPO +ARG SYSTEM_VERSION +FROM $DOCKERHUB_REPO/dr_base:$SYSTEM_VERSION # Fail in case of an error at any stage in the pipe. SHELL ["/bin/bash", "-o", "pipefail", "-c"] @@ -15,7 +17,6 @@ RUN pip3 install --ignore-installed --no-cache-dir -r requirements.txt COPY common/ . COPY config config -ARG SYSTEM_VERSION ENV SYSTEM_VERSION=$SYSTEM_VERSION USER user diff --git a/common/dockerfiles/Dockerfile.migrations b/common/dockerfiles/Dockerfile.migrations index 00e704fa2..da2405265 100644 --- a/common/dockerfiles/Dockerfile.migrations +++ b/common/dockerfiles/Dockerfile.migrations @@ -1,4 +1,6 @@ -FROM ccdlstaging/dr_base:latest +ARG DOCKERHUB_REPO +ARG SYSTEM_VERSION +FROM $DOCKERHUB_REPO/dr_base:$SYSTEM_VERSION # Fail in case of an error at any stage in the pipe. 
SHELL ["/bin/bash", "-o", "pipefail", "-c"] @@ -11,7 +13,6 @@ RUN pip install --ignore-installed --no-cache-dir -r requirements.txt COPY common/ . COPY config config -ARG SYSTEM_VERSION ENV SYSTEM_VERSION=$SYSTEM_VERSION USER user diff --git a/common/run_tests.sh b/common/run_tests.sh index 51c564270..41eb8e3d1 100755 --- a/common/run_tests.sh +++ b/common/run_tests.sh @@ -1,24 +1,27 @@ #!/bin/sh -e -# script for executing Django PyUnit Tests within a Docker container. +# Script for executing Django PyUnit tests within a Docker container. # This script should always run as if it were being called from # the directory it lives in. -script_directory="$(cd "$(dirname "$0")" || exit; pwd)" +script_directory="$( + cd "$(dirname "$0")" || exit + pwd +)" cd "$script_directory" || exit -# However in order to give Docker access to all the code we have to -# move up a level +# However, in order to give Docker access to all the code we have to +# move up a level. cd .. -# Ensure that postgres is running +# Ensure that Postgres is running. if ! [ "$(docker ps --filter name=drdb -q)" ]; then echo "You must start Postgres first with:" >&2 echo "./scripts/run_postgres.sh" >&2 exit 1 fi -project_root=$(pwd) # "cd .." called above +project_root=$(pwd) # "cd .." called above. volume_directory="$project_root/test_volume" if [ ! -d "$volume_directory" ]; then mkdir "$volume_directory" @@ -28,18 +31,23 @@ chmod -R a+rwX "$volume_directory" ./scripts/prepare_image.sh -i common_tests -s common . ./scripts/common.sh + DB_HOST_IP=$(get_docker_db_ip_address) ES_HOST_IP=$(get_docker_es_ip_address) -# Only run interactively if we are on a TTY +# Only run interactively if we are on a TTY. if [ -t 1 ]; then - INTERACTIVE="-i" + INTERACTIVE="--interactive" fi -docker run -t $INTERACTIVE \ - --add-host=database:"$DB_HOST_IP" \ - --add-host=elasticsearch:"$ES_HOST_IP" \ - --env-file common/environments/test \ - --platform linux/amd64 \ - --volume "$volume_directory":/home/user/data_store \ - ccdlstaging/dr_common_tests bash -c "$(run_tests_with_coverage "$@")" --parallel +# shellcheck disable=SC2086 +docker run \ + --add-host=database:"$DB_HOST_IP" \ + --add-host=elasticsearch:"$ES_HOST_IP" \ + --env-file common/environments/test \ + --platform linux/amd64 \ + --tty \ + --volume "$volume_directory":/home/user/data_store \ + $INTERACTIVE \ + "$DOCKERHUB_REPO/dr_common_tests" \ + bash -c "$(run_tests_with_coverage "$@")" --parallel diff --git a/common/setup.py b/common/setup.py index 78339b2e1..4234b9ab5 100644 --- a/common/setup.py +++ b/common/setup.py @@ -1,4 +1,6 @@ import os +import re +from datetime import datetime from setuptools import find_packages, setup @@ -11,11 +13,21 @@ version_string = version_file.read().strip().split("-")[0] except OSError: print( - "Cannot read version to determine System Version." - " Please create a file common/version containing an up to date System Version." + "Cannot read version file to determine system version. " + "Please create a file common/version containing an up to date system version." ) raise +version_re = re.compile( + r"^([1-9][0-9]*!)?(0|[1-9][0-9]*)" + "(\.(0|[1-9][0-9]*))*((a|b|rc)(0|[1-9][0-9]*))" + "?(\.post(0|[1-9][0-9]*))?(\.dev(0|[1-9][0-9]*))?$" +) +if not version_re.match(version_string): + # Generate version based on the datetime.now(): e.g., 2023.5.17.dev1684352560. 
+ now = datetime.now() + version_string = f"{now.strftime('%Y.%-m.%-d.dev')}{int(datetime.timestamp(now))}" + setup( name="data-refinery-common", version=version_string, diff --git a/foreman/dockerfiles/Dockerfile.foreman b/foreman/dockerfiles/Dockerfile.foreman index 94e5c185a..89e3611f0 100644 --- a/foreman/dockerfiles/Dockerfile.foreman +++ b/foreman/dockerfiles/Dockerfile.foreman @@ -1,16 +1,19 @@ -FROM ccdlstaging/dr_base:latest +ARG DOCKERHUB_REPO +ARG SYSTEM_VERSION +FROM $DOCKERHUB_REPO/dr_base:$SYSTEM_VERSION # Fail in case of an error at any stage in the pipe. SHELL ["/bin/bash", "-o", "pipefail", "-c"] WORKDIR /home/user -ADD https://gbnci.cancer.gov/geo/GEOmetadb.sqlite.gz data/microarray/GEOmetadb.sqlite.gz -RUN gunzip data/microarray/GEOmetadb.sqlite.gz && \ - chmod 644 data/microarray/GEOmetadb.sqlite - COPY foreman/requirements.txt . -RUN pip3 install --ignore-installed --no-cache-dir -r requirements.txt +RUN mkdir -p data/microarray && \ + wget -q https://gbnci.cancer.gov/geo/GEOmetadb.sqlite.gz \ + -O data/microarray/GEOmetadb.sqlite.gz && \ + gunzip data/microarray/GEOmetadb.sqlite.gz && \ + chmod 644 data/microarray/GEOmetadb.sqlite && \ + pip3 install --ignore-installed --no-cache-dir -r requirements.txt # Get the latest version from the dist directory. COPY common/dist/data-refinery-common-* common/ @@ -21,7 +24,6 @@ COPY .boto .boto COPY config config COPY foreman/ . -ARG SYSTEM_VERSION ENV SYSTEM_VERSION=$SYSTEM_VERSION USER user diff --git a/foreman/run_end_to_end_tests.sh b/foreman/run_end_to_end_tests.sh index 554068980..e08112ae2 100755 --- a/foreman/run_end_to_end_tests.sh +++ b/foreman/run_end_to_end_tests.sh @@ -4,7 +4,10 @@ # This script should always run as if it were being called from # the directory it lives in. -script_directory="$(cd "$(dirname "$0")" || exit; pwd)" +script_directory="$( + cd "$(dirname "$0")" || exit + pwd +)" cd "$script_directory" || exit # Set up the data volume directory if it does not already exist. @@ -25,30 +28,27 @@ if [ ! -e "$reference_file_dir/$quant_file" ]; then mkdir -p "$reference_file_dir" echo "Downloading quant file for Transcriptome Index validation tests." wget -q -O "$reference_file_dir/$quant_file" \ - "$test_data_repo/$quant_file" + "$test_data_repo/$quant_file" fi # temp for testing locally. ../scripts/prepare_image.sh -i foreman -s foreman - while read -r row; do # Exporting an expansion rather than a variable, which is exactly what we want to do. # shellcheck disable=SC2163 export "${row}" -done < ../infrastructure/prod_env - +done <../infrastructure/prod_env -# Hardcode ccdlstaging because this should only ever be run in staging or -# locally, and when running locally `prepare_image.sh` makes the forman -# ccdlstaging/dr_foreman. 
-docker run -t \ - --env-file ../infrastructure/prod_env \ - --env RUNNING_IN_CLOUD=False \ +docker run \ --env DATABASE_HOST="$DATABASE_PUBLIC_HOST" \ + --env DJANGO_SECRET_KEY="TEST_KEY_FOR_DEV" \ --env JOB_DEFINITION_PREFIX="$USER_$STAGE_" \ --env REFINEBIO_BASE_URL="http://$API_HOST/v1/" \ - --env DJANGO_SECRET_KEY="TEST_KEY_FOR_DEV" \ - --volume "$volume_directory":/home/user/data_store \ + --env RUNNING_IN_CLOUD=False \ + --env-file ../infrastructure/prod_env \ --volume "$HOME/.aws":/home/user/.aws \ - ccdlstaging/dr_foreman python3 manage.py test --no-input --parallel=2 --testrunner='tests.test_runner.NoDbTestRunner' tests.foreman.test_end_to_end + --volume "$volume_directory":/home/user/data_store \ + --tty \ + "$DOCKERHUB_REPO/dr_foreman" \ + python3 manage.py test --no-input --parallel=2 --testrunner='tests.test_runner.NoDbTestRunner' tests.foreman.test_end_to_end diff --git a/foreman/run_management_command.sh b/foreman/run_management_command.sh index a9e4ce101..d51768fd8 100755 --- a/foreman/run_management_command.sh +++ b/foreman/run_management_command.sh @@ -1,13 +1,16 @@ #!/bin/sh -# Script for running the Data Refinery Surveyor container +# Script for running the Data Refinery Surveyor container. # This script should always run as if it were being called from # the directory it lives in. -script_directory="$(cd "$(dirname "$0")" || exit; pwd)" +script_directory="$( + cd "$(dirname "$0")" || exit + pwd +)" cd "$script_directory" || exit -# However in order to give Docker access to all the code we have to +# However, in order to give Docker access to all the code we have to # move up a level cd .. @@ -21,12 +24,16 @@ chmod -R a+rwX "$volume_directory" ./scripts/prepare_image.sh -i foreman -s foreman . ./scripts/common.sh + DB_HOST_IP=$(get_docker_db_ip_address) -docker run -it \ - --add-host=database:"$DB_HOST_IP" \ - --env-file foreman/environments/local \ - --env AWS_ACCESS_KEY_ID="$AWS_ACCESS_KEY_ID" \ - --env AWS_SECRET_ACCESS_KEY="$AWS_SECRET_ACCESS_KEY" \ - --volume "$volume_directory":/home/user/data_store \ - ccdlstaging/dr_foreman python3 manage.py "$@" +docker run \ + --add-host=database:"$DB_HOST_IP" \ + --env AWS_ACCESS_KEY_ID="$AWS_ACCESS_KEY_ID" \ + --env AWS_SECRET_ACCESS_KEY="$AWS_SECRET_ACCESS_KEY" \ + --env-file foreman/environments/local \ + --interactive \ + --tty \ + --volume "$volume_directory":/home/user/data_store \ + "$DOCKERHUB_REPO/dr_foreman" \ + python3 manage.py "$@" diff --git a/foreman/run_tests.sh b/foreman/run_tests.sh index 8edbdc56f..f0ae37de4 100755 --- a/foreman/run_tests.sh +++ b/foreman/run_tests.sh @@ -4,7 +4,10 @@ # This script should always run as if it were being called from # the directory it lives in. -script_directory="$(cd "$(dirname "$0")" || exit; pwd)" +script_directory="$( + cd "$(dirname "$0")" || exit + pwd +)" cd "$script_directory" || exit # Set up the data volume directory if it does not already exist. @@ -18,11 +21,11 @@ if [ ! -d "$volume_directory" ]; then fi chmod -R a+rwX "$volume_directory" -# However in order to give Docker access to all the code we have to -# move up a level +# However, in order to give Docker access to all the code we have to +# move up a level. cd .. -# First ensure postgres is running +# First ensure Postgres is running. if ! [ "$(docker ps --filter name=drdb -q)" ]; then echo "You must start Postgres first with:" >&2 echo "./scripts/run_postgres.sh" >&2 @@ -32,16 +35,21 @@ fi ./scripts/prepare_image.sh -i foreman -s foreman . 
./scripts/common.sh + DB_HOST_IP=$(get_docker_db_ip_address) -# Only run interactively if we are on a TTY +# Only run interactively if we are on a TTY. if [ -t 1 ]; then - INTERACTIVE="-i" + INTERACTIVE="--interactive" fi -docker run -t $INTERACTIVE \ - --add-host=database:"$DB_HOST_IP" \ - --env-file foreman/environments/test \ - --platform linux/amd64 \ - --volume "$volume_directory":/home/user/data_store \ - ccdlstaging/dr_foreman bash -c "$(run_tests_with_coverage --exclude-tag=manual "$@")" +# shellcheck disable=SC2086 +docker run \ + --add-host=database:"$DB_HOST_IP" \ + --env-file foreman/environments/test \ + --platform linux/amd64 \ + --tty \ + --volume "$volume_directory":/home/user/data_store \ + $INTERACTIVE \ + "$DOCKERHUB_REPO/dr_foreman" \ + bash -c "$(run_tests_with_coverage --exclude-tag=manual "$@")" diff --git a/foreman/test_survey.sh b/foreman/test_survey.sh index 7df91a3ae..1aec44116 100755 --- a/foreman/test_survey.sh +++ b/foreman/test_survey.sh @@ -2,7 +2,7 @@ # Script for testing the surveying of an accession manually (e.g. from a dataset request) print_options() { - cat << EOF + cat <&2 echo "./scripts/run_postgres.sh" >&2 @@ -76,12 +79,16 @@ fi ./scripts/prepare_image.sh -i foreman -s foreman . ./scripts/common.sh + DB_HOST_IP=$(get_docker_db_ip_address) docker run \ - --add-host=database:"$DB_HOST_IP" \ - --env-file foreman/environments/test \ - --volume "$volume_directory":/home/user/data_store \ - -e SURVEYOR="$SURVEYOR" \ - -e ACCESSION="$ACCESSION" \ - -it ccdlstaging/dr_foreman bash -c "python3 manage.py test --tag=manual ." + --add-host=database:"$DB_HOST_IP" \ + --env ACCESSION="$ACCESSION" \ + --env SURVEYOR="$SURVEYOR" \ + --env-file foreman/environments/test \ + --interactive \ + --tty \ + --volume "$volume_directory":/home/user/data_store \ + "$DOCKERHUB_REPO/dr_foreman" \ + bash -c "python3 manage.py test --tag=manual ." diff --git a/infrastructure/api-configuration/api-server-instance-user-data.tpl.sh b/infrastructure/api-configuration/api-server-instance-user-data.tpl.sh index 3f475c98b..0dfb15ddc 100644 --- a/infrastructure/api-configuration/api-server-instance-user-data.tpl.sh +++ b/infrastructure/api-configuration/api-server-instance-user-data.tpl.sh @@ -15,7 +15,7 @@ cd /home/ubuntu || exit # Install and configure Nginx. 
-cat <<"EOF" > nginx.conf
+cat <<"EOF" >nginx.conf
 ${nginx_config}
 EOF
 apt-get update -y
@@ -107,7 +107,7 @@ echo "
 size 20k
 daily
 maxage 3
-}" >> /etc/logrotate.conf
+}" >>/etc/logrotate.conf
 echo "
 /tmp/access.log {
 missingok
@@ -116,10 +116,10 @@ echo "
 size 20k
 daily
 maxage 3
-}" >> /etc/logrotate.conf
+}" >>/etc/logrotate.conf
 # Install our environment variables
-cat <<"EOF" > environment
+cat <<"EOF" >environment
 ${api_environment}
 EOF
@@ -135,36 +135,40 @@ docker pull "${dockerhub_repo}/${api_docker_image}"
 # These database values are created after TF
 # is run, so we have to pass them in programmatically
 docker run \
-    --env-file environment \
-    -e DATABASE_HOST="${database_host}" \
-    -e DATABASE_NAME="${database_name}" \
-    -e DATABASE_USER="${database_user}" \
-    -e DATABASE_PASSWORD="${database_password}" \
-    -e ELASTICSEARCH_HOST="${elasticsearch_host}" \
-    -e ELASTICSEARCH_PORT="${elasticsearch_port}" \
-    -v "$STATIC_VOLUMES":/tmp/www/static \
-    --log-driver=awslogs \
-    --log-opt awslogs-region="${region}" \
-    --log-opt awslogs-group="${log_group}" \
-    --log-opt awslogs-stream="${log_stream}" \
-    -p 8081:8081 \
-    --name=dr_api \
-    -it -d "${dockerhub_repo}/${api_docker_image}" /bin/sh -c "/home/user/collect_and_run_uwsgi.sh"
+    --detach \
+    --env DATABASE_HOST="${database_host}" \
+    --env DATABASE_NAME="${database_name}" \
+    --env DATABASE_PASSWORD="${database_password}" \
+    --env DATABASE_USER="${database_user}" \
+    --env ELASTICSEARCH_HOST="${elasticsearch_host}" \
+    --env ELASTICSEARCH_PORT="${elasticsearch_port}" \
+    --env-file environment \
+    --interactive \
+    --log-driver=awslogs \
+    --log-opt awslogs-group="${log_group}" \
+    --log-opt awslogs-region="${region}" \
+    --log-opt awslogs-stream="${log_stream}" \
+    --name=dr_api \
+    --tty \
+    --volume "$STATIC_VOLUMES":/tmp/www/static \
+    --publish 8081:8081 \
+    "${dockerhub_repo}/${api_docker_image}" \
+    /bin/sh -c "/home/user/collect_and_run_uwsgi.sh"
 # Nuke and rebuild the search index. It shouldn't take too long.
 sleep 30
-docker exec dr_api python3 manage.py search_index --delete -f;
-docker exec dr_api python3 manage.py search_index --rebuild -f;
-docker exec dr_api python3 manage.py search_index --populate -f;
+docker exec dr_api python3 manage.py search_index --delete -f
+docker exec dr_api python3 manage.py search_index --rebuild -f
+docker exec dr_api python3 manage.py search_index --populate -f
 # Let's use this instance to call the populate command every twenty minutes.
-crontab -l > tempcron
+crontab -l >tempcron
 # echo new cron into cron file
 # TODO: stop logging this to api_cron.log once we figure out why it
 # hasn't been working.
-echo -e "SHELL=/bin/bash\nPATH=/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin\n*/20 * * * * docker exec dr_api python3 manage.py update_es_index >> /var/log/api_cron.log 2>&1" >> tempcron +echo -e "SHELL=/bin/bash\nPATH=/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin\n*/20 * * * * docker exec dr_api python3 manage.py update_es_index >> /var/log/api_cron.log 2>&1" >>tempcron # Post a summary of downloads every Monday at 12:00 UTC -echo -e "SHELL=/bin/bash\nPATH=/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin\n0 12 * * MON docker exec dr_api python3 manage.py post_downloads_summary >> /var/log/api_cron.log 2>&1" >> tempcron +echo -e "SHELL=/bin/bash\nPATH=/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin\n0 12 * * MON docker exec dr_api python3 manage.py post_downloads_summary >> /var/log/api_cron.log 2>&1" >>tempcron # install new cron file crontab tempcron rm tempcron diff --git a/infrastructure/deploy.sh b/infrastructure/deploy.sh index d8175b423..2298c2d8a 100755 --- a/infrastructure/deploy.sh +++ b/infrastructure/deploy.sh @@ -2,7 +2,10 @@ # This script should always run as if it were being called from # the directory it lives in. -script_directory="$(cd "$(dirname "$0")" || exit; pwd)" +script_directory="$( + cd "$(dirname "$0")" || exit + pwd +)" cd "$script_directory" || exit print_description() { @@ -21,7 +24,7 @@ print_options() { echo ' "-e prod" will deploy the production stack. This should only be used from a CD machine.' echo ' "-e staging" will deploy the staging stack. This should only be used from a CD machine.' echo ' "-e dev" will deploy a dev stack which is appropriate for a single developer to use to test.' - echo '-d May be used to override the Dockerhub repo where the images will be pulled from.' + echo '-r May be used to override the Dockerhub repo where the images will be pulled from.' echo ' This may also be specified by setting the TF_VAR_dockerhub_repo environment variable.' echo ' If unset, defaults to "ccdlstaging" if the version contains "-dev" and "ccdl" otherwise.' echo ' for dev and staging environments and "ccdl" for prod.' @@ -31,7 +34,7 @@ print_options() { echo '-v specifies the version of the system which is being deployed and is not optional.' echo "-u specifies the username of the deployer. Should be the developer's name in development stacks." echo ' This option may be omitted, in which case the TF_VAR_user variable MUST be set instead.' - echo '-r specifies the AWS region to deploy the stack to. Defaults to us-east-1.' + echo '-d specifies the AWS region to deploy the stack to. Defaults to us-east-1.' } while getopts ":e:d:i:v:u:r:h" opt; do @@ -40,7 +43,7 @@ while getopts ":e:d:i:v:u:r:h" opt; do export env=$OPTARG export TF_VAR_stage=$OPTARG ;; - d) + r) export TF_VAR_dockerhub_repo=$OPTARG ;; i) @@ -53,7 +56,7 @@ while getopts ":e:d:i:v:u:r:h" opt; do u) export TF_VAR_user=$OPTARG ;; - r) + d) export TF_VAR_region=$OPTARG ;; h) @@ -102,31 +105,29 @@ if [[ -z $TF_VAR_region ]]; then TF_VAR_region=us-east-1 fi - # We have terraform output environment variables via a single output # variable, which we then read in as json using the command line tool # `jq`, so that we can use them via bash. 
-format_environment_variables () { - json_env_vars=$(terraform output -json environment_variables | jq -c '.[]') - for row in $json_env_vars; do - name=$(echo "$row" | jq -r ".name") - value=$(echo "$row" | jq -r ".value") - env_var_assignment="$name=$value" - # Exporting an expansion rather than a variable, which is exactly what we want to do. - # shellcheck disable=SC2163 - export "${env_var_assignment?}" - echo "$env_var_assignment" >> prod_env - done +format_environment_variables() { + json_env_vars=$(terraform output -json environment_variables | jq -c '.[]') + for row in $json_env_vars; do + name=$(echo "$row" | jq -r ".name") + value=$(echo "$row" | jq -r ".value") + env_var_assignment="$name=$value" + # Exporting an expansion rather than a variable, which is exactly what we want to do. + # shellcheck disable=SC2163 + export "${env_var_assignment?}" + echo "$env_var_assignment" >>prod_env + done } - -# Load $ALL_CCDL_IMAGES and helper functions -source ../scripts/common.sh +# Load $ALL_IMAGES and helper functions. +. ../scripts/common.sh # Make our IP address known to terraform. TF_VAR_host_ip="$(dig +short myip.opendns.com @resolver1.opendns.com)" export TF_VAR_host_ip -for IMAGE in $ALL_CCDL_IMAGES; do +for IMAGE in $ALL_IMAGES; do # For each image we need to set the env var that is used by our # scripts and the env var that gets picked up by terraform because # it is preceeded with TF_VAR. @@ -140,27 +141,26 @@ cp deploy/ci_ingress.tf . # Check if a new ccdl-ubuntu ami will be needed for this region if [[ $(aws ec2 describe-images \ - --region $TF_VAR_region --owners 589864003899 \ - --filters 'Name=name,Values=ccdl-ubuntu-18.04-*' \ - --query 'length(Images)') \ - -eq 0 ]]; then + --region "$TF_VAR_region" --owners 589864003899 \ + --filters 'Name=name,Values=ccdl-ubuntu-18.04-*' \ + --query 'length(Images)') -eq 0 ]]; then echo "No ccdl-ubuntu-18.04 AMI found for this region, creating a new one" # Find most recent ccdl-ubuntu ami from us-east-1 template_ami_id=$(aws ec2 describe-images \ - --region us-east-1 --owners 589864003899 \ - --filters 'Name=name,Values=ccdl-ubuntu-18.04-*' \ - --query 'sort_by(Images,&CreationDate)[-1].ImageId' \ - --output text) + --region us-east-1 --owners 589864003899 \ + --filters 'Name=name,Values=ccdl-ubuntu-18.04-*' \ + --query 'sort_by(Images,&CreationDate)[-1].ImageId' \ + --output text) # Make a copy into this region new_ami_name="ccdl-ubuntu-18.04-$(date "+%Y-%m-%dT%H.%M.%S")" new_ami_id=$(aws ec2 copy-image \ - --source-image-id "$template_ami_id" \ - --source-region us-east-1 \ - --region "$TF_VAR_region" \ - --name "$new_ami_name" \ - --output text) + --source-image-id "$template_ami_id" \ + --source-region us-east-1 \ + --region "$TF_VAR_region" \ + --name "$new_ami_name" \ + --output text) echo "Created new AMI for $TF_VAR_region" echo " name: $new_ami_name" echo " id: $new_ami_id" @@ -174,16 +174,16 @@ fi terraform taint module.batch.aws_launch_template.data_refinery_worker || true terraform taint module.batch.aws_launch_template.data_refinery_compendia || true if terraform state list | grep -q module.batch.aws_batch_job_queue.data_refinery_; then - terraform state list \ - | grep module.batch.aws_batch_job_queue.data_refinery_ \ - | xargs -L 1 terraform taint \ - || true + terraform state list | + grep module.batch.aws_batch_job_queue.data_refinery_ | + xargs -L 1 terraform taint || + true fi if terraform state list | grep -q module.batch.aws_batch_compute_environment.data_refinery__; then - terraform state list \ - | grep 
module.batch.aws_batch_compute_environment.data_refinery_ \ - | xargs -L 1 terraform taint \ - || true + terraform state list | + grep module.batch.aws_batch_compute_environment.data_refinery_ | + xargs -L 1 terraform taint || + true fi if terraform output | grep -q 'No outputs found'; then @@ -232,7 +232,7 @@ format_environment_variables # Make sure to clear out any old batch job templates since we # will register everything in this directory. if [ -e batch-job-templates ]; then - rm -r batch-job-templates + rm -r batch-job-templates fi # Template the environment variables for production into the Batch Job @@ -272,47 +272,49 @@ docker pull "$DOCKERHUB_REPO/$FOREMAN_DOCKER_IMAGE" # Test that the pg_bouncer instance is up. 15 minutes should be more than enough. start_time=$(date +%s) diff=0 -until pg_isready -d "$DATABASE_NAME" -h "$DATABASE_PUBLIC_HOST" -p "$DATABASE_PORT" -U "$DATABASE_USER" &> /dev/null || [ "$diff" -gt "900" ] -do +until pg_isready -d "$DATABASE_NAME" -h "$DATABASE_PUBLIC_HOST" -p "$DATABASE_PORT" -U "$DATABASE_USER" &>/dev/null || [ "$diff" -gt "900" ]; do echo "Waiting for the pg_bouncer instance to come online ..." sleep 10 - (( diff = $(date +%s) - start_time )) + ((diff = $(date +%s) - start_time)) done -if ! pg_isready -d "$DATABASE_NAME" -h "$DATABASE_PUBLIC_HOST" -p "$DATABASE_PORT" -U "$DATABASE_USER" &> /dev/null; then +if ! pg_isready -d "$DATABASE_NAME" -h "$DATABASE_PUBLIC_HOST" -p "$DATABASE_PORT" -U "$DATABASE_USER" &>/dev/null; then echo "pg_bouncer instance failed to come up after 15 minutes." exit 1 fi # Migrate auth. docker run \ - --env-file prod_env \ - --env RUNNING_IN_CLOUD=False \ - --env DATABASE_HOST="$DATABASE_PUBLIC_HOST" \ - "$DOCKERHUB_REPO/$FOREMAN_DOCKER_IMAGE" python3 manage.py migrate auth + --env DATABASE_HOST="$DATABASE_PUBLIC_HOST" \ + --env RUNNING_IN_CLOUD=False \ + --env-file prod_env \ + "$DOCKERHUB_REPO/$FOREMAN_DOCKER_IMAGE" \ + python3 manage.py migrate auth # Apply general migrations. docker run \ - --env-file prod_env \ - --env RUNNING_IN_CLOUD=False \ - --env DATABASE_HOST="$DATABASE_PUBLIC_HOST" \ - "$DOCKERHUB_REPO/$FOREMAN_DOCKER_IMAGE" python3 manage.py migrate + --env DATABASE_HOST="$DATABASE_PUBLIC_HOST" \ + --env RUNNING_IN_CLOUD=False \ + --env-file prod_env \ + "$DOCKERHUB_REPO/$FOREMAN_DOCKER_IMAGE" \ + python3 manage.py migrate # Create the cache table if it does not already exist. docker run \ - --env-file prod_env \ - --env RUNNING_IN_CLOUD=False \ - --env DATABASE_HOST="$DATABASE_PUBLIC_HOST" \ - "$DOCKERHUB_REPO/$FOREMAN_DOCKER_IMAGE" python3 manage.py createcachetable + --env DATABASE_HOST="$DATABASE_PUBLIC_HOST" \ + --env RUNNING_IN_CLOUD=False \ + --env-file prod_env \ + "$DOCKERHUB_REPO/$FOREMAN_DOCKER_IMAGE" \ + python3 manage.py createcachetable # Terraform doesn't manage these well, so they need to be tainted to # ensure they won't require manual intervention. 
terraform taint module.batch.aws_launch_template.data_refinery_worker terraform taint module.batch.aws_launch_template.data_refinery_compendia -terraform state list \ - | grep module.batch.aws_batch_job_queue.data_refinery_ \ - | xargs -L 1 terraform taint \ - || true +terraform state list | + grep module.batch.aws_batch_job_queue.data_refinery_ | + xargs -L 1 terraform taint || + true # Ensure the latest image version is being used for the Foreman terraform taint aws_instance.foreman_server_1 @@ -339,10 +341,10 @@ API_IP_ADDRESS=$(terraform output -json api_server_1_ip | tr -d '"') # it's not found then grep will return a non-zero exit code so in that # case return an empty string. container_running=$(ssh -o StrictHostKeyChecking=no \ - -o ServerAliveInterval=15 \ - -o ConnectTimeout=5 \ - -i data-refinery-key.pem \ - "ubuntu@$API_IP_ADDRESS" "docker ps -a" 2> /dev/null | grep dr_api || echo "") + -o ServerAliveInterval=15 \ + -o ConnectTimeout=5 \ + -i data-refinery-key.pem \ + "ubuntu@$API_IP_ADDRESS" "docker ps -a" 2>/dev/null | grep dr_api || echo "") # If $container_running is empty, then it's because the container isn't running. # If the container isn't running, then it's because the instance is spinning up. @@ -377,21 +379,25 @@ if [[ -n $container_running ]]; then -o ConnectTimeout=5 \ -i data-refinery-key.pem \ "ubuntu@$API_IP_ADDRESS" "docker run \ - --env-file environment \ - -e DATABASE_HOST=$DATABASE_HOST \ - -e DATABASE_NAME=$DATABASE_NAME \ - -e DATABASE_USER=$DATABASE_USER \ - -e DATABASE_PASSWORD=$DATABASE_PASSWORD \ - -e ELASTICSEARCH_HOST=$ELASTICSEARCH_HOST \ - -e ELASTICSEARCH_PORT=$ELASTICSEARCH_PORT \ - -v /tmp/volumes_static:/tmp/www/static \ - --log-driver=awslogs \ - --log-opt awslogs-region=$AWS_REGION \ - --log-opt awslogs-group=data-refinery-log-group-$USER-$STAGE \ - --log-opt awslogs-stream=log-stream-api-$USER-$STAGE \ - -p 8081:8081 \ - --name=dr_api \ - -it -d $DOCKERHUB_REPO/$API_DOCKER_IMAGE /bin/sh -c /home/user/collect_and_run_uwsgi.sh" + --detach \ + --env DATABASE_HOST=$DATABASE_HOST \ + --env DATABASE_NAME=$DATABASE_NAME \ + --env DATABASE_PASSWORD=$DATABASE_PASSWORD \ + --env DATABASE_USER=$DATABASE_USER \ + --env ELASTICSEARCH_HOST=$ELASTICSEARCH_HOST \ + --env ELASTICSEARCH_PORT=$ELASTICSEARCH_PORT \ + --env-file environment \ + --interactive \ + --log-driver=awslogs \ + --log-opt awslogs-group=data-refinery-log-group-$USER-$STAGE \ + --log-opt awslogs-region=$AWS_REGION \ + --log-opt awslogs-stream=log-stream-api-$USER-$STAGE \ + --name=dr_api \ + --tty \ + --volume /tmp/volumes_static:/tmp/www/static \ + --publish 8081:8081 \ + $DOCKERHUB_REPO/$API_DOCKER_IMAGE \ + /bin/sh -c /home/user/collect_and_run_uwsgi.sh" # Don't leave secrets lying around. ssh -o StrictHostKeyChecking=no \ diff --git a/infrastructure/deploy_box_instance_data.sh b/infrastructure/deploy_box_instance_data.sh index 25f7bf5c7..dffe93ecf 100644 --- a/infrastructure/deploy_box_instance_data.sh +++ b/infrastructure/deploy_box_instance_data.sh @@ -76,5 +76,6 @@ chown ubuntu:ubuntu /var/log/deploy.log # Checkout the repo onto the box. cd /home/ubuntu +umask 0022 git clone https://github.com/AlexsLemonade/refinebio.git chown -R ubuntu:ubuntu refinebio diff --git a/infrastructure/destroy_terraform.sh b/infrastructure/destroy_terraform.sh index cf91053e4..4329cf885 100755 --- a/infrastructure/destroy_terraform.sh +++ b/infrastructure/destroy_terraform.sh @@ -2,7 +2,10 @@ # This script should always run as if it were being called from # the directory it lives in. 
-script_directory="$(cd "$(dirname "$0")" || exit; pwd)" +script_directory="$( + cd "$(dirname "$0")" || exit + pwd +)" cd "$script_directory" || exit print_description() { @@ -10,17 +13,17 @@ print_description() { } print_options() { - echo 'This script accepts the following arguments: -e, -u, -r, and -h.' - echo 'Neither -e, -u or -r is optional unless TF_VAR_stage, TF_VAR_user,' + echo 'This script accepts the following arguments: -e, -u, -d, and -h.' + echo 'Neither -e, -u or -d is optional unless TF_VAR_stage, TF_VAR_user,' echo 'or TF_VAR_region is set, respectively.' echo '-h prints this help message and exits.' echo '-e specifies the environment you would like to destroy.' echo '-u specifies the username you used to spin up the stack.' - echo '-r specifies the region of the stack to destroy.' + echo '-d specifies the region of the stack to destroy.' echo 'All arguments are needed to determine which stack to destroy.' } -while getopts ":e:u:r:h" opt; do +while getopts ":e:u:d:h" opt; do case $opt in e) export env=$OPTARG @@ -29,7 +32,7 @@ while getopts ":e:u:r:h" opt; do u) export TF_VAR_user=$OPTARG ;; - r) + d) export TF_VAR_region=$OPTARG ;; h) @@ -62,7 +65,7 @@ if [[ -z $TF_VAR_user ]]; then fi if [[ -z $TF_VAR_region ]]; then - echo 'Error: must specify region by either providing the -r argument or setting TF_VAR_region.' + echo 'Error: must specify region by either providing the -d argument or setting TF_VAR_region.' exit 1 fi diff --git a/infrastructure/foreman-configuration/foreman-server-instance-user-data.tpl.sh b/infrastructure/foreman-configuration/foreman-server-instance-user-data.tpl.sh index c53d368bf..efc292163 100644 --- a/infrastructure/foreman-configuration/foreman-server-instance-user-data.tpl.sh +++ b/infrastructure/foreman-configuration/foreman-server-instance-user-data.tpl.sh @@ -15,30 +15,34 @@ cd /home/ubuntu || exit # Install our environment variables -cat <<"EOF" > environment +cat <<"EOF" >environment ${foreman_environment} EOF # These database values are created after TF # is run, so we have to pass them in programatically -cat >> /home/ubuntu/run_foreman.sh <>/home/ubuntu/run_foreman.sh <> /home/ubuntu/run_management_command.sh + --detach \\ + --env DATABASE_HOST=${database_host} \\ + --env DATABASE_NAME=${database_name} \\ + --env DATABASE_PASSWORD=${database_password} \\ + --env DATABASE_USER=${database_user} \\ + --env-file /home/ubuntu/environment \\ + --interactive \\ + --tty \\ + --volume /tmp:/tmp \\ + ${dockerhub_repo}/${foreman_docker_image} \\ + python3 manage.py \"\$@\" +" >>/home/ubuntu/run_management_command.sh chmod +x /home/ubuntu/run_management_command.sh echo " @@ -70,21 +78,24 @@ echo " # the first argument followed by the management command to run. 
docker run \\ - --env-file /home/ubuntu/environment \\ - -e DATABASE_HOST=${database_host} \\ - -e DATABASE_NAME=${database_name} \\ - -e DATABASE_USER=${database_user} \\ - -e DATABASE_PASSWORD=${database_password} \\ - -v /tmp:/tmp \\ - -it ${dockerhub_repo}/dr_\"\$1\" python3 manage.py \"\$2\" -" >> /home/ubuntu/run_manage_command.sh + --env DATABASE_HOST=${database_host} \\ + --env DATABASE_NAME=${database_name} \\ + --env DATABASE_PASSWORD=${database_password} \\ + --env DATABASE_USER=${database_user} \\ + --env-file /home/ubuntu/environment \\ + --interactive \\ + --tty \\ + --volume /tmp:/tmp \\ + ${dockerhub_repo}/dr_\"\$1\" \\ + python3 manage.py \"\$2\" +" >>/home/ubuntu/run_manage_command.sh chmod +x /home/ubuntu/run_manage_command.sh # Use Monit to ensure the Foreman is always running apt-get -y update apt-get -y install monit htop -date +%s > /tmp/foreman_last_time +date +%s >/tmp/foreman_last_time chown ubuntu:ubuntu /tmp/foreman_last_time # shellcheck disable=2016 echo ' @@ -96,7 +107,7 @@ if (( $difftime > 1800 )); then exit 1; fi exit 0; -' >> /home/ubuntu/foreman_status.sh +' >>/home/ubuntu/foreman_status.sh chmod +x /home/ubuntu/foreman_status.sh echo ' @@ -105,13 +116,13 @@ check program foreman with path "/bin/bash /home/ubuntu/foreman_status.sh" as ui if status != 0 then restart set daemon 900 -' >> /etc/monit/monitrc +' >>/etc/monit/monitrc service monit restart # Install the cron job tests -crontab -l > tempcron -cat <> tempcron +crontab -l >tempcron +cat <>tempcron 0 12 * * MON /bin/bash /home/ubuntu/run_manage_command.sh affymetrix check_brainarray_gene_agreement >> /var/log/affymetrix_checks.log 2>&1 0 12 * * MON /bin/bash /home/ubuntu/run_manage_command.sh affymetrix check_tx_index_transcript_agreement >> /var/log/affymetrix_checks.log 2>&1 0 12 * * ${accession_gathering_job_run_day} /bin/bash /home/ubuntu/run_manage_command.sh foreman gather_weekly_accessions >> /var/log/weekly_accessions.log 2>&1 @@ -122,18 +133,22 @@ rm tempcron # Make sure every downloader job has a processor job! docker run \ - --env-file /home/ubuntu/environment \ - -e DATABASE_HOST="${database_host}" \ - -e DATABASE_NAME="${database_name}" \ - -e DATABASE_USER="${database_user}" \ - -e DATABASE_PASSWORD="${database_password}" \ - -v /tmp:/tmp \ - --log-driver=awslogs \ - --log-opt awslogs-region="${region}" \ - --log-opt awslogs-group="${log_group}" \ - --log-opt awslogs-stream="log-stream-foreman-${user}-${stage}" \ - --name=job_filler \ - -it -d "${dockerhub_repo}/${foreman_docker_image}" python3 manage.py create_missing_processor_jobs + --detach \ + --env DATABASE_HOST="${database_host}" \ + --env DATABASE_NAME="${database_name}" \ + --env DATABASE_PASSWORD="${database_password}" \ + --env DATABASE_USER="${database_user}" \ + --env-file /home/ubuntu/environment \ + --interactive \ + --log-driver=awslogs \ + --log-opt awslogs-group="${log_group}" \ + --log-opt awslogs-region="${region}" \ + --log-opt awslogs-stream="log-stream-foreman-${user}-${stage}" \ + --name=job_filler \ + --tty \ + --volume /tmp:/tmp \ + "${dockerhub_repo}/${foreman_docker_image}" \ + python3 manage.py create_missing_processor_jobs # Delete the cloudinit and syslog in production. export STAGE=${stage} diff --git a/scripts/common.sh b/scripts/common.sh index 612de1a7b..b30416abc 100755 --- a/scripts/common.sh +++ b/scripts/common.sh @@ -2,45 +2,40 @@ # These are lists of docker images that we use. The actual names end # up being /dr_ but this is useful for scripting. 
-export ALL_CCDL_IMAGES="smasher compendia illumina affymetrix salmon transcriptome no_op downloaders foreman api" +export ALL_IMAGES="smasher compendia illumina affymetrix salmon transcriptome no_op downloaders foreman api" # Sometimes we only want to work with the worker images. -export CCDL_WORKER_IMAGES="smasher compendia illumina affymetrix salmon transcriptome no_op downloaders" +export WORKER_IMAGES="smasher compendia illumina affymetrix salmon transcriptome no_op downloaders" -get_docker_db_ip_address () { - docker inspect -f '{{range .NetworkSettings.Networks}}{{.IPAddress}}{{end}}' drdb 2> /dev/null -} - -get_docker_es_ip_address () { - docker inspect -f '{{range .NetworkSettings.Networks}}{{.IPAddress}}{{end}}' dres 2> /dev/null -} - -# `coverage report -m` will always have an exit code of 0 which makes -# it seem like the test is passing. Therefore we store the exit code -# of running the tests as $exit_code, then report the coverage, and -# then exit with the appropriate code. -# This is done a function so arguments to the tests can be passed through. -run_tests_with_coverage () { - COVERAGE="coverage run --source=\".\" manage.py test --settings=tests.settings --no-input $*; exit_code=\$?;" - SAVE_REPORT="coverage xml -o data_store/coverage.xml;" - PRINT_REPORT="coverage report -m;" - RETURN="exit \$exit_code" - - echo "$COVERAGE $PRINT_REPORT $SAVE_REPORT $RETURN" -} +# Default Docker registry. +if [ -z "$DOCKERHUB_REPO" ]; then + export DOCKERHUB_REPO="ccdlstaging" +fi # This function checks whether a given docker image name ($1:$2) # exists in Docker Hub or not using Docker Hub API V2. Based on: # https://stackoverflow.com/questions/32113330/check-if-imagetag-combination-already-exists-on-docker-hub -docker_img_exists() { +docker_image_exists() { TOKEN=$(curl -s -H "Content-Type: application/json" -X POST \ - -d '{"username": "'"${DOCKER_ID}"'", "password": "'"${DOCKER_PASSWD}"'"}' \ - https://hub.docker.com/v2/users/login/ | jq -r .token) + -d '{"username": "'"${DOCKER_ID}"'", "password": "'"${DOCKER_PASSWD}"'"}' \ + https://hub.docker.com/v2/users/login/ | jq -r .token) EXISTS=$(curl -s -H "Authorization: JWT ${TOKEN}" \ - "https://hub.docker.com/v2/repositories/$1/tags/?page_size=10000" \ - | jq -r "[.results | .[] | .name == \"$2\"] | any" 2> /dev/null) + "https://hub.docker.com/v2/repositories/$1/tags/?page_size=10000" | + jq -r "[.results | .[] | .name == \"$2\"] | any" 2>/dev/null) test -n "$EXISTS" -a "$EXISTS" = true } +get_branch_hash() { + git rev-parse --abbrev-ref HEAD | shasum | awk '{print $1}' +} + +get_docker_db_ip_address() { + docker inspect -f '{{range .NetworkSettings.Networks}}{{.IPAddress}}{{end}}' drdb 2>/dev/null +} + +get_docker_es_ip_address() { + docker inspect -f '{{range .NetworkSettings.Networks}}{{.IPAddress}}{{end}}' dres 2>/dev/null +} + # A tag is linked to a commit hash, not a branch. A single commit hash # can end up on multiple branches. So we first check to see if we're # on master, then on dev, then error out because we should only deploy master or dev. @@ -57,10 +52,69 @@ get_master_or_dev() { # All dev versions should end with '-dev' or '-dev-hotfix' and all master versions should not. if [ -n "$master_check" ] && ! echo "$version" | grep -Eq "\-dev(\-hotfix)?$"; then echo "master" - elif [ -n "$dev_check" ] ; then + elif [ -n "$dev_check" ]; then echo "dev" else echo "unknown" fi fi } + +# `coverage report -m` will always have an exit code of 0 which makes +# it seem like the test is passing. 
Therefore we store the exit code +# of running the tests as $exit_code, then report the coverage, and +# then exit with the appropriate code. +# This is done a function so arguments to the tests can be passed through. +run_tests_with_coverage() { + COVERAGE="coverage run --source=\".\" manage.py test --settings=tests.settings --no-input $*; exit_code=\$?;" + SAVE_REPORT="coverage xml -o data_store/coverage.xml;" + PRINT_REPORT="coverage report -m;" + RETURN="exit \$exit_code" + + echo "$COVERAGE $PRINT_REPORT $SAVE_REPORT $RETURN" +} + +# Create docker-container/desktop-linux Docker builder if none provided. +# Set the builder as currently used. +set_up_docker_builder() { + if [ -z "$DOCKER_BUILDER" ]; then + echo "Setting up refine.bio Docker builder." + + if test "$GITHUB_ACTION"; then + echo "$INSTANCE_SSH_KEY" >infrastructure/data-refinery-key.pem + chmod 600 infrastructure/data-refinery-key.pem + + # shellcheck disable=SC2046 + eval $(ssh-agent) + ssh-add infrastructure/data-refinery-key.pem + + if [ ! -d ~/.ssh ]; then + mkdir -m 700 ~/.ssh + fi + cat >~/.ssh/config </dev/null || + true + else + DOCKER_BUILDER="refinebio_local_builder" + echo "Creating Docker builder $DOCKER_BUILDER." + docker buildx create \ + --driver=docker-container \ + --name="$DOCKER_BUILDER" \ + --platform=linux/amd64 2>/dev/null || + true + fi + fi + + echo "Using Docker builder $DOCKER_BUILDER." + docker buildx use "$DOCKER_BUILDER" +} diff --git a/scripts/format_batch_with_env.sh b/scripts/format_batch_with_env.sh index a86ee36b5..21938e1e5 100755 --- a/scripts/format_batch_with_env.sh +++ b/scripts/format_batch_with_env.sh @@ -24,34 +24,34 @@ print_options() { while getopts ":p:e:o:v:h" opt; do case $opt in - p) - export project="$OPTARG" - ;; - e) - export env="$OPTARG" - ;; - o) - export output_dir="$OPTARG" - ;; - v) - export system_version="$OPTARG" - ;; - h) - print_description - echo - print_options - exit 0 - ;; - \?) - echo "Invalid option: -$OPTARG" >&2 - print_options >&2 - exit 1 - ;; - :) - echo "Option -$OPTARG requires an argument." >&2 - print_options >&2 - exit 1 - ;; + p) + export project="$OPTARG" + ;; + e) + export env="$OPTARG" + ;; + o) + export output_dir="$OPTARG" + ;; + v) + export system_version="$OPTARG" + ;; + h) + print_description + echo + print_options + exit 0 + ;; + \?) + echo "Invalid option: -$OPTARG" >&2 + print_options >&2 + exit 1 + ;; + :) + echo "Option -$OPTARG requires an argument." >&2 + print_options >&2 + exit 1 + ;; esac done @@ -102,10 +102,12 @@ if [ -z "$COMPENDIA_DOCKER_IMAGE" ]; then export COMPENDIA_DOCKER_IMAGE="dr_compendia:$system_version" fi - # This script should always run from the context of the directory of # the project it is building. -script_directory="$(cd "$(dirname "$0")" || exit; pwd)" +script_directory="$( + cd "$(dirname "$0")" || exit + pwd +)" project_directory="$script_directory/.." @@ -142,7 +144,7 @@ while read -r line; do # shellcheck disable=SC2163 export "$line" fi -done < "$environment_file" +done <"$environment_file" # If output_dir wasn't specified then assume the same folder we're # getting the templates from. @@ -154,7 +156,6 @@ if [ ! -d "$output_dir" ]; then mkdir "$output_dir" fi - # Not quite sure how to deal with this just yet, so punt. # export INDEX=0 @@ -163,7 +164,7 @@ fi if [ "$project" = "workers" ]; then # Iterate over all the template files in the directory. 
for template in batch-job-templates/*.tpl.json; do - template="$(basename "$template")" + template="$(basename "$template")" # Strip off the trailing .tpl for once we've formatted it. OUTPUT_BASE="$(basename "$template" .tpl.json)" FILETYPE=".json" @@ -173,66 +174,62 @@ if [ "$project" = "workers" ]; then if [ "$OUTPUT_FILE" = "downloader.json" ]; then rams="1024 4096 16384" - for r in $rams - do + for r in $rams; do export RAM_POSTFIX="_$r" export RAM="$r" FILEPATH="$output_dir/downloader$RAM_POSTFIX$FILETYPE" perl -p -e 's/\$\{\{([^}]+)\}\}/defined $ENV{$1} ? $ENV{$1} : $&/eg' \ - < "batch-job-templates/$template" \ - > "$FILEPATH" \ - 2> /dev/null + <"batch-job-templates/$template" \ + >"$FILEPATH" \ + 2>/dev/null echo "Made $FILEPATH" done echo "Made $output_dir/$OUTPUT_FILE" elif [ "$OUTPUT_FILE" = "create_compendia.json" ]; then rams="30000 950000" - for r in $rams - do + for r in $rams; do export RAM_POSTFIX="_$r" export RAM="$r" FILEPATH="$output_dir/$OUTPUT_BASE$RAM_POSTFIX$FILETYPE" perl -p -e 's/\$\{\{([^}]+)\}\}/defined $ENV{$1} ? $ENV{$1} : $&/eg' \ - < "batch-job-templates/$template" \ - > "$FILEPATH" \ - 2> /dev/null + <"batch-job-templates/$template" \ + >"$FILEPATH" \ + 2>/dev/null echo "Made $FILEPATH" done echo "Made $output_dir/$OUTPUT_FILE" elif [ "$OUTPUT_FILE" = "create_quantpendia.json" ]; then rams="30000 131000" - for r in $rams - do + for r in $rams; do export RAM_POSTFIX="_$r" export RAM="$r" FILEPATH="$output_dir/$OUTPUT_BASE$RAM_POSTFIX$FILETYPE" perl -p -e 's/\$\{\{([^}]+)\}\}/defined $ENV{$1} ? $ENV{$1} : $&/eg' \ - < "batch-job-templates/$template" \ - > "$FILEPATH" \ - 2> /dev/null + <"batch-job-templates/$template" \ + >"$FILEPATH" \ + 2>/dev/null echo "Made $FILEPATH" done echo "Made $output_dir/$OUTPUT_FILE" # From https://unix.stackexchange.com/a/111517 elif (echo "$NO_RAM_JOB_FILES" | grep -Fqw "$OUTPUT_FILE"); then perl -p -e 's/\$\{\{([^}]+)\}\}/defined $ENV{$1} ? $ENV{$1} : $&/eg' \ - < "batch-job-templates/$template" \ - > "$output_dir/$OUTPUT_FILE" \ - 2> /dev/null + <"batch-job-templates/$template" \ + >"$output_dir/$OUTPUT_FILE" \ + 2>/dev/null echo "Made $output_dir/$OUTPUT_FILE" else rams="2048 4096 8192 12288 16384 32768 65536" - for r in $rams - do + for r in $rams; do export RAM_POSTFIX="_$r" export RAM="$r" FILEPATH="$output_dir/$OUTPUT_BASE$RAM_POSTFIX$FILETYPE" perl -p -e 's/\$\{\{([^}]+)\}\}/defined $ENV{$1} ? $ENV{$1} : $&/eg' \ - < "batch-job-templates/$template" \ - > "$FILEPATH" \ - 2> /dev/null + <"batch-job-templates/$template" \ + >"$FILEPATH" \ + 2>/dev/null echo "Made $FILEPATH" done fi @@ -241,7 +238,7 @@ elif [ "$project" = "surveyor" ]; then # Iterate over all the template files in the directory. for template in batch-job-templates/*.tpl.json; do - template="$(basename "$template")" + template="$(basename "$template")" # Strip off the trailing .tpl for once we've formatted it. OUTPUT_BASE="$(basename "$template" .tpl.json)" FILETYPE=".json" @@ -249,21 +246,20 @@ elif [ "$project" = "surveyor" ]; then if [ "$OUTPUT_FILE" = "surveyor_dispatcher.json" ]; then perl -p -e 's/\$\{\{([^}]+)\}\}/defined $ENV{$1} ? 
$ENV{$1} : $&/eg' \
-            < "batch-job-templates/$template" \
-            > "$output_dir/$OUTPUT_FILE" \
-            2> /dev/null
+            <"batch-job-templates/$template" \
+            >"$output_dir/$OUTPUT_FILE" \
+            2>/dev/null
         echo "Made $output_dir/$OUTPUT_FILE"
     else
         rams="1024 4096 16384"
-        for r in $rams
-        do
+        for r in $rams; do
            export RAM_POSTFIX="_$r"
            export RAM="$r"
            FILEPATH="$output_dir/$OUTPUT_BASE$RAM_POSTFIX$FILETYPE"
            perl -p -e 's/\$\{\{([^}]+)\}\}/defined $ENV{$1} ? $ENV{$1} : $&/eg' \
-                < "batch-job-templates/$template" \
-                > "$FILEPATH" \
-                2> /dev/null
+                <"batch-job-templates/$template" \
+                >"$FILEPATH" \
+                2>/dev/null
            echo "Made $FILEPATH"
         done
     fi
@@ -272,12 +268,12 @@ elif [ "$project" = "surveyor" ]; then
 elif [ "$project" = "foreman" ]; then
     # foreman sub-project
     perl -p -e 's/\$\{\{([^}]+)\}\}/defined $ENV{$1} ? $ENV{$1} : $&/eg' \
-        < environment.tpl \
-        > "$output_dir/environment" \
-        2> /dev/null
+        <environment.tpl \
+        >"$output_dir/environment" \
+        2>/dev/null
 elif [ "$project" = "api" ]; then
     perl -p -e 's/\$\{\{([^}]+)\}\}/defined $ENV{$1} ? $ENV{$1} : $&/eg' \
-        < environment.tpl \
-        > "$output_dir/environment" \
-        2> /dev/null
+        <environment.tpl \
+        >"$output_dir/environment" \
+        2>/dev/null
 fi
diff --git a/scripts/install_all.sh b/scripts/install_all.sh
index 0d389712e..30a34c080 100755
--- a/scripts/install_all.sh
+++ b/scripts/install_all.sh
@@ -1,6 +1,6 @@
 #!/bin/sh
-# Exit on error
+# Exit on error.
 set -e
 # Config variables
@@ -8,7 +8,10 @@ TERRAFORM_VERSION="0.13.5"
 # This script should always run as if it were being called from
 # the directory it lives in.
-script_directory="$(cd "$(dirname "$0")" || exit; pwd)"
+script_directory="$(
+    cd "$(dirname "$0")" || exit
+    pwd
+)"
 cd "$script_directory" || exit
 print_description() {
@@ -37,117 +40,117 @@ confirm() {
    printf "%s [y/N] " "$1"
    read -r confirmation
    if ! [ "$confirmation" = "y" ]; then
-        echo "Confirmation failure" >&2
+    echo "Confirmation failure" >&2
        exit 1
    fi
 }
 while getopts "hv" opt; do
    case $opt in
-    h)
-        print_description
-        echo
-        print_usage
-        exit 0
-        ;;
-    v)
-        OUTPUT="/dev/stdout"
-        ;;
-    \?)
-        echo "Invalid option: -$OPTARG" >&2
-        print_usage >&2
-        exit 1
-        ;;
+    h)
+        print_description
+        echo
+        print_usage
+        exit 0
+        ;;
+    v)
+        OUTPUT="/dev/stdout"
+        ;;
+    \?)
+        echo "Invalid option: -$OPTARG" >&2
+        print_usage >&2
+        exit 1
+        ;;
    esac
 done
 # Unless output was set to stdout by the verbose flag, set it to /dev/null
-# to hide the stdout of package management commands
+# to hide the stdout of package management commands.
 if [ -z "$OUTPUT" ]; then
    OUTPUT="/dev/null"
 fi
 if [ -z "$INSTALL_CMD" ]; then
    case "$(uname)" in
-    "Darwin")
-        if !
command -v brew >/dev/null; then + confirm "Would you like to install Homebrew?" + /usr/bin/ruby -e "$(curl -fsSL https://raw.githubusercontent.com/Homebrew/install/master/install)" + fi + + INSTALL_CMD="brew install" + INSTALL_CASK_CMD="brew cask install" + BREW=true + ;; + "Linux") + if command -v apt >/dev/null; then + sudo apt-get update + INSTALL_CMD="sudo apt-get install --assume-yes" + APT=true + else + echo "Your Linux distribution is not officially supported," >&2 + echo "but it *should* be able to run the required services. You need to manually" >&2 + echo "install dependencies or give the command to install dependencies with \$INSTALL_CMD." >&2 exit 1 - ;; + fi + ;; + *) + echo "$(uname) is an unsupported operating system." >&2 + echo "You can try to provide a package manager command with \$INSTALL_CMD," >&2 + echo "but your mileage may vary." >&2 + exit 1 + ;; esac fi -if ! command -v docker > /dev/null; then +if ! command -v docker >/dev/null; then echo "Installing Docker..." - # On macOS, install docker desktop with Homebrew cask - if [ $BREW ]; then - $INSTALL_CASK_CMD docker > $OUTPUT + # On macOS, install docker desktop with Homebrew cask. + if [ "$BREW" ]; then + $INSTALL_CASK_CMD docker >"$OUTPUT" else - $INSTALL_CMD docker.io > $OUTPUT || (echo "You must manually install docker" && exit 1) + $INSTALL_CMD docker.io >"$OUTPUT" || (echo "You must manually install Docker" && exit 1) - echo "Fixing docker permissions..." + echo "Fixing Docker permissions..." sudo groupadd -f docker sudo usermod -aG docker "$USER" - echo - echo "Logout and log back in to apply the permissions changes, then execute this script again." - exit 0 + echo + echo "Logout and log back in to apply the permissions changes, then execute this script again." + exit 0 fi fi -if ! command -v pip3 > /dev/null && ! [ $BREW ]; then # Don't reinstall python on macOS - echo "Installing python and pip..." - $INSTALL_CMD python3-pip > $OUTPUT || (echo "You must manually install python and pip" && exit 1) +if ! command -v pip3 >/dev/null && ! [ "$BREW" ]; then # Don't reinstall python on macOS + echo "Installing Python and pip..." + $INSTALL_CMD python3-pip >"$OUTPUT" || (echo "You must manually install Python and pip" && exit 1) fi -if ! command -v terraform > /dev/null; then - echo "Installing terraform..." - if [ $BREW ]; then - $INSTALL_CMD terraform > $OUTPUT - elif [ $APT ] || confirm "Would you like to automatically install Terraform for amd64 linux?"; then - $INSTALL_CMD unzip > $OUTPUT +if ! command -v terraform >/dev/null; then + echo "Installing Terraform..." + if [ "$BREW" ]; then + $INSTALL_CMD terraform >"$OUTPUT" + elif [ "$APT" ] || confirm "Would you like to automatically install Terraform for amd64 Linux?"; then + $INSTALL_CMD unzip >"$OUTPUT" curl -0s "https://releases.hashicorp.com/terraform/${TERRAFORM_VERSION}/terraform_${TERRAFORM_VERSION}_linux_amd64.zip" \ - > "terraform_${TERRAFORM_VERSION}_linux_amd64.zip" + >"terraform_${TERRAFORM_VERSION}_linux_amd64.zip" sudo unzip -d /usr/bin "terraform_${TERRAFORM_VERSION}_linux_amd64.zip" sudo chmod a+rx /usr/bin/terraform - rm "terraform_${TERRAFORM_VERSION}_linux_amd64.zip" + rm "terraform_${TERRAFORM_VERSION}_linux_amd64.zip" else echo "You need to manually install Terraform before continuing..." >&2 exit 1 fi fi -if ! command -v pre-commit > /dev/null; then +if ! command -v pre-commit >/dev/null; then message="Would you like to automatically install pre-commit? 
\ Note: This will install all the required dependencies (black, isort, etc) \ using an additional ~185MB of disk space." - if [ $APT ] || confirm "$message"; then + if [ "$APT" ] || confirm "$message"; then echo "Installing pre-commit..." - $INSTALL_CMD shellcheck > $OUTPUT + $INSTALL_CMD shellcheck >"$OUTPUT" pip3 install pre-commit pre-commit install else @@ -155,32 +158,32 @@ using an additional ~185MB of disk space." fi fi -if ! command -v jq > /dev/null; then +if ! command -v jq >/dev/null; then echo "Installing jq..." - $INSTALL_CMD jq > $OUTPUT || (echo "You must manually install jq" && exit 1) + $INSTALL_CMD jq >"$OUTPUT" || (echo "You must manually install jq" && exit 1) fi -if ! command -v ip > /dev/null; then - if [ $BREW ]; then - $INSTALL_CMD iproute2mac > $OUTPUT +if ! command -v ip >/dev/null; then + if [ "$BREW" ]; then + $INSTALL_CMD iproute2mac >"$OUTPUT" else - $INSTALL_CMD iproute2 > $OUTPUT || (echo "You must manually install iproute2" && exit 1) + $INSTALL_CMD iproute2 >"$OUTPUT" || (echo "You must manually install iproute2" && exit 1) fi fi echo "Starting postgres and installing the database..." -./run_postgres.sh > $OUTPUT -./install_db_docker.sh > $OUTPUT +./run_postgres.sh >"$OUTPUT" +./install_db_docker.sh >"$OUTPUT" echo "Starting elasticsearch and building the ES Indexes..." -./run_es.sh > $OUTPUT -./rebuild_es_index.sh > $OUTPUT +./run_es.sh >"$OUTPUT" +./rebuild_es_index.sh >"$OUTPUT" echo "Creating virtual environment..." -./create_virtualenv.sh > $OUTPUT +./create_virtualenv.sh >"$OUTPUT" echo "Run \`source dr_env/bin/activate\` to activate the virtual environment." echo "Updating common dependencies..." -# Source the virtual environment first +# Source the virtual environment first. . ../dr_env/bin/activate -./update_models.sh > $OUTPUT +./update_models.sh >"$OUTPUT" diff --git a/scripts/install_db_docker.sh b/scripts/install_db_docker.sh index 52d2e3138..8dafc2354 100755 --- a/scripts/install_db_docker.sh +++ b/scripts/install_db_docker.sh @@ -1,8 +1,45 @@ #! 
/bin/sh -docker run -e PGPASSWORD=mysecretpassword --rm --link drdb:postgres postgres:9.6.6 psql -c "create database data_refinery" -h postgres -U postgres -docker run -e PGPASSWORD=mysecretpassword --rm --link drdb:postgres postgres:9.6.6 psql -c "CREATE ROLE data_refinery_user WITH LOGIN PASSWORD 'data_refinery_password';" -h postgres -U postgres -docker run -e PGPASSWORD=mysecretpassword --rm --link drdb:postgres postgres:9.6.6 psql -c 'GRANT ALL PRIVILEGES ON DATABASE data_refinery TO data_refinery_user;' -h postgres -U postgres -docker run -e PGPASSWORD=mysecretpassword --rm --link drdb:postgres postgres:9.6.6 psql -c 'ALTER USER data_refinery_user CREATEDB;' -h postgres -U postgres -docker run -e PGPASSWORD=mysecretpassword --rm --link drdb:postgres postgres:9.6.6 psql -c 'ALTER ROLE data_refinery_user superuser;' -h postgres -U postgres -docker run -e PGPASSWORD=mysecretpassword --rm --link drdb:postgres postgres:9.6.6 psql -c 'CREATE EXTENSION IF NOT EXISTS hstore;' -h postgres -U postgres -d data_refinery +POSTGRES_VERSION="9.6.6" + +docker run \ + --env PGPASSWORD=mysecretpassword \ + --link drdb:postgres \ + --rm \ + "postgres:$POSTGRES_VERSION" \ + psql -c "create database data_refinery" -h postgres -U postgres + +docker run \ + --env PGPASSWORD=mysecretpassword \ + --link drdb:postgres \ + --rm \ + "postgres:$POSTGRES_VERSION" \ + psql -c "CREATE ROLE data_refinery_user WITH LOGIN PASSWORD 'data_refinery_password';" -h postgres -U postgres + +docker run \ + --env PGPASSWORD=mysecretpassword \ + --link drdb:postgres \ + --rm \ + "postgres:$POSTGRES_VERSION" \ + psql -c 'GRANT ALL PRIVILEGES ON DATABASE data_refinery TO data_refinery_user;' -h postgres -U postgres + +docker run \ + --env PGPASSWORD=mysecretpassword \ + --link drdb:postgres \ + --rm \ + "postgres:$POSTGRES_VERSION" \ + psql -c 'ALTER USER data_refinery_user CREATEDB;' -h postgres -U postgres + +docker run \ + --env PGPASSWORD=mysecretpassword \ + --link drdb:postgres \ + --rm \ + "postgres:$POSTGRES_VERSION" \ + psql -c 'ALTER ROLE data_refinery_user superuser;' -h postgres -U postgres + +docker run \ + --env PGPASSWORD=mysecretpassword \ + --link drdb:postgres \ + --rm \ + "postgres:$POSTGRES_VERSION" \ + psql -c 'CREATE EXTENSION IF NOT EXISTS hstore;' -h postgres -U postgres -d data_refinery diff --git a/scripts/kill_all_jobs.sh b/scripts/kill_all_jobs.sh index 6ef70be94..4da0eafa4 100755 --- a/scripts/kill_all_jobs.sh +++ b/scripts/kill_all_jobs.sh @@ -2,13 +2,16 @@ # This script should always run as if it were being called from # the directory it lives in. -script_directory="$(cd "$(dirname "$0")" || exit; pwd)" +script_directory="$( + cd "$(dirname "$0")" || exit + pwd +)" cd "$script_directory" || exit while read -r row; do # Exporting an expansion rather than a variable, which is exactly what we want to do. # shellcheck disable=SC2163 export "${row}" -done < ../infrastructure/prod_env +done <../infrastructure/prod_env python3 kill_all_jobs.py diff --git a/scripts/make_migrations.sh b/scripts/make_migrations.sh index 25ef9e6c8..807531569 100755 --- a/scripts/make_migrations.sh +++ b/scripts/make_migrations.sh @@ -3,34 +3,44 @@ # Script for migrating the database using a Docker container so no # virtual environment is needed on the host machine. -# Exit on error +# Exit on error. set -e # This script should always run as if it were being called from # the directory it lives in. 
-script_directory="$(cd "$(dirname "$0")" || exit; pwd)" +script_directory="$( + cd "$(dirname "$0")" || exit + pwd +)" cd "$script_directory" || exit ./prepare_image.sh -i migrations -s common . ./common.sh + DB_HOST_IP=$(get_docker_db_ip_address) docker run \ - --volume "$script_directory/../common/data_refinery_common":/home/user/data_refinery_common \ - --add-host=database:"$DB_HOST_IP" \ - --env-file ../common/environments/local \ - --interactive \ - ccdlstaging/dr_migrations python3 manage.py makemigrations data_refinery_common + --add-host=database:"$DB_HOST_IP" \ + --env-file ../common/environments/local \ + --interactive \ + --platform linux/amd64 \ + --volume "$script_directory/../common/data_refinery_common":/home/user/data_refinery_common \ + "$DOCKERHUB_REPO/dr_migrations" \ + python3 manage.py makemigrations data_refinery_common docker run \ - --volume "$script_directory/../common/data_refinery_common":/home/user/data_refinery_common \ - --add-host=database:"$DB_HOST_IP" \ - --env-file ../common/environments/local \ - ccdlstaging/dr_migrations python3 manage.py migrate + --add-host=database:"$DB_HOST_IP" \ + --env-file ../common/environments/local \ + --platform linux/amd64 \ + --volume "$script_directory/../common/data_refinery_common":/home/user/data_refinery_common \ + "$DOCKERHUB_REPO/dr_migrations" \ + python3 manage.py migrate docker run \ - --volume "$script_directory/../common/data_refinery_common":/home/user/data_refinery_common \ - --add-host=database:"$DB_HOST_IP" \ - --env-file ../common/environments/local \ - ccdlstaging/dr_migrations python3 manage.py createcachetable + --add-host=database:"$DB_HOST_IP" \ + --env-file ../common/environments/local \ + --platform linux/amd64 \ + --volume "$script_directory/../common/data_refinery_common":/home/user/data_refinery_common \ + "$DOCKERHUB_REPO/dr_migrations" \ + python3 manage.py createcachetable diff --git a/scripts/prepare_image.sh b/scripts/prepare_image.sh index 81975da90..4bdff97c4 100755 --- a/scripts/prepare_image.sh +++ b/scripts/prepare_image.sh @@ -2,10 +2,12 @@ # This script should always run as if it were being called from # the directory it lives in. -script_directory="$(cd "$(dirname "$0")" || exit; pwd)" +script_directory="$( + cd "$(dirname "$0")" || exit + pwd +)" cd "$script_directory" || exit -# Import the functions in common.sh . ./common.sh # We need access to all of the projects @@ -18,108 +20,129 @@ print_description() { print_options() { echo "Options:" - echo " -h Prints the help message" + echo " -h Prints the help message." echo " -i IMAGE The image to be prepared. This must be specified." echo " -s SERVICE The service to seach for a dockerfile." - echo " The default option is 'workers'" - echo " -p Pull the latest version of the image from Dockerhub" - echo " -d REPO The docker repo to pull images from." - echo " The default option is 'ccdl'" + echo " The default option is 'workers'." + echo " -u Push the built image to the Dockerhub." + echo " -r REPO The docker registry to use for pull/push actions." + echo " The default option is 'ccdlstaging'." 
echo echo "Examples:" echo " Build the image ccdl/dr_downloaders:" - echo " ./scripts/prepare_image.sh -i downloaders -d ccdl" + echo " ./scripts/prepare_image.sh -i downloaders -r ccdlstaging" } -while getopts "phi:d:s:" opt; do +while getopts "uhi:r:s:" opt; do case $opt in - i) - image=$OPTARG - ;; - d) - dockerhub_repo=$OPTARG - ;; - p) - pull="True" - ;; - s) - service=$OPTARG - ;; - h) - print_description - echo - print_options - exit 0 - ;; - \?) - echo "Invalid option: -$OPTARG" >&2 - print_options >&2 - exit 1 - ;; - :) - echo "Option -$OPTARG requires an argument." >&2 - print_options >&2 - exit 1 - ;; + i) + IMAGE_NAME="$OPTARG" + ;; + r) + DOCKERHUB_REPO="$OPTARG" + ;; + + s) + SERVICE="$OPTARG" + ;; + u) + DOCKER_ACTION="--push" + ;; + h) + print_description + echo + print_options + exit 0 + ;; + \?) + echo "Invalid option: -$OPTARG" >&2 + print_options >&2 + exit 1 + ;; + :) + echo "Option -$OPTARG requires an argument." >&2 + print_options >&2 + exit 1 + ;; esac done -if [ -z "$image" ]; then +if [ -z "$IMAGE_NAME" ]; then echo "Error: you must specify an image with -i" >&2 exit 1 fi - -if [ -z "$service" ]; then - service="workers" +if [ -z "$SERVICE" ]; then + SERVICE="workers" fi -if [ -z "$dockerhub_repo" ]; then - dockerhub_repo="ccdlstaging" +if [ -z "$DOCKERHUB_REPO" ]; then + DOCKERHUB_REPO="ccdlstaging" fi -# Default to "local" for system version if we're not running in the cloud. +# Defaults to commit hash value for if we're not running in the cloud. if [ -z "$SYSTEM_VERSION" ]; then - SYSTEM_VERSION="local$(date +%s)" + SYSTEM_VERSION="$(get_branch_hash)" fi -# We want to check if a test image has been built for this branch. If -# it has we should use that rather than building it slowly. -image_name="$dockerhub_repo/dr_$image" -# shellcheck disable=SC2086 -if [ "$(docker_img_exists $image_name $branch_name)" ] ; then - docker pull "$image_name:$branch_name" -elif [ -n "$pull" ]; then - docker pull "$image_name" -else - echo "" - echo "Rebuilding the $image_name image." - finished=1 - attempts=0 - while [ $finished != 0 ] && [ $attempts -lt 3 ]; do - if [ $attempts -gt 0 ]; then - echo "Failed to build $image_name, trying again." - fi - - - if test "$GITHUB_ACTIONS"; then - # docker needs repositories to be lowercase - CACHE_REPO="$(echo "ghcr.io/$GITHUB_REPOSITORY" | tr '[:upper:]' '[:lower:]')" - CACHED_PACKAGE="$CACHE_REPO/dr_$image" - CACHE="--build-arg BUILDKIT_INLINE_CACHE=1 --cache-from $CACHED_PACKAGE" - fi - - docker build \ - -t "$image_name" \ - -f "$service/dockerfiles/Dockerfile.$image" \ - --build-arg SYSTEM_VERSION="$SYSTEM_VERSION" \ - $CACHE . - finished=$? - attempts=$((attempts+1)) - done - - if [ $finished != 0 ] && [ $attempts -ge 3 ]; then - echo "Could not build $image_name after three attempts." 
- exit 1 +if [ -z "$DOCKER_ACTION" ]; then + DOCKER_ACTION="--load" +fi + +DOCKERHUB_IMAGE="$DOCKERHUB_REPO/dr_$IMAGE_NAME" + +CACHE_FROM_LATEST="cache-from=type=registry,ref=${DOCKERHUB_IMAGE}_cache:latest" +CACHE_FROM_VERSION="cache-from=type=registry,ref=${DOCKERHUB_IMAGE}_cache:$SYSTEM_VERSION" +CACHE_TO_LATEST="cache-to=type=registry,ref=${DOCKERHUB_IMAGE}_cache:latest,mode=max" +CACHE_TO_VERSION="cache-to=type=registry,ref=${DOCKERHUB_IMAGE}_cache:$SYSTEM_VERSION,mode=max" + +if test "$GITHUB_ACTION"; then + CACHE_TO_LATEST="cache-to=type=gha" + CACHE_TO_VERSION="cache-to=type=gha" + DOCKER_ACTION="--push" +fi + +DOCKER_FILE_PATH="$SERVICE/dockerfiles/Dockerfile.$IMAGE_NAME" + +echo +echo "Building the $IMAGE_NAME:$SYSTEM_VERSION image from $DOCKER_FILE_PATH." +echo + +attempt=0 +attempts=3 +finished=1 +while [ $finished != 0 ] && [ $attempt -lt $attempts ]; do + if [ $attempt -gt 0 ]; then + echo "Failed to build $IMAGE_NAME:$SYSTEM_VERSION image, trying again." fi + + set_up_docker_builder + + docker buildx build \ + --build-arg DOCKERHUB_REPO="$DOCKERHUB_REPO" \ + --build-arg SYSTEM_VERSION="$SYSTEM_VERSION" \ + --"$CACHE_FROM_LATEST" \ + --"$CACHE_FROM_VERSION" \ + --"$CACHE_TO_LATEST" \ + --"$CACHE_TO_VERSION" \ + --file "$DOCKER_FILE_PATH" \ + --platform linux/amd64 \ + --tag "$DOCKERHUB_IMAGE:latest" \ + --tag "$DOCKERHUB_IMAGE:$SYSTEM_VERSION" \ + "$DOCKER_ACTION" \ + . + + finished=$? + attempt=$((attempt + 1)) +done + +if [ $finished -ne 0 ] && [ $attempt -ge $attempts ]; then + echo "Could not build $DOCKERHUB_IMAGE after $attempt attempts." + exit 1 +fi + +if test "$GITHUB_ACTION"; then + docker pull \ + --platform linux/amd64 \ + "$DOCKERHUB_IMAGE" fi diff --git a/scripts/rebuild_es_index.sh b/scripts/rebuild_es_index.sh index 30fded027..c3ac7745b 100755 --- a/scripts/rebuild_es_index.sh +++ b/scripts/rebuild_es_index.sh @@ -2,7 +2,10 @@ # This script should always run as if it were being called from # the directory it lives in. -script_directory="$(cd "$(dirname "$0")" || exit; pwd)" +script_directory="$( + cd "$(dirname "$0")" || exit + pwd +)" cd "$script_directory" || exit ./run_manage.sh -i api_local -s api search_index --rebuild -f diff --git a/scripts/reinit_database.sh b/scripts/reinit_database.sh index 18107000d..d397c9c14 100755 --- a/scripts/reinit_database.sh +++ b/scripts/reinit_database.sh @@ -2,10 +2,12 @@ # Reintializes the database so there's no data or migrations run against it. - # This script should always run as if it were being called from # the directory it lives in. -script_directory="$(cd "$(dirname "$0")" || exit; pwd)" +script_directory="$( + cd "$(dirname "$0")" || exit + pwd +)" cd "$script_directory" || exit # Clear it out. diff --git a/scripts/run_all_tests.sh b/scripts/run_all_tests.sh index 9372e43f2..20f7b4a14 100755 --- a/scripts/run_all_tests.sh +++ b/scripts/run_all_tests.sh @@ -7,7 +7,10 @@ set -e # This script should always run as if it were being called from # the directory it lives in. -script_directory="$(cd "$(dirname "$0")" || exit; pwd)" +script_directory="$( + cd "$(dirname "$0")" || exit + pwd +)" cd "$script_directory" || exit # Get access to all of the refinebio project diff --git a/scripts/run_es.sh b/scripts/run_es.sh index c300aca2f..9c2a9681a 100755 --- a/scripts/run_es.sh +++ b/scripts/run_es.sh @@ -1,13 +1,21 @@ #! 
/bin/sh -docker rm -f dres 2> /dev/null +docker rm -f dres 2>/dev/null -# Check if a docker database named "dres" exists, and if so just start it +# Check if a docker database named "dres" exists, and if so just start it. if [ "$(docker ps -a --filter name=dres -q)" ]; then - docker start dres > /dev/null -# Otherwise, run it with `docker run` + docker start dres >/dev/null +# Otherwise, run it with `docker run`. else - docker run --name dres -p 9200:9200 -p 9300:9300 -e "discovery.type=single-node" -e "indices.query.bool.max_clause_count=16384" -d docker.elastic.co/elasticsearch/elasticsearch:6.5.4 + docker run \ + --detach \ + --env "discovery.type=single-node" \ + --env "indices.query.bool.max_clause_count=16384" \ + --name dres \ + --platform linux/amd64 \ + --publish 9200:9200 \ + --publish 9300:9300 \ + docker.elastic.co/elasticsearch/elasticsearch:6.5.4 fi echo "Started ElasticSearch." diff --git a/scripts/run_manage.sh b/scripts/run_manage.sh index 51cc2f550..899d05c29 100755 --- a/scripts/run_manage.sh +++ b/scripts/run_manage.sh @@ -6,13 +6,16 @@ # This script should always run as if it were being called from # the directory it lives in. -script_directory="$(cd "$(dirname "$0")" || exit; pwd)" +script_directory="$( + cd "$(dirname "$0")" || exit + pwd +)" cd "$script_directory" || exit -# Import the functions in common.sh +# Import the functions in common.sh. . ./common.sh -# We need access to all of the projects +# We need access to all of the projects. cd .. print_description() { @@ -44,7 +47,7 @@ if [ "$1" = "-i" ]; then shift if [ -z "$1" ]; then echo "Error: Missing argument for -i" >&2 - echo + echo print_options >&2 exit 1 fi @@ -62,7 +65,7 @@ if [ "$1" = "-s" ]; then shift if [ -z "$1" ]; then echo "Error: Missing argument for -s" >&2 - echo + echo print_options >&2 exit 1 fi @@ -72,29 +75,44 @@ else service="foreman" fi -# Set up the data volume directory if it does not already exist +if [ -z "$SYSTEM_VERSION" ]; then + SYSTEM_VERSION="$(get_branch_hash)" +fi + +# Set up the data volume directory if it does not already exist. volume_directory="$script_directory/../api/volume" if [ ! -d "$volume_directory" ]; then mkdir "$volume_directory" fi chmod -R a+rwX "$volume_directory" -docker build -t dr_shell -f "$service/dockerfiles/Dockerfile.$image" . +docker build \ + --build-arg DOCKERHUB_REPO="$DOCKERHUB_REPO" \ + --build-arg SYSTEM_VERSION="$SYSTEM_VERSION" \ + --file "$service/dockerfiles/Dockerfile.$image" \ + --platform linux/amd64 \ + --tag dr_shell \ + . DB_HOST_IP=$(get_docker_db_ip_address) ES_HOST_IP=$(get_docker_es_ip_address) -# Only run interactively if we are on a TTY +# Only run interactively if we are on a TTY. 
if [ -t 1 ]; then - INTERACTIVE="-i" + INTERACTIVE="--interactive" fi -docker run -t $INTERACTIVE \ - --add-host=database:"$DB_HOST_IP" \ - --add-host=elasticsearch:"$ES_HOST_IP" \ - --env AWS_ACCESS_KEY_ID="$AWS_ACCESS_KEY_ID" \ - --env AWS_SECRET_ACCESS_KEY="$AWS_SECRET_ACCESS_KEY" \ - --env-file "$service/environments/local" \ - --volume /tmp:/tmp \ - --volume "$volume_directory":/home/user/data_store \ - dr_shell python3 manage.py "$@" +# shellcheck disable=SC2086 +docker run \ + --add-host=database:"$DB_HOST_IP" \ + --add-host=elasticsearch:"$ES_HOST_IP" \ + --env AWS_ACCESS_KEY_ID="$AWS_ACCESS_KEY_ID" \ + --env AWS_SECRET_ACCESS_KEY="$AWS_SECRET_ACCESS_KEY" \ + --env-file "$service/environments/local" \ + --platform linux/amd64 \ + --tty \ + --volume "$volume_directory":/home/user/data_store \ + --volume /tmp:/tmp \ + $INTERACTIVE \ + dr_shell \ + python3 manage.py "$@" diff --git a/scripts/run_postgres.sh b/scripts/run_postgres.sh index 2798dc04b..6ad077bce 100755 --- a/scripts/run_postgres.sh +++ b/scripts/run_postgres.sh @@ -2,30 +2,39 @@ # This script should always run as if it were being called from # the directory it lives in. -script_directory="$(cd "$(dirname "$0")" || exit; pwd)" +script_directory="$( + cd "$(dirname "$0")" || exit + pwd +)" cd "$script_directory" || exit # Get access to all of refinebio cd .. -# CircleCI Docker won't make this by default for some reason +# CircleCI Docker won't make this by default for some reason. # This doubly nested directory is a hacky workaround to prevent permissions issues. # Suggested here: # https://github.com/docker/for-linux/issues/380#issuecomment-436419102 VOLUMES="$script_directory/../volumes_postgres/volumes_postgres" if [ ! -d "$VOLUMES" ]; then - mkdir -p "$VOLUMES" + mkdir -p "$VOLUMES" fi # Check if a docker database named "drdb" exists, and if so just run it if [ "$(docker ps -a --filter name=drdb -q)" ]; then - docker start drdb > /dev/null - echo "Started database." -# Otherwise, install it from docker hub + docker start drdb >/dev/null + echo "Started database." +# Otherwise, install it from Docker Hub. else - # via https://hub.docker.com/_/postgres/ - # 9.6.6 is the current (as of Jan 23 2018) RDS most recent version. - # Password can be exposed to git/CI because this is only for dev/testing purposes, not real data. - echo "Installing database..." - docker run -p 5432:5432 --name drdb -v "$VOLUMES":/var/lib/postgresql/data -e POSTGRES_PASSWORD=mysecretpassword -d postgres:9.6.6 + # via https://hub.docker.com/_/postgres/ + # 9.6.6 is the current (as of Jan 23 2018) RDS most recent version. + # Password can be exposed to git/CI because this is only for dev/testing purposes, not real data. + echo "Installing database..." + docker run \ + --detach \ + --env POSTGRES_PASSWORD=mysecretpassword \ + --name drdb \ + --publish 5432:5432 \ + --volume "$VOLUMES":/var/lib/postgresql/data \ + postgres:9.6.6 fi diff --git a/scripts/run_psql_shell.sh b/scripts/run_psql_shell.sh index 9d99de96a..98939c0a0 100755 --- a/scripts/run_psql_shell.sh +++ b/scripts/run_psql_shell.sh @@ -1,2 +1,10 @@ #! 
/bin/sh -docker run -it -e PGPASSWORD=mysecretpassword --rm --link drdb:postgres postgres:9.6.6 psql -h postgres -U postgres -d data_refinery + +docker run \ + --env PGPASSWORD=mysecretpassword \ + --interactive \ + --link drdb:postgres \ + --rm \ + --tty \ + postgres:9.6.6 \ + psql -h postgres -U postgres -d data_refinery diff --git a/scripts/run_shell.sh b/scripts/run_shell.sh index cfb60d00c..7760d9f2e 100755 --- a/scripts/run_shell.sh +++ b/scripts/run_shell.sh @@ -12,7 +12,10 @@ set -e # This script should always run as if it were being called from # the directory it lives in. -script_directory="$(cd "$(dirname "$0")" || exit; pwd)" +script_directory="$( + cd "$(dirname "$0")" || exit + pwd +)" cd "$script_directory" || exit # Import functions in common.sh @@ -28,15 +31,21 @@ if [ ! -d "$volume_directory" ]; then fi chmod -R a+rwX "$volume_directory" -docker build -t dr_shell -f foreman/dockerfiles/Dockerfile.foreman . +docker build \ + --file foreman/dockerfiles/Dockerfile.foreman \ + --tag dr_shell \ + . DB_HOST_IP=$(get_docker_db_ip_address) -docker run -it \ - --add-host="database:$DB_HOST_IP" \ - --env AWS_ACCESS_KEY_ID="$AWS_ACCESS_KEY_ID" \ - --env AWS_SECRET_ACCESS_KEY="$AWS_SECRET_ACCESS_KEY" \ - --env-file foreman/environments/local \ - --volume /tmp:/tmp \ - --volume "$volume_directory":/home/user/data_store \ - --interactive dr_shell python3 manage.py shell +docker run \ + --add-host="database:$DB_HOST_IP" \ + --env AWS_ACCESS_KEY_ID="$AWS_ACCESS_KEY_ID" \ + --env AWS_SECRET_ACCESS_KEY="$AWS_SECRET_ACCESS_KEY" \ + --env-file foreman/environments/local \ + --interactive \ + --tty \ + --volume "$volume_directory":/home/user/data_store \ + --volume /tmp:/tmp \ + dr_shell \ + python3 manage.py shell diff --git a/scripts/update_docker_images.sh b/scripts/update_docker_images.sh new file mode 100755 index 000000000..f051e8317 --- /dev/null +++ b/scripts/update_docker_images.sh @@ -0,0 +1,136 @@ +#!/bin/sh + +# Exit on failure. +set -e + +# This script should always run as if it were being called from +# the directory it lives in. +script_directory="$( + cd "$(dirname "$0")" || exit + pwd +)" +cd "$script_directory" || exit + +. ./common.sh + +# Get access to all of refinebio. +cd .. + +print_description() { + echo 'This script will re-build all refine.bio docker images and push ' + echo 'them to the specified Dockerhub repository.' +} + +print_options() { + cat <&2 + print_options >&2 + exit 1 + ;; + :) + echo "Option -$OPTARG requires an argument." >&2 + print_options >&2 + exit 1 + ;; + esac +done + +if [ -z "$DOCKERHUB_REPO" ]; then + echo 'Error: must specify the Dockerhub repository with -r' + exit 1 +fi + +if [ -z "$SYSTEM_VERSION" ]; then + SYSTEM_VERSION="$(get_branch_hash)" +fi + +# Intentionally omit affymetrix unless specifically requested since it is so +# intense to build. +image_names="base migrations common_tests foreman api_base api_production api_local \ + transcriptome smasher salmon no_op illumina downloaders compendia" +if [ "$BUILD_AFFYMETRIX" ]; then + image_names="$image_names affymetrix" +fi + +# Set the version for the common project. +echo "$SYSTEM_VERSION" >common/version + +# Create common/dist/data-refinery-common-*.tar.gz, which is +# required by the workers and data_refinery_foreman images. +# Remove old common distributions if they exist. +rm -f common/dist/* +(cd common && python3 setup.py sdist 1>/dev/null) # Run quietly in a subshell. 
+ +# shellcheck disable=SC2086 +for image_name in $image_names; do + case $image_name in + api_base | api_local | api_production) + DOCKER_FILE_PATH="api/dockerfiles/Dockerfile.$image_name" + ;; + base | common_tests | migrations) + DOCKER_FILE_PATH="common/dockerfiles/Dockerfile.$image_name" + ;; + foreman) + DOCKER_FILE_PATH="foreman/dockerfiles/Dockerfile.$image_name" + ;; + *) + DOCKER_FILE_PATH="workers/dockerfiles/Dockerfile.$image_name" + ;; + esac + + echo + echo "Building the $image_name:$SYSTEM_VERSION image from $DOCKER_FILE_PATH." + echo + + DOCKERHUB_IMAGE="$DOCKERHUB_REPO/dr_$image_name" + CACHE_FROM_LATEST="cache-from=type=registry,ref=${DOCKERHUB_IMAGE}_cache:latest" + CACHE_FROM_VERSION="cache-from=type=registry,ref=${DOCKERHUB_IMAGE}_cache:$SYSTEM_VERSION" + CACHE_TO_LATEST="cache-to=type=registry,ref=${DOCKERHUB_IMAGE}_cache:latest,mode=max" + CACHE_TO_VERSION="cache-to=type=registry,ref=${DOCKERHUB_IMAGE}_cache:$SYSTEM_VERSION,mode=max" + + set_up_docker_builder + + docker buildx build \ + --build-arg DOCKERHUB_REPO="$DOCKERHUB_REPO" \ + --build-arg SYSTEM_VERSION="$SYSTEM_VERSION" \ + --"$CACHE_FROM_LATEST" \ + --"$CACHE_FROM_VERSION" \ + --"$CACHE_TO_LATEST" \ + --"$CACHE_TO_VERSION" \ + --file "$DOCKER_FILE_PATH" \ + --platform linux/amd64 \ + --push \ + --tag "$DOCKERHUB_IMAGE:latest" \ + --tag "$DOCKERHUB_IMAGE:$SYSTEM_VERSION" \ + . +done diff --git a/scripts/update_models.sh b/scripts/update_models.sh index d6648df8f..d04fd6810 100755 --- a/scripts/update_models.sh +++ b/scripts/update_models.sh @@ -1,31 +1,36 @@ #! /bin/sh -# Makes migrations and re-installs so Docker images update locally +# Makes migrations and re-installs so Docker images update locally. -# Exit on fail +# Exit on fail. set -e # This script should always run as if it were being called from # the directory it lives in. -script_directory="$(cd "$(dirname "$0")" || exit; pwd)" +script_directory="$( + cd "$(dirname "$0")" || exit + pwd +)" cd "$script_directory" || exit -# Get access to all of refinebio +# Get access to all of refinebio. cd .. -if ! docker ps | tail -n +2 | awk '{ print $NF }' | grep drdb > /dev/null; then +if ! docker ps | tail -n +2 | awk '{ print $NF }' | grep drdb >/dev/null; then echo "You must start Postgres first with:" >&2 echo "./scripts/run_postgres.sh" >&2 exit 1 fi -# Default to "0.0.0.dev" for system version if we're not running in the cloud. +. ./scripts/common.sh + +# Default to the branch name hash for system version if we're not running in the cloud. if [ -z "$SYSTEM_VERSION" ]; then - SYSTEM_VERSION="0.0.0.dev$(date +%s)" + SYSTEM_VERSION="$(get_branch_hash)" export SYSTEM_VERSION fi # Put this in place for common to read from. -echo "$SYSTEM_VERSION" > common/version +echo "$SYSTEM_VERSION" >common/version # Ensure there is only one distribution to copy over. rm -f common/dist/* diff --git a/scripts/update_my_docker_images.sh b/scripts/update_my_docker_images.sh deleted file mode 100755 index e49d1d542..000000000 --- a/scripts/update_my_docker_images.sh +++ /dev/null @@ -1,126 +0,0 @@ -#!/bin/sh - -# This script is very similar to .circleci/update_docker_images.sh but it has less -# production/cloud related checks. - -# Exit on failure -set -e - -# This script should always run as if it were being called from -# the directory it lives in. -script_directory="$(cd "$(dirname "$0")" || exit; pwd)" -cd "$script_directory" || exit - -# Get access to all of refinebio -cd .. 
- -print_description() { - echo 'This script will re-build all refine.bio docker images and push them to' - echo 'the specified Dockerhub repo.' -} - -print_options() { - cat << EOF -There are two required arguments for this script: --d specifies the Dockerhub repo you would like to deploy to. --v specifies the version you would like to build. This version will passed into - the Docker image as the environment variable SYSTEM_VERSION. - It also will be used as the tag for the Docker images built. - -There is also one optional argument: --a also build the affymetrix image - (we normally don't because it is so intense to build) -EOF -} - -while getopts ":d:v:ah" opt; do - case $opt in - d) - export DOCKERHUB_REPO="$OPTARG" - ;; - v) - export SYSTEM_VERSION="$OPTARG" - ;; - a) - AFFYMETRIX=true - ;; - h) - print_description - echo - print_options - exit 0 - ;; - \?) - echo "Invalid option: -$OPTARG" >&2 - print_options >&2 - exit 1 - ;; - :) - echo "Option -$OPTARG requires an argument." >&2 - print_options >&2 - exit 1 - ;; - esac -done - -if [ -z "$DOCKERHUB_REPO" ]; then - echo 'Error: must specify the Dockerhub repo with -d' - exit 1 -fi - -if [ -z "$SYSTEM_VERSION" ]; then - echo 'Error: must specify the version repo with -v' - exit 1 -fi - -# Intentionally omit affymetrix unless specifically requested since it is so intense to build. -CCDL_WORKER_IMGS="salmon transcriptome no_op downloaders illumina smasher compendia" -if [ "$AFFYMETRIX" ]; then - CCDL_WORKER_IMGS="$CCDL_WORKER_IMGS affymetrix" -fi - -# Set the version for the common project. -echo "$SYSTEM_VERSION" > common/version - -# Create common/dist/data-refinery-common-*.tar.gz, which is -# required by the workers and data_refinery_foreman images. -## Remove old common distributions if they exist -rm -f common/dist/* -(cd common && python3 setup.py sdist) - -for IMG in $CCDL_WORKER_IMGS; do - image_name="$DOCKERHUB_REPO/dr_$IMG" - - echo "Building docker image: $image_name:$SYSTEM_VERSION" - # Build and push image. - docker build \ - -t "$image_name:$SYSTEM_VERSION" \ - -f "workers/dockerfiles/Dockerfile.$IMG" \ - --build-arg SYSTEM_VERSION="$SYSTEM_VERSION" . - docker push "$image_name:$SYSTEM_VERSION" - # Update latest version - docker tag "$image_name:$SYSTEM_VERSION" "$image_name:latest" - docker push "$image_name:latest" -done - -# Build and push Foreman image. -FOREMAN_DOCKER_IMAGE="$DOCKERHUB_REPO/dr_foreman" -docker build \ - -t "$FOREMAN_DOCKER_IMAGE:$SYSTEM_VERSION" \ - -f foreman/dockerfiles/Dockerfile.foreman \ - --build-arg SYSTEM_VERSION="$SYSTEM_VERSION" . -docker push "$FOREMAN_DOCKER_IMAGE:$SYSTEM_VERSION" -# Update latest version -docker tag "$FOREMAN_DOCKER_IMAGE:$SYSTEM_VERSION" "$FOREMAN_DOCKER_IMAGE:latest" -docker push "$FOREMAN_DOCKER_IMAGE:latest" - -# Build and push API image. -API_DOCKER_IMAGE="$DOCKERHUB_REPO/dr_api" -docker build \ - -t "$API_DOCKER_IMAGE:$SYSTEM_VERSION" \ - -f api/dockerfiles/Dockerfile.api_production \ - --build-arg SYSTEM_VERSION="$SYSTEM_VERSION" . 
-docker push "$API_DOCKER_IMAGE:$SYSTEM_VERSION" -# Update latest version -docker tag "$API_DOCKER_IMAGE:$SYSTEM_VERSION" "$API_DOCKER_IMAGE:latest" -docker push "$API_DOCKER_IMAGE:latest" diff --git a/workers/R/renv_load.R b/workers/R/renv_load.R deleted file mode 100644 index dd5caa080..000000000 --- a/workers/R/renv_load.R +++ /dev/null @@ -1,16 +0,0 @@ -options(warn = 2) -options(repos = structure(c( - Bioconductor = "https://bioconductor.org/packages/3.6/bioc", - BioconductorAnnotation = "https://bioconductor.org/packages/3.6/data/annotation", - BioconductorExperiment = "https://bioconductor.org/packages/3.6/data/experiment", - CRAN = "https://cloud.r-project.org" -))) -options(Ncpus = parallel::detectCores()) -options(renv.r.version = "3.4.4") -options(renv.settings.use.cache = FALSE) - -install.packages("BiocInstaller") -install.packages("https://cran.r-project.org/src/contrib/Archive/renv/renv_0.16.0.tar.gz") - -renv::consent(provided = TRUE) -renv::restore(prompt = FALSE, rebuild = TRUE) diff --git a/workers/dockerfiles/Dockerfile.affymetrix b/workers/dockerfiles/Dockerfile.affymetrix index 6ac42f4ef..8dad322b3 100644 --- a/workers/dockerfiles/Dockerfile.affymetrix +++ b/workers/dockerfiles/Dockerfile.affymetrix @@ -1,4 +1,6 @@ -FROM ccdlstaging/dr_base:latest +ARG DOCKERHUB_REPO +ARG SYSTEM_VERSION +FROM $DOCKERHUB_REPO/dr_base:$SYSTEM_VERSION # Fail in case of an error at any stage in the pipe. SHELL ["/bin/bash", "-o", "pipefail", "-c"] @@ -6,7 +8,7 @@ SHELL ["/bin/bash", "-o", "pipefail", "-c"] WORKDIR /home/user COPY workers/R/dependencies/affymetrix/renv.lock . -COPY workers/R/renv_load.R . +COPY common/R/renv_load.R . RUN Rscript renv_load.R COPY workers/R/dependencies/affymetrix/install_ensg_pkgs.R . @@ -26,7 +28,6 @@ COPY workers/ . RUN rm -rf /root/.cache/* -ARG SYSTEM_VERSION ENV SYSTEM_VERSION=$SYSTEM_VERSION USER user diff --git a/workers/dockerfiles/Dockerfile.affymetrix_local b/workers/dockerfiles/Dockerfile.affymetrix_local deleted file mode 100644 index 7fe881ab0..000000000 --- a/workers/dockerfiles/Dockerfile.affymetrix_local +++ /dev/null @@ -1,29 +0,0 @@ -FROM ccdlstaging/dr_affymetrix:latest - -# Fail in case of an error at any stage in the pipe. -SHELL ["/bin/bash", "-o", "pipefail", "-c"] - -USER root - -WORKDIR /home/user - -# Remove the version of common already installed. -RUN rm -rf common && \ - pip3 uninstall -y data_refinery_common - -# Get the latest version from the dist directory. -COPY common/dist/data-refinery-common-* common/ -RUN pip3 install --ignore-installed --no-cache-dir \ - common/$(ls common -1 | sort --version-sort | tail -1) - -COPY config/ config/ -COPY workers/ . - -RUN rm -rf /root/.cache/* - -ARG SYSTEM_VERSION -ENV SYSTEM_VERSION=$SYSTEM_VERSION - -USER user - -ENTRYPOINT [] diff --git a/workers/dockerfiles/Dockerfile.compendia b/workers/dockerfiles/Dockerfile.compendia index 7b8240b78..371abf2c7 100644 --- a/workers/dockerfiles/Dockerfile.compendia +++ b/workers/dockerfiles/Dockerfile.compendia @@ -68,7 +68,7 @@ WORKDIR /home/user ENV R_LIBS=/usr/local/lib/R/site-library COPY workers/R/dependencies/compendia/renv.lock . -COPY workers/R/renv_load.R . +COPY common/R/renv_load.R . 
RUN Rscript renv_load.R COPY workers/data_refinery_workers/requirements/compendia.txt requirements.txt @@ -82,7 +82,8 @@ RUN wget -q "https://bitbucket.org/ariya/phantomjs/downloads/\ phantomjs-2.1.1-linux-x86_64.tar.bz2" && \ tar xvjf phantomjs-2.1.1-linux-x86_64.tar.bz2 -C /usr/local/share/ && \ ln -s /usr/local/share/phantomjs-2.1.1-linux-x86_64/bin/phantomjs \ - /usr/local/bin/ + /usr/local/bin/ && \ + rm phantomjs-2.1.1-linux-x86_64.tar.bz2 # Get the latest version from the dist directory. COPY common/dist/data-refinery-common-* common/ diff --git a/workers/dockerfiles/Dockerfile.downloaders b/workers/dockerfiles/Dockerfile.downloaders index 1b19485e2..5361db49f 100644 --- a/workers/dockerfiles/Dockerfile.downloaders +++ b/workers/dockerfiles/Dockerfile.downloaders @@ -1,4 +1,6 @@ -FROM ccdlstaging/dr_base:latest +ARG DOCKERHUB_REPO +ARG SYSTEM_VERSION +FROM $DOCKERHUB_REPO/dr_base:$SYSTEM_VERSION # Fail in case of an error at any stage in the pipe. SHELL ["/bin/bash", "-o", "pipefail", "-c"] @@ -6,7 +8,7 @@ SHELL ["/bin/bash", "-o", "pipefail", "-c"] WORKDIR /home/user COPY workers/R/dependencies/downloaders/renv.lock . -COPY workers/R/renv_load.R . +COPY common/R/renv_load.R . RUN Rscript renv_load.R COPY workers/data_refinery_workers/downloaders/requirements.txt . @@ -34,7 +36,6 @@ COPY workers/ . RUN rm -rf /root/.cache/* -ARG SYSTEM_VERSION ENV PATH="$PATH:/home/user/.aspera/ascli/sdk" ENV SYSTEM_VERSION=$SYSTEM_VERSION diff --git a/workers/dockerfiles/Dockerfile.illumina b/workers/dockerfiles/Dockerfile.illumina index b89630096..1d771b322 100644 --- a/workers/dockerfiles/Dockerfile.illumina +++ b/workers/dockerfiles/Dockerfile.illumina @@ -1,4 +1,6 @@ -FROM ccdlstaging/dr_base:latest +ARG DOCKERHUB_REPO +ARG SYSTEM_VERSION +FROM $DOCKERHUB_REPO/dr_base:$SYSTEM_VERSION # Fail in case of an error at any stage in the pipe. SHELL ["/bin/bash", "-o", "pipefail", "-c"] @@ -6,7 +8,7 @@ SHELL ["/bin/bash", "-o", "pipefail", "-c"] WORKDIR /home/user COPY workers/R/dependencies/illumina/renv.lock . -COPY workers/R/renv_load.R . +COPY common/R/renv_load.R . RUN Rscript renv_load.R COPY workers/data_refinery_workers/processors/requirements.txt . @@ -25,7 +27,6 @@ COPY workers/illumina_probe_maps/ probe_maps/ RUN rm -rf /root/.cache/* -ARG SYSTEM_VERSION ENV SYSTEM_VERSION=$SYSTEM_VERSION USER user diff --git a/workers/dockerfiles/Dockerfile.no_op b/workers/dockerfiles/Dockerfile.no_op index 11729dba4..7e33a14e3 100644 --- a/workers/dockerfiles/Dockerfile.no_op +++ b/workers/dockerfiles/Dockerfile.no_op @@ -1,4 +1,6 @@ -FROM ccdlstaging/dr_base:latest +ARG DOCKERHUB_REPO +ARG SYSTEM_VERSION +FROM $DOCKERHUB_REPO/dr_base:$SYSTEM_VERSION # Fail in case of an error at any stage in the pipe. SHELL ["/bin/bash", "-o", "pipefail", "-c"] @@ -6,7 +8,7 @@ SHELL ["/bin/bash", "-o", "pipefail", "-c"] WORKDIR /home/user COPY workers/R/dependencies/no_op/renv.lock . -COPY workers/R/renv_load.R . +COPY common/R/renv_load.R . RUN Rscript renv_load.R COPY workers/data_refinery_workers/processors/requirements.txt . @@ -35,7 +37,6 @@ COPY workers/ . 
RUN rm -rf /root/.cache/* -ARG SYSTEM_VERSION ENV SYSTEM_VERSION=$SYSTEM_VERSION USER user diff --git a/workers/dockerfiles/Dockerfile.salmon b/workers/dockerfiles/Dockerfile.salmon index d9cd8818e..08a3d3299 100644 --- a/workers/dockerfiles/Dockerfile.salmon +++ b/workers/dockerfiles/Dockerfile.salmon @@ -1,4 +1,6 @@ -FROM ccdlstaging/dr_base:latest +ARG DOCKERHUB_REPO +ARG SYSTEM_VERSION +FROM $DOCKERHUB_REPO/dr_base:$SYSTEM_VERSION # Fail in case of an error at any stage in the pipe. SHELL ["/bin/bash", "-o", "pipefail", "-c"] @@ -6,7 +8,7 @@ SHELL ["/bin/bash", "-o", "pipefail", "-c"] WORKDIR /home/user COPY workers/R/dependencies/salmon/renv.lock . -COPY workers/R/renv_load.R . +COPY common/R/renv_load.R . RUN Rscript renv_load.R COPY workers/data_refinery_workers/processors/requirements.txt . @@ -29,7 +31,7 @@ v${SALMON_VERSION}/Salmon-${SALMON_VERSION}_linux_x86_64.tar.gz" && \ -C "Salmon-${SALMON_VERSION}_linux_x86_64" --strip-components 1 && \ ln -sf "$(pwd)/Salmon-${SALMON_VERSION}_linux_x86_64/bin/salmon" \ /usr/local/bin/ && \ - rm -f "Salmon-${SALMON_VERSION}_linux_x86_64.tar.gz" + rm "Salmon-${SALMON_VERSION}_linux_x86_64.tar.gz" # End Salmon installation. ENV SRA_VERSION=2.9.1 @@ -45,7 +47,8 @@ RUN git clone https://github.com/COMBINE-lab/SalmonTools.git && \ wget -q "https://ftp.ncbi.nlm.nih.gov/sra/sdk/${SRA_VERSION}/\ sratoolkit.${SRA_VERSION}-ubuntu64.tar.gz" && \ tar zxfv "sratoolkit.${SRA_VERSION}-ubuntu64.tar.gz" && \ - cp -r "sratoolkit.${SRA_VERSION}-ubuntu64/bin/"* /usr/bin + cp -r "sratoolkit.${SRA_VERSION}-ubuntu64/bin/"* /usr/bin && \ + rm "sratoolkit.${SRA_VERSION}-ubuntu64.tar.gz" # Get the latest version from the dist directory. COPY common/dist/data-refinery-common-* common/ @@ -58,7 +61,6 @@ COPY workers/ . RUN rm -rf /root/.cache/* -ARG SYSTEM_VERSION ENV SYSTEM_VERSION=$SYSTEM_VERSION USER user diff --git a/workers/dockerfiles/Dockerfile.smasher b/workers/dockerfiles/Dockerfile.smasher index 29bba5cd8..f95da5016 100644 --- a/workers/dockerfiles/Dockerfile.smasher +++ b/workers/dockerfiles/Dockerfile.smasher @@ -1,4 +1,6 @@ -FROM ccdlstaging/dr_base:latest +ARG DOCKERHUB_REPO +ARG SYSTEM_VERSION +FROM $DOCKERHUB_REPO/dr_base:$SYSTEM_VERSION # Fail in case of an error at any stage in the pipe. SHELL ["/bin/bash", "-o", "pipefail", "-c"] @@ -6,7 +8,7 @@ SHELL ["/bin/bash", "-o", "pipefail", "-c"] WORKDIR /home/user COPY workers/R/dependencies/smasher/renv.lock . -COPY workers/R/renv_load.R . +COPY common/R/renv_load.R . RUN Rscript renv_load.R COPY workers/data_refinery_workers/processors/requirements.txt . @@ -23,7 +25,6 @@ COPY workers/ . RUN rm -rf /root/.cache/* -ARG SYSTEM_VERSION ENV SYSTEM_VERSION=$SYSTEM_VERSION USER user diff --git a/workers/dockerfiles/Dockerfile.transcriptome b/workers/dockerfiles/Dockerfile.transcriptome index 3aec1395b..e3c5328a4 100644 --- a/workers/dockerfiles/Dockerfile.transcriptome +++ b/workers/dockerfiles/Dockerfile.transcriptome @@ -1,17 +1,12 @@ -FROM ccdlstaging/dr_base:latest +ARG DOCKERHUB_REPO +ARG SYSTEM_VERSION +FROM $DOCKERHUB_REPO/dr_base:$SYSTEM_VERSION # Fail in case of an error at any stage in the pipe. SHELL ["/bin/bash", "-o", "pipefail", "-c"] WORKDIR /home/user -COPY workers/data_refinery_workers/processors/requirements.txt . -RUN pip3 install --ignore-installed --no-cache-dir -r requirements.txt && \ - # It's annoying that this can only be installed via git. - git clone https://github.com/deweylab/RSEM.git && \ - cd RSEM && make install && \ - rm -rf RSEM - # Install Salmon. 
# Tximport requires all experiments to be processed with the same version of # Salmon to work https://github.com/AlexsLemonade/refinebio/issues/1496. @@ -19,6 +14,7 @@ RUN pip3 install --ignore-installed --no-cache-dir -r requirements.txt && \ # all samples from incomplete experiments must have salmon run on them again. ENV SALMON_VERSION=0.13.1 +COPY workers/data_refinery_workers/processors/requirements.txt . # Salmon can extract to a different directory than the name of the tar file. RUN wget -q "https://github.com/COMBINE-lab/salmon/releases/download/\ v${SALMON_VERSION}/Salmon-${SALMON_VERSION}_linux_x86_64.tar.gz" && \ @@ -30,6 +26,13 @@ v${SALMON_VERSION}/Salmon-${SALMON_VERSION}_linux_x86_64.tar.gz" && \ rm -r Salmon* # End Salmon installation. +COPY workers/data_refinery_workers/processors/requirements.txt . +RUN pip3 install --ignore-installed --no-cache-dir -r requirements.txt && \ + # It's annoying that this can only be installed via git. + git clone https://github.com/deweylab/RSEM.git && \ + cd RSEM && make install && \ + rm -rf RSEM + COPY common/dist/data-refinery-common-* common/ RUN pip3 install --ignore-installed --no-cache-dir \ common/$(ls common -1 | sort --version-sort | tail -1) @@ -40,7 +43,6 @@ COPY workers/ . RUN rm -rf /root/.cache/* -ARG SYSTEM_VERSION ENV SYSTEM_VERSION=$SYSTEM_VERSION USER user diff --git a/workers/run_command.sh b/workers/run_command.sh index da50660b1..51dc3b6c9 100755 --- a/workers/run_command.sh +++ b/workers/run_command.sh @@ -7,18 +7,18 @@ set -e while getopts "i:" opt; do case $opt in - i) - image=$OPTARG - ;; - \?) - echo "Invalid option: -$OPTARG" >&2 - exit 1 - ;; + i) + IMAGE="$OPTARG" + ;; + \?) + echo "Invalid option: -$OPTARG" >&2 + exit 1 + ;; esac done -if [ -z "$image" ]; then - image="smasher" +if [ -z "$IMAGE" ]; then + IMAGE="smasher" else shift shift @@ -26,7 +26,10 @@ fi # This script should always run as if it were being called from # the directory it lives in. -script_directory="$(cd "$(dirname "$0")" || exit; pwd)" +script_directory="$( + cd "$(dirname "$0")" || exit + pwd +)" cd "$script_directory" || exit # However in order to give Docker access to all the code we have to @@ -47,16 +50,19 @@ fi chmod -R a+rwX "$volume_directory" . ./scripts/common.sh + DB_HOST_IP=$(get_docker_db_ip_address) -./scripts/prepare_image.sh -i "$image" -s workers -image_name=ccdlstaging/dr_"$image" +./scripts/prepare_image.sh -i "$IMAGE" -s workers docker run \ - --add-host=database:"$DB_HOST_IP" \ - --env-file workers/environments/local \ - --env AWS_ACCESS_KEY_ID \ - --env AWS_SECRET_ACCESS_KEY \ - --volume "$volume_directory":/home/user/data_store \ - --link drdb:postgres \ - -it "$image_name" bash -c "$@" + --add-host=database:"$DB_HOST_IP" \ + --env AWS_ACCESS_KEY_ID \ + --env AWS_SECRET_ACCESS_KEY \ + --env-file workers/environments/local \ + --interactive \ + --link drdb:postgres \ + --tty \ + --volume "$volume_directory":/home/user/data_store \ + "$DOCKERHUB_REPO/dr_$IMAGE" \ + bash -c "$@" diff --git a/workers/run_janitor.sh b/workers/run_janitor.sh index 27c6ed6f5..70d9dcbde 100755 --- a/workers/run_janitor.sh +++ b/workers/run_janitor.sh @@ -1,19 +1,20 @@ #!/bin/sh -# the directory it lives in. -script_directory="$(cd "$(dirname "$0")" || exit; pwd)" +# This script should always run as if it were being called from the directory it lives in.
+script_directory="$( + cd "$(dirname "$0")" || exit + pwd +)" cd "$script_directory" || exit -# However in order to give Docker access to all the code we have to -# move up a level +# However, in order to give Docker access to all the code we have to +# move up a level. cd .. ./scripts/prepare_image.sh -i smasher -image_name="ccdlstaging/dr_smasher" - -volume_directory="$script_directory/volume" . ./scripts/common.sh + DB_HOST_IP=$(get_docker_db_ip_address) AWS_ACCESS_KEY_ID="$(~/bin/aws configure get default.aws_access_key_id)" @@ -22,13 +23,15 @@ AWS_SECRET_ACCESS_KEY="$(~/bin/aws configure get default.aws_secret_access_key)" export AWS_SECRET_ACCESS_KEY docker run \ - -it \ - -m 500m \ - --add-host=database:"$DB_HOST_IP" \ - --env-file workers/environments/local \ - --env AWS_ACCESS_KEY_ID \ - --env AWS_SECRET_ACCESS_KEY \ - --entrypoint ./manage.py \ - --volume "$volume_directory":/home/user/data_store \ - --link drdb:postgres \ - "$image_name" run_janitor + --add-host=database:"$DB_HOST_IP" \ + --entrypoint ./manage.py \ + --env AWS_ACCESS_KEY_ID \ + --env AWS_SECRET_ACCESS_KEY \ + --env-file workers/environments/local \ + --interactive \ + --link drdb:postgres \ + --memory 500m \ + --tty \ + --volume "$script_directory/volume":/home/user/data_store \ + "$DOCKERHUB_REPO/dr_smasher" \ + run_janitor diff --git a/workers/run_job.sh b/workers/run_job.sh index 7342103bc..47d95f192 100755 --- a/workers/run_job.sh +++ b/workers/run_job.sh @@ -1,67 +1,70 @@ #!/bin/bash -# Script for running a django management command to test the worker. +# Script for running a Django management command to test the worker. while getopts "hi:" opt; do case $opt in - i) - image=$OPTARG - ;; - h) - echo "Runs a downloader or processor job. The following arguments are supported:" - echo "-h : Print this help message and exit." - echo "-i : The image to use. Options are:" - echo " downloaders (default)" - echo " salmon" - echo " transcriptome" - echo " no_op" - echo " downloaders" - echo " illumina" - echo " affymetrix" - echo " : What kind of job to run." - echo " Must be either 'run_downloader_job' or 'run_processor_job'." - echo "--job-name= : The type of job to run." - echo " For processor jobs, options are:" - echo " AFFY_TO_PCL" - echo " AGILENT_TWOCOLOR_TO_PCL" - echo " SALMON" - echo " ILLUMINA_TO_PCL" - echo " TRANSCRIPTOME_INDEX_LONG" - echo " TRANSCRIPTOME_INDEX_SHORT" - echo " NO_OP" - echo " For downloader jobs, options are:" - echo " ARRAY_EXPRESS" - echo " SRA" - echo " TRANSCRIPTOME_INDEX" - echo " GEO" - echo "--job-id= : The id of the job you want to run. Must already exist in the database." - echo "" - echo "Note that the must correspond to the ." - echo " AGILENT_TWOCOLOR_TO_PCL is a special case because it requires the 'affymetrix' image." - echo "" - echo "Examples:" - echo " ./workers/run_job.sh run_downloader_job --job-name=SRA --job-id=12345" - echo " ./workers/run_job.sh -i affymetrix run_processor_job --job-name=AGILENT_TWOCOLOR_TO_PCL --job-id=54321" - exit 0 - ;; - \?) - echo "Invalid option: -$OPTARG" >&2 - exit 1 - ;; - :) - echo "Option -$OPTARG requires an argument." >&2 - exit 1 - ;; + i) + IMAGE="$OPTARG" + ;; + h) + echo "Runs a downloader or processor job. The following arguments are supported:" + echo "-h : Print this help message and exit." + echo "-i : The image to use. Options are:" + echo " downloaders (default)" + echo " salmon" + echo " transcriptome" + echo " no_op" + echo " downloaders" + echo " illumina" + echo " affymetrix" + echo " : What kind of job to run." 
+ echo " Must be either 'run_downloader_job' or 'run_processor_job'." + echo "--job-name= : The type of job to run." + echo " For processor jobs, options are:" + echo " AFFY_TO_PCL" + echo " AGILENT_TWOCOLOR_TO_PCL" + echo " SALMON" + echo " ILLUMINA_TO_PCL" + echo " TRANSCRIPTOME_INDEX_LONG" + echo " TRANSCRIPTOME_INDEX_SHORT" + echo " NO_OP" + echo " For downloader jobs, options are:" + echo " ARRAY_EXPRESS" + echo " SRA" + echo " TRANSCRIPTOME_INDEX" + echo " GEO" + echo "--job-id= : The id of the job you want to run. Must already exist in the database." + echo "" + echo "Note that the must correspond to the ." + echo " AGILENT_TWOCOLOR_TO_PCL is a special case because it requires the 'affymetrix' image." + echo "" + echo "Examples:" + echo " ./workers/run_job.sh run_downloader_job --job-name=SRA --job-id=12345" + echo " ./workers/run_job.sh -i affymetrix run_processor_job --job-name=AGILENT_TWOCOLOR_TO_PCL --job-id=54321" + exit 0 + ;; + \?) + echo "Invalid option: -$OPTARG" >&2 + exit 1 + ;; + :) + echo "Option -$OPTARG requires an argument." >&2 + exit 1 + ;; esac done -if [[ -z "$image" ]]; then - image="downloaders" +if [[ -z "$IMAGE" ]]; then + IMAGE="downloaders" fi # This script should always run as if it were being called from # the directory it lives in. -script_directory="$(cd "$(dirname "$0")" || exit; pwd)" +script_directory="$( + cd "$(dirname "$0")" || exit + pwd +)" cd "$script_directory" || exit # However in order to give Docker access to all the code we have to @@ -69,12 +72,12 @@ cd "$script_directory" || exit cd .. # Agilent uses the same image as affymetrix -if [[ "$image" == "affymetrix" || "$image" == "agilent" ]]; then - ./scripts/prepare_image.sh -p -i affymetrix - image_name="ccdlstaging/dr_affymetrix" +if [[ "$IMAGE" == "affymetrix" || "$IMAGE" == "agilent" ]]; then + ./scripts/prepare_image.sh -i affymetrix + IMAGE_NAME="$DOCKERHUB_REPO/dr_affymetrix" else - ./scripts/prepare_image.sh -i "$image" - image_name="ccdlstaging/dr_$image" + ./scripts/prepare_image.sh -i "$IMAGE" + IMAGE_NAME="$DOCKERHUB_REPO/dr_$IMAGE" fi volume_directory="$script_directory/volume" @@ -83,16 +86,20 @@ if [ ! -d "$volume_directory" ]; then fi chmod -R a+rwX "$volume_directory" -source scripts/common.sh +. scripts/common.sh + DB_HOST_IP=$(get_docker_db_ip_address) docker run \ - -it \ - --add-host=database:"$DB_HOST_IP" \ - --env-file workers/environments/local \ - --env AWS_ACCESS_KEY_ID \ - --env AWS_SECRET_ACCESS_KEY \ - --entrypoint ./manage.py \ - --volume "$volume_directory":/home/user/data_store \ - --link drdb:postgres \ - "$image_name" "${@: -3}" "${@: -2}" "${@: -1}" + --add-host=database:"$DB_HOST_IP" \ + --entrypoint ./manage.py \ + --env AWS_ACCESS_KEY_ID \ + --env AWS_SECRET_ACCESS_KEY \ + --env-file workers/environments/local \ + --interactive \ + --link drdb:postgres \ + --platform linux/amd64 \ + --tty \ + --volume "$volume_directory":/home/user/data_store \ + "$IMAGE_NAME" \ + "${@: -3}" "${@: -2}" "${@: -1}" diff --git a/workers/run_tests.sh b/workers/run_tests.sh index 3e03e2ada..a66d0a2f7 100755 --- a/workers/run_tests.sh +++ b/workers/run_tests.sh @@ -3,7 +3,7 @@ # Script for executing Django PyUnit tests within a Docker container. -# Exit on failure +# Exit on failure. set -e print_description() { @@ -20,47 +20,49 @@ print_options() { while getopts ":t:h" opt; do case $opt in - t) - tag=$OPTARG - ;; - h) - print_description - echo - print_options - exit 0 - ;; - \?) 
- echo "Invalid option: -$OPTARG" >&2 - print_options >&2 - exit 1 - ;; - :) - echo "Option -$OPTARG requires an argument." >&2 - print_options >&2 - exit 1 - ;; + t) + tag="$OPTARG" + ;; + h) + print_description + echo + print_options + exit 0 + ;; + \?) + echo "Invalid option: -$OPTARG" >&2 + print_options >&2 + exit 1 + ;; + :) + echo "Option -$OPTARG requires an argument." >&2 + print_options >&2 + exit 1 + ;; esac done # This script should always run as if it were being called from # the directory it lives in. -script_directory="$(cd "$(dirname "$0")" || exit; pwd)" +script_directory="$( + cd "$(dirname "$0")" || exit + pwd +)" cd "$script_directory" || exit -# However in order to give Docker access to all the code we have to +# However, in order to give Docker access to all the code we have to # move up a level cd .. -# Ensure that postgres is running +# Ensure that Postgres is running. if ! [ "$(docker ps --filter name=drdb -q)" ]; then echo "You must start Postgres first with:" >&2 echo "./scripts/run_postgres.sh" >&2 exit 1 fi -volume_directory="$script_directory/test_volume" - test_data_repo="https://s3.amazonaws.com/data-refinery-test-assets" +volume_directory="$script_directory/test_volume" if [ -z "$tag" ] || [ "$tag" = "salmon" ]; then # Download "salmon quant" test data The `newer` file was to @@ -77,11 +79,11 @@ if [ -z "$tag" ] || [ "$tag" = "salmon" ]; then rm "$volume_directory"/salmon_tests.tar.gz fi - # salmontools test data + # SalmonTools test data. salmontools_test_zip="$test_data_repo/salmontools_test_data.tar.gz" salmontools_test_dir="$volume_directory/salmontools" - # Clean the test data directory + # Clean the test data directory. rm -rf "$salmontools_test_dir" mkdir -p "$salmontools_test_dir" @@ -102,21 +104,21 @@ if [ -z "$tag" ] || [ "$tag" = "salmon" ]; then mkdir -p "$rna_seq_test_raw_dir" echo "Downloading $read_1_name for Salmon tests." wget -q -O "$rna_seq_test_data_1" \ - "$test_data_repo/$read_1_name" + "$test_data_repo/$read_1_name" echo "Downloading $read_2_name for Salmon tests." wget -q -O "$rna_seq_test_data_2" \ - "$test_data_repo/$read_2_name" + "$test_data_repo/$read_2_name" fi if [ ! -e "$dotsra" ]; then mkdir -p "$rna_seq_test_raw_dir" echo "Downloading $dotsra_name for Salmon tests." wget -q -O "$dotsra" \ - "$test_data_repo/$dotsra_name" + "$test_data_repo/$dotsra_name" fi fi if [ -z "$tag" ] || [ "$tag" = "affymetrix" ]; then - # Make sure CEL for test is downloaded from S3 + # Make sure CEL for test is downloaded from S3. cel_name="GSM1426071_CD_colon_active_1.CEL" cel_name2="GSM45588.CEL" cel_name3="GSM1364667_U_110208_7-02-10_S2.CEL" @@ -135,35 +137,35 @@ if [ -z "$tag" ] || [ "$tag" = "affymetrix" ]; then mkdir -p "$cel_test_raw_dir" echo "Downloading CEL for tests." wget -q -O "$cel_test_data_1" \ - "$test_data_repo/$cel_name" + "$test_data_repo/$cel_name" fi if [ ! -e "$cel_test_data_2" ]; then echo "Downloading Non-Brainarray CEL for tests." wget -q -O "$cel_test_data_2" \ - "$test_data_repo/$cel_name2" + "$test_data_repo/$cel_name2" fi if [ ! -e "$cel_test_data_3" ]; then echo "Downloading Huex Brain Array CEL for tests." wget -q -O "$cel_test_data_3" \ - "$test_data_repo/$cel_name3" + "$test_data_repo/$cel_name3" fi if [ ! -e "$pcl_test_data_1" ]; then mkdir -p "$pcl_test_dir" echo "Downloading pre-computed PCL for tests." wget -q -O "$pcl_test_data_1" \ - "$test_data_repo/$pcl_name" + "$test_data_repo/$pcl_name" fi if [ ! 
-e "$pcl_test_data_2" ]; then mkdir -p "$pcl_test_dir" echo "Downloading pre-computed Non-Brainarray PCL for tests." wget -q -O "$pcl_test_data_2" \ - "$test_data_repo/$pcl_name2" + "$test_data_repo/$pcl_name2" fi if [ ! -e "$pcl_test_data_3" ]; then mkdir -p "$pcl_test_dir" echo "Downloading pre-computed Huex Brain Array PCL for tests." wget -q -O "$pcl_test_data_3" \ - "$test_data_repo/$pcl_name3" + "$test_data_repo/$pcl_name3" fi fi @@ -176,14 +178,14 @@ if [ -z "$tag" ] || [ "$tag" = "transcriptome" ]; then mkdir -p "$tx_index_test_raw_dir" echo "Downloading fasta file for Transcriptome Index tests." wget -q -O "$tx_index_test_raw_dir/$fasta_file" \ - "$test_data_repo/$fasta_file" + "$test_data_repo/$fasta_file" fi gtf_file="aegilops_tauschii_short.gtf.gz" if [ ! -e "$tx_index_test_raw_dir/$gtf_file" ]; then mkdir -p "$tx_index_test_raw_dir" echo "Downloading gtf file for Transcriptome Index tests." wget -q -O "$tx_index_test_raw_dir/$gtf_file" \ - "$test_data_repo/$gtf_file" + "$test_data_repo/$gtf_file" fi tx_index_test_raw_dir2="$volume_directory/raw/TEST/TRANSCRIPTOME_INDEX/" gtf_file2="Homo_sapiens_testdata.gtf" @@ -191,7 +193,7 @@ if [ -z "$tag" ] || [ "$tag" = "transcriptome" ]; then mkdir -p "$tx_index_test_raw_dir2" echo "Downloading second gtf file for Transcriptome Index tests." wget -q -O "$tx_index_test_raw_dir2/$gtf_file2" \ - "$test_data_repo/$gtf_file2" + "$test_data_repo/$gtf_file2" fi fi @@ -210,18 +212,17 @@ GSE41355_non-normalized.txt GSE100301_non-normalized.txt' "$test_data_repo/$ilu_file" fi - i=$(( i + 1 )) + i=$((i + 1)) done unset i - ilu_test_ref_dir="$volume_directory/raw/TEST/ILLUMINA/reference" ilu_ref_file="Ad-Cre-2.AVG_Signal.tsv" if [ ! -e "$ilu_test_ref_dir/$ilu_ref_file" ]; then mkdir -p "$ilu_test_ref_dir" echo "Downloading Illumin reference file for Illumina tests." wget -q -O "$ilu_test_ref_dir/$ilu_ref_file" \ - "$test_data_repo/$ilu_ref_file" + "$test_data_repo/$ilu_ref_file" fi fi @@ -233,7 +234,7 @@ if [ -z "$tag" ] || [ "$tag" = "agilent" ]; then mkdir -p "$at_test_raw_dir" echo "Downloading Agilent file for A2C tests." wget -q -O "$at_test_raw_dir/$at_file" \ - "$test_data_repo/$at_file" + "$test_data_repo/$at_file" fi fi if [ -z "$tag" ] || [ "$tag" = "no_op" ]; then @@ -243,37 +244,37 @@ if [ -z "$tag" ] || [ "$tag" = "no_op" ]; then mkdir -p "$no_test_raw_dir" echo "Downloading NOOP file1." wget -q -O "$no_test_raw_dir/$no_file1" \ - "$test_data_repo/$no_file1" + "$test_data_repo/$no_file1" fi no_file2="GSM1234847_sample_table.txt" if [ ! -e "$no_test_raw_dir/$no_file2" ]; then mkdir -p "$no_test_raw_dir" echo "Downloading NOOP file2." wget -q -O "$no_test_raw_dir/$no_file2" \ - "$test_data_repo/$no_file2" + "$test_data_repo/$no_file2" fi no_file3="GSM1234847_sample_table_headerless.txt" if [ ! -e "$no_test_raw_dir/$no_file3" ]; then mkdir -p "$no_test_raw_dir" echo "Processing NOOP file3." - tail -n +2 "$no_test_raw_dir/$no_file2" > "$no_test_raw_dir/$no_file3" + tail -n +2 "$no_test_raw_dir/$no_file2" >"$no_test_raw_dir/$no_file3" fi no_file4="GSM1089291-tbl-1.txt" if [ ! -e "$no_test_raw_dir/$no_file4" ]; then mkdir -p "$no_test_raw_dir" echo "Downloading NOOP file4." wget -q -O "$no_test_raw_dir/$no_file4" \ - "$test_data_repo/$no_file4" + "$test_data_repo/$no_file4" fi no_file5="GSM1089291-tbl-1-modified.txt" if [ ! -e "$no_test_raw_dir/$no_file5" ]; then mkdir -p "$no_test_raw_dir" echo "Downloading NOOP file5." 
wget -q -O "$no_test_raw_dir/$no_file5" \ - "$test_data_repo/$no_file5" + "$test_data_repo/$no_file5" fi - # Reference files + # Reference files. no_test_exp_dir="$volume_directory/TEST/NO_OP/EXPECTED" no_test_exp_files='gene_converted_GSM557500-tbl-1.txt GSM269747.PCL gene_converted_GSM1234847-tbl-1.txt gene_converted_GSM1089291-tbl-1.txt' mkdir -p "$no_test_exp_dir" @@ -283,16 +284,16 @@ if [ -z "$tag" ] || [ "$tag" = "no_op" ]; then if ! [ -e "$no_test_exp_dir/$no_test_exp_file" ]; then echo "Downloading NOOP expected file$i." wget -O "$no_test_exp_dir/$no_test_exp_file" \ - "$test_data_repo/$no_test_exp_file" + "$test_data_repo/$no_test_exp_file" fi - i=$(( i + 1 )) + i=$((i + 1)) done unset i fi if [ -z "$tag" ] || [ "$tag" = "smasher" ] || [ "$tag" = "compendia" ]; then - # Make sure PCL for test is downloaded from S3 + # Make sure PCL for test is downloaded from S3. pcl_name="GSM1237810_T09-1084.PCL" pcl_name2="GSM1237812_S97-PURE.PCL" pcl_name3="GSM1238108-tbl-1.txt" @@ -334,101 +335,101 @@ if [ -z "$tag" ] || [ "$tag" = "smasher" ] || [ "$tag" = "compendia" ]; then mkdir -p "$pcl_test_raw_dir" echo "Downloading PCL for tests." wget -q -O "$pcl_test_data_1" \ - "$test_data_repo/$pcl_name" + "$test_data_repo/$pcl_name" fi if [ ! -e "$pcl_test_data_2" ]; then echo "Downloading PCL2 for tests." wget -q -O "$pcl_test_data_2" \ - "$test_data_repo/$pcl_name2" + "$test_data_repo/$pcl_name2" fi if [ ! -e "$pcl_test_data_3" ]; then echo "Downloading PCL3 for tests." wget -q -O "$pcl_test_data_3" \ - "$test_data_repo/$pcl_name3" + "$test_data_repo/$pcl_name3" fi if [ ! -e "$pcl_test_data_4" ]; then echo "Downloading PCL4 for tests." wget -q -O "$pcl_test_data_4" \ - "$test_data_repo/$pcl_name4" + "$test_data_repo/$pcl_name4" fi if [ ! -e "$pcl_test_data_5" ]; then echo "Downloading PCL5 for tests." wget -q -O "$pcl_test_data_5" \ - "$test_data_repo/$pcl_name5" + "$test_data_repo/$pcl_name5" fi if [ ! -e "$pcl_test_data_6" ]; then echo "Downloading PCL6 for tests." wget -q -O "$pcl_test_data_6" \ - "$test_data_repo/$pcl_name6" + "$test_data_repo/$pcl_name6" fi if [ ! -e "$pcl_test_data_7" ]; then echo "Downloading PCL7 for tests." wget -q -O "$pcl_test_data_7" \ - "$test_data_repo/$pcl_name7" + "$test_data_repo/$pcl_name7" fi if [ ! -e "$pcl_test_data_gs1" ]; then echo "Downloading PCLGS1 for tests." wget -q -O "$pcl_test_data_gs1" \ - "$test_data_repo/$pcl_name_gs1" + "$test_data_repo/$pcl_name_gs1" fi if [ ! -e "$pcl_test_data_gs2" ]; then echo "Downloading PCLGS2 for tests." wget -q -O "$pcl_test_data_gs2" \ - "$test_data_repo/$pcl_name_gs2" + "$test_data_repo/$pcl_name_gs2" fi if [ ! -e "$pcl_test_data_ts1" ]; then echo "Downloading PCLTS1 for tests." wget -q -O "$pcl_test_data_ts1" \ - "$test_data_repo/$pcl_name_ts1" + "$test_data_repo/$pcl_name_ts1" fi if [ ! -e "$pcl_test_data_ts2" ]; then echo "Downloading PCLTS2 for tests." wget -q -O "$pcl_test_data_ts2" \ - "$test_data_repo/$pcl_name_ts2" + "$test_data_repo/$pcl_name_ts2" fi if [ ! -e "$pcl_test_data_ta1" ]; then echo "Downloading PCLTA1 for tests." wget -q -O "$pcl_test_data_ta1" \ - "$test_data_repo/$pcl_name_ta1" + "$test_data_repo/$pcl_name_ta1" fi if [ ! -e "$bad_test_data_1" ]; then mkdir -p "$bad_test_raw_dir" echo "Downloading Bad PCL for tests." wget -q -O "$bad_test_data_1" \ - "$test_data_repo/$bad_name" + "$test_data_repo/$bad_name" fi if [ ! -e "$bad_test_data_2" ]; then mkdir -p "$bad_test_raw_dir" echo "Downloading Bad PCL for tests." 
wget -q -O "$bad_test_data_2" \ - "$test_data_repo/$bad_name2" + "$test_data_repo/$bad_name2" fi if [ ! -e "$bad_test_data_3" ]; then mkdir -p "$bad_test_raw_dir" echo "Downloading Bad PCL for tests." wget -q -O "$bad_test_data_3" \ - "$test_data_repo/$bad_name3" + "$test_data_repo/$bad_name3" fi if [ ! -e "$quant_test_data_1" ]; then mkdir -p "$quant_test_raw_dir" echo "Downloading Quant files for tests." wget -q -O "$quant_test_data_1" \ - "$test_data_repo/$quant_name" + "$test_data_repo/$quant_name" fi if [ ! -e "$quant_test_data_2" ]; then mkdir -p "$quant_test_raw_dir" echo "Downloading Quant files for tests." wget -q -O "$quant_test_data_2" \ - "$test_data_repo/$quant_name_2" + "$test_data_repo/$quant_name_2" fi # Mock out the AWS keys since we use VCR to mock out the request with these - # as the AWS credentials + # as the AWS credentials. export AWS_ACCESS_KEY_ID=XXX export AWS_SECRET_ACCESS_KEY=XXX fi if [ -z "$tag" ] || [ "$tag" = "qn" ]; then - # Make sure PCL for test is downloaded from S3 + # Make sure PCL for test is downloaded from S3. qn_name="1.tsv" qn_test_raw_dir="$volume_directory/QN" qn_test_data_1="$qn_test_raw_dir/$qn_name" @@ -436,7 +437,7 @@ if [ -z "$tag" ] || [ "$tag" = "qn" ]; then mkdir -p "$qn_test_raw_dir" echo "Downloading QN for tests." wget -q -O "$qn_test_data_1" \ - "$test_data_repo/$qn_name" + "$test_data_repo/$qn_name" fi qn_name="2.tsv" qn_test_raw_dir="$volume_directory/QN" @@ -445,7 +446,7 @@ if [ -z "$tag" ] || [ "$tag" = "qn" ]; then mkdir -p "$qn_test_raw_dir" echo "Downloading QN for tests." wget -q -O "$qn_test_data_2" \ - "$test_data_repo/$qn_name" + "$test_data_repo/$qn_name" fi qn_name="3.tsv" qn_test_raw_dir="$volume_directory/QN" @@ -454,7 +455,7 @@ if [ -z "$tag" ] || [ "$tag" = "qn" ]; then mkdir -p "$qn_test_raw_dir" echo "Downloading QN for tests." wget -q -O "$qn_test_data_3" \ - "$test_data_repo/$qn_name" + "$test_data_repo/$qn_name" fi qn_name="4.tsv" qn_test_raw_dir="$volume_directory/QN" @@ -463,7 +464,7 @@ if [ -z "$tag" ] || [ "$tag" = "qn" ]; then mkdir -p "$qn_test_raw_dir" echo "Downloading QN for tests." wget -q -O "$qn_test_data_4" \ - "$test_data_repo/$qn_name" + "$test_data_repo/$qn_name" fi qn_name="5.tsv" qn_test_raw_dir="$volume_directory/QN" @@ -472,7 +473,7 @@ if [ -z "$tag" ] || [ "$tag" = "qn" ]; then mkdir -p "$qn_test_raw_dir" echo "Downloading QN for tests." wget -q -O "$qn_test_data_5" \ - "$test_data_repo/$qn_name" + "$test_data_repo/$qn_name" fi qn_name="6.tsv" qn_test_raw_dir="$volume_directory/QN" @@ -481,7 +482,7 @@ if [ -z "$tag" ] || [ "$tag" = "qn" ]; then mkdir -p "$qn_test_raw_dir" echo "Downloading QN for tests." wget -q -O "$qn_test_data_6" \ - "$test_data_repo/$qn_name" + "$test_data_repo/$qn_name" fi qn_name="7.tsv" qn_test_raw_dir="$volume_directory/QN" @@ -490,7 +491,7 @@ if [ -z "$tag" ] || [ "$tag" = "qn" ]; then mkdir -p "$qn_test_raw_dir" echo "Downloading QN for tests." wget -q -O "$qn_test_data_7" \ - "$test_data_repo/$qn_name" + "$test_data_repo/$qn_name" fi fi if [ -z "$tag" ] || [ "$tag" = "compendia" ]; then @@ -522,58 +523,61 @@ if [ -z "$tag" ] || [ "$tag" = "compendia" ]; then mkdir -p "$qn_test_raw_dir" echo "Downloading QN for compendia tests." wget -q -O "$qn_test_data_1" \ - "$test_data_repo/$qn_name" + "$test_data_repo/$qn_name" fi fi -. scripts/common.sh +. ./scripts/common.sh + DB_HOST_IP=$(get_docker_db_ip_address) # Ensure permissions are set for everything within the test data directory. 
chmod -R a+rwX "$volume_directory" -worker_images="salmon transcriptome no_op downloaders smasher illumina agilent affymetrix qn affymetrix_local janitor compendia" +worker_images="salmon transcriptome no_op downloaders smasher illumina agilent affymetrix qn janitor compendia" for image in $worker_images; do if [ -z "$tag" ] || [ "$tag" = "$image" ]; then if [ "$image" = "agilent" ] || [ "$image" = "affymetrix" ]; then - # Agilent uses the same docker image as Affymetrix - ./scripts/prepare_image.sh -p -i affymetrix -s workers - ./scripts/prepare_image.sh -i affymetrix_local -d ccdlstaging - docker tag ccdlstaging/dr_affymetrix_local:latest ccdlstaging/dr_affymetrix:latest - image_name=ccdlstaging/dr_affymetrix + # Agilent uses the same docker image as Affymetrix. + ./scripts/prepare_image.sh -i affymetrix -s workers + image_name="$DOCKERHUB_REPO/dr_affymetrix" elif [ "$tag" = "qn" ]; then ./scripts/prepare_image.sh -i smasher -s workers - image_name=ccdlstaging/dr_smasher + image_name="$DOCKERHUB_REPO/dr_smasher" elif [ "$tag" = "janitor" ]; then ./scripts/prepare_image.sh -i smasher -s workers - image_name=ccdlstaging/dr_smasher + image_name="$DOCKERHUB_REPO/dr_smasher" else ./scripts/prepare_image.sh -i "$image" -s workers - image_name=ccdlstaging/dr_$image + image_name="$DOCKERHUB_REPO/dr_$image" fi - # Strip out tag argument + # Strip out tag argument. # shellcheck disable=2001 args_without_tag="$(echo "$@" | sed "s/-t $tag//")" # shellcheck disable=2086 test_command="$(run_tests_with_coverage --tag="$image" $args_without_tag)" - # Only run interactively if we are on a TTY + # Only run interactively if we are on a TTY. if [ -t 1 ]; then - INTERACTIVE="-i" + INTERACTIVE="--interactive" fi echo "Running tests with the following command:" echo "$test_command" - docker run -t $INTERACTIVE \ - --add-host=database:"$DB_HOST_IP" \ - --env-file workers/environments/test \ - --env AWS_ACCESS_KEY_ID \ - --env AWS_SECRET_ACCESS_KEY \ - --memory=5G \ - --platform linux/amd64 \ - --volume "$volume_directory":/home/user/data_store \ - "$image_name" bash -c "$test_command" + # shellcheck disable=SC2086 + docker run \ + --add-host=database:"$DB_HOST_IP" \ + --env AWS_ACCESS_KEY_ID \ + --env AWS_SECRET_ACCESS_KEY \ + --env-file workers/environments/test \ + --memory=5G \ + --platform linux/amd64 \ + --tty \ + --volume "$volume_directory":/home/user/data_store \ + $INTERACTIVE \ + "$image_name" \ + bash -c "$test_command" fi done
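Note: several of the scripts above call get_branch_hash and set_up_docker_builder from scripts/common.sh, whose definitions are not part of this section. As a rough, hypothetical sketch only (the real helpers in common.sh may differ), they could look something like:

# Hypothetical sketch -- not the actual scripts/common.sh implementation.
# Derive a short, Docker-safe tag from the current git branch name.
get_branch_hash() {
    git rev-parse --abbrev-ref HEAD | md5sum | cut -c1-10
}

# Create or reuse a buildx builder so the registry cache-from/cache-to
# options used by prepare_image.sh and update_docker_images.sh work.
set_up_docker_builder() {
    if ! docker buildx inspect dr_builder >/dev/null 2>&1; then
        docker buildx create --name dr_builder --use
    else
        docker buildx use dr_builder
    fi
}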