# Backup and restore test #77
# Workflow file for this run
# NOTE: this file was flagged as containing bidirectional Unicode text that may be
# interpreted or compiled differently than what appears below. To review, open the
# file in an editor that reveals hidden Unicode characters.
---
# TODO:
# - [ ] test on XL, also break the puppetdb database
# - [ ] test custom scenario for only restoring primary
# - [ ] test custom scenario for only restoring puppetdb (on external server)
name: "Backup and restore test"

# Triggers:
#   * pull_request_target - when a pull request is marked ready for review.
#   * workflow_dispatch   - manual run; the inputs below select the image,
#                           PE architecture and PE version to test.
on:
  pull_request_target:
    types: [ready_for_review]
  workflow_dispatch:
    inputs:
      image:
        description: "GCP image for test cluster"
        required: true
        default: "almalinux-cloud/almalinux-8"
      architecture:
        description: "PE architecture to test"
        required: true
        default: "standard"
        type: choice
        options:
          - standard
          - standard-with-dr
          - large
          - large-with-dr
          - extra-large
          - extra-large-with-dr
      version:
        description: "PE version to install"
        required: true
        default: "2023.5.0"
      # Compared against the string 'true' by the job's steps, hence a quoted
      # string default rather than a YAML boolean.
      ssh-debugging:
        description: "Boolean; whether or not to pause for ssh debugging"
        required: true
        default: "false"
# Single job: provision a test cluster, install PE, take a peadm backup,
# deliberately break the primary (and, for extra-large, the external database),
# restore from the backup, and smoke-test the result. The cluster is torn down
# at the end regardless of the outcome.
#
# Wherever an input is read, a `|| '<value>'` fallback supplies the value used
# when the input is empty (for example on runs not started via
# workflow_dispatch).
jobs:
  backup-restore-test:
    name: "Backup, break and restore cluster: PE ${{ github.event.inputs.version || '2023.5.0' }} ${{ github.event.inputs.architecture || 'extra-large' }} on ${{ github.event.inputs.image || 'almalinux-cloud/almalinux-8' }}"
    # NOTE(review): GitHub has retired the hosted ubuntu-20.04 image - confirm
    # and move to a supported runner label if jobs no longer get scheduled.
    runs-on: ubuntu-20.04
    env:
      BOLT_GEM: true
      BOLT_DISABLE_ANALYTICS: true
      LANG: "en_US.UTF-8"
    steps:
      # NOTE(review): third-party action pinned to a moving branch while it is
      # handed secrets - consider pinning to a commit SHA.
      - name: "Start SSH session"
        if: ${{ github.event.inputs.ssh-debugging == 'true' }}
        uses: luchihoratiu/debug-via-ssh@main
        with:
          NGROK_AUTH_TOKEN: ${{ secrets.NGROK_AUTH_TOKEN }}
          SSH_PASS: ${{ secrets.SSH_PASS }}

      - name: "Checkout Source"
        uses: actions/checkout@v4

      - name: "Activate Ruby 2.7"
        uses: ruby/setup-ruby@v1
        with:
          ruby-version: "2.7"
          bundler-cache: true

      - name: "Print bundle environment"
        if: ${{ github.repository_owner == 'puppetlabs' }}
        run: |
          echo ::group::info:bundler
          bundle env
          echo ::endgroup::

      - name: "Provision test cluster"
        timeout-minutes: 15
        run: |
          echo ::group::prepare
          mkdir -p $HOME/.ssh
          echo 'Host *' > $HOME/.ssh/config
          echo ' ServerAliveInterval 150' >> $HOME/.ssh/config
          echo ' ServerAliveCountMax 2' >> $HOME/.ssh/config
          bundle exec rake spec_prep
          echo ::endgroup::
          echo ::group::provision
          bundle exec bolt plan run peadm_spec::provision_test_cluster \
            --modulepath spec/fixtures/modules \
            provider=provision_service \
            image=${{ github.event.inputs.image || 'almalinux-cloud/almalinux-8' }} \
            architecture=${{ github.event.inputs.architecture || 'extra-large' }}
          echo ::endgroup::
          echo ::group::info:request
          cat request.json || true; echo
          echo ::endgroup::
          echo ::group::info:inventory
          sed -e 's/password: .*/password: "[redacted]"/' < spec/fixtures/litmus_inventory.yaml || true
          echo ::endgroup::

      - name: "Install PE on test cluster"
        timeout-minutes: 120
        run: |
          bundle exec bolt plan run peadm_spec::install_test_cluster \
            --inventoryfile spec/fixtures/litmus_inventory.yaml \
            --modulepath spec/fixtures/modules \
            architecture=${{ github.event.inputs.architecture || 'extra-large' }} \
            version=${{ github.event.inputs.version || '2023.5.0' }} \
            --stream

      - name: Perform peadm backup of cluster
        timeout-minutes: 10
        continue-on-error: true
        run: |
          echo ::group::prepare
          mkdir -p $HOME/.ssh
          echo 'Host *' > $HOME/.ssh/config
          echo ' ServerAliveInterval 150' >> $HOME/.ssh/config
          echo ' ServerAliveCountMax 2' >> $HOME/.ssh/config
          bundle exec rake spec_prep
          echo ::endgroup::
          echo ::group::backup
          bundle exec bolt plan run peadm_spec::test_backup \
            --inventoryfile spec/fixtures/litmus_inventory.yaml \
            --modulepath spec/fixtures/modules \
            --stream
          echo ::endgroup::

      # yq is used by the following steps to look up target URIs by role in
      # the litmus inventory.
      - name: Set up yq
        uses: frenck/action-setup-yq@v1
        with:
          version: v4.30.5

      - name: Break the primary host
        run: |
          primary=$(yq '.groups[].targets[] | select(.vars.role == "primary") | .uri' spec/fixtures/litmus_inventory.yaml)
          echo "Removing ssl directories"
          bundle exec bolt command run "rm -rf /etc/puppetlabs/puppetserver/ca /etc/puppetlabs/puppet/ssl" -t $primary --inventoryfile spec/fixtures/litmus_inventory.yaml
          echo "Removing classifier database"
          bundle exec bolt command run "rm -rf /opt/puppetlabs/server/data/postgresql/classifier" -t $primary --inventoryfile spec/fixtures/litmus_inventory.yaml
          bundle exec bolt command run "puppet infrastructure status" -t $primary --inventoryfile spec/fixtures/litmus_inventory.yaml
          #TODO if arch is XL, run pe-uninstaller on the primary database

      - name: Output PE cluster status
        run: |
          primary=$(yq '.groups[].targets[] | select(.vars.role == "primary") | .uri' spec/fixtures/litmus_inventory.yaml)
          bundle exec bolt command run "puppet infrastructure status" -t $primary --inventoryfile spec/fixtures/litmus_inventory.yaml

      - name: Reinstall PE on the primary host
        continue-on-error: true
        # TODO: move reinstall_pe plan to the peadm::utils folder
        run: |
          primary=$(yq '.groups[].targets[] | select(.vars.role == "primary") | .uri' spec/fixtures/litmus_inventory.yaml)
          bundle exec bolt task run peadm_spec::reinstall_pe uninstall=true version=${{ github.event.inputs.version || '2023.5.0' }} -t $primary \
            --inventoryfile spec/fixtures/litmus_inventory.yaml \
            --modulepath spec/fixtures/modules \
            --verbose \
            --stream

      - name: Perform peadm recovery restore of primary server
        timeout-minutes: 30
        continue-on-error: true
        run: |
          echo ::group::prepare
          mkdir -p $HOME/.ssh
          echo 'Host *' > $HOME/.ssh/config
          echo ' ServerAliveInterval 150' >> $HOME/.ssh/config
          echo ' ServerAliveCountMax 2' >> $HOME/.ssh/config
          bundle exec rake spec_prep
          echo ::endgroup::
          primary=$(yq '.groups[].targets[] | select(.vars.role == "primary") | .uri' spec/fixtures/litmus_inventory.yaml)
          # bundle exec bolt command run "echo '[Service]' | sudo tee /etc/systemd/system/pe-puppetdb.service.d/10-shortcircuit.conf; echo 'TimeoutStartSec=1' | sudo tee -a /etc/systemd/system/pe-puppetdb.service.d/10-shortcircuit.conf; echo 'TimeoutStopSec=1' | sudo tee -a /etc/systemd/system/pe-puppetdb.service.d/10-shortcircuit.conf; echo 'Restart=no' | sudo tee -a /etc/systemd/system/pe-puppetdb.service.d/10-shortcircuit.conf; sudo systemctl daemon-reload" -t $primary --inventoryfile spec/fixtures/litmus_inventory.yaml
          #TODO update the restore to:
          # - restore the puppetdb in the standard cases
          # - not restore the puppetdb if there is a broken external db
          echo ::group::restore
          bundle exec bolt plan run peadm_spec::test_restore \
            --inventoryfile spec/fixtures/litmus_inventory.yaml \
            --modulepath spec/fixtures/modules \
            --stream \
            || true # ignore errors
          echo ::endgroup::
          # bundle exec bolt command run "systemctl stop pe-puppetdb.service && rm /etc/systemd/system/pe-puppetdb.service.d/10-shortcircuit.conf && systemctl daemon-reload && systemctl start pe-puppetdb" -t $primary --inventoryfile spec/fixtures/litmus_inventory.yaml

      - name: Output PE cluster status
        run: |
          primary=$(yq '.groups[].targets[] | select(.vars.role == "primary") | .uri' spec/fixtures/litmus_inventory.yaml)
          bundle exec bolt command run "puppet infrastructure status" -t $primary --inventoryfile spec/fixtures/litmus_inventory.yaml

      # perform a smoke test. Test of "green" puppet infra status is already done inside peadm_spec::test_restore
      - name: Smoke test
        run: |
          bundle exec bolt plan run peadm_spec::puppet_run_test \
            --inventoryfile spec/fixtures/litmus_inventory.yaml \
            --modulepath spec/fixtures/modules \
            --stream

      # TODO: add docs to README about actions the customer needs to do to recover the primary db
      # The whole comparison must live inside ${{ }}: with the `== '...'` part
      # outside the braces the condition is a non-empty string, which is always
      # truthy, so the step would run for every architecture.
      - name: Break external DB
        if: ${{ (github.event.inputs.architecture || 'extra-large') == 'extra-large' }}
        run: |
          echo "Uninstalling PE from primary database"
          primary_db=$(yq '.groups[].targets[] | select(.vars.role == "primary-pdb-postgresql") | .uri' spec/fixtures/litmus_inventory.yaml)
          bundle exec bolt command run "/opt/puppetlabs/bin/puppet-enterprise-uninstaller -p -d -y || true" -t $primary_db \
            --inventoryfile spec/fixtures/litmus_inventory.yaml \
            --modulepath spec/fixtures/modules \
            --verbose \
            --stream

      - name: Output PE cluster status
        run: |
          primary=$(yq '.groups[].targets[] | select(.vars.role == "primary") | .uri' spec/fixtures/litmus_inventory.yaml)
          bundle exec bolt command run "puppet infrastructure status" -t $primary --inventoryfile spec/fixtures/litmus_inventory.yaml

      # Same guard as "Break external DB": comparison kept inside ${{ }} so it
      # is actually evaluated.
      - name: Setup Primary DB in XL
        if: ${{ (github.event.inputs.architecture || 'extra-large') == 'extra-large' }}
        # TODO: move peadm_spec::init_db_server plan to the peadm::utils folder
        run: |
          primary_db=$(yq '.groups[].targets[] | select(.vars.role == "primary-pdb-postgresql") | .uri' spec/fixtures/litmus_inventory.yaml)
          bundle exec bolt plan run peadm_spec::init_db_server db_host=$primary_db \
            install_pe=true \
            --inventoryfile spec/fixtures/litmus_inventory.yaml \
            --modulepath spec/fixtures/modules \
            --stream \
            || true # ignore errors
          # restore the puppetdb database
          bundle exec bolt plan run peadm_spec::test_restore restore_type="recovery-db" \
            --inventoryfile spec/fixtures/litmus_inventory.yaml \
            --modulepath spec/fixtures/modules \
            --stream

      - name: Output PE cluster status
        run: |
          primary=$(yq '.groups[].targets[] | select(.vars.role == "primary") | .uri' spec/fixtures/litmus_inventory.yaml)
          bundle exec bolt command run "puppet infrastructure status" -t $primary --inventoryfile spec/fixtures/litmus_inventory.yaml

      # perform a smoke test. Test of "green" puppet infra status is already done inside peadm_spec::test_restore
      - name: Smoke test
        run: |
          bundle exec bolt plan run peadm_spec::puppet_run_test \
            --inventoryfile spec/fixtures/litmus_inventory.yaml \
            --modulepath spec/fixtures/modules \
            --stream

      # Holds the job open (even after a failure) while ${HOME}/pause exists,
      # polling every 10 seconds, so an SSH debugging session can continue.
      - name: "Wait as long as the file ${HOME}/pause file is present"
        continue-on-error: true
        if: ${{ always() && github.event.inputs.ssh-debugging == 'true' }}
        # if: github.event.inputs.ssh-debugging == 'true'
        run: |
          while [ -f "${HOME}/pause" ] ; do
            echo "${HOME}/pause present, sleeping for 10 seconds..."
            sleep 10
          done
          echo "${HOME}/pause absent, continuing workflow."

      # Runs even when earlier steps failed; does nothing if no inventory
      # file exists.
      - name: "Tear down cluster"
        if: always()
        run: |
          if [ -f spec/fixtures/litmus_inventory.yaml ]; then
            echo ::group::tear_down
            bundle exec rake 'litmus:tear_down'
            echo ::endgroup::
            echo ::group::info:request
            cat request.json || true; echo
            echo ::endgroup::
          fi