From 373b86b302404f1a39b7c5c1cf9019eceba8e385 Mon Sep 17 00:00:00 2001 From: Nate Coraor Date: Thu, 23 May 2024 16:41:54 -0400 Subject: [PATCH 1/4] Fixes for remote rsync + connection options --- defaults/main.yml | 4 +++- files/backup.py | 2 +- templates/archive_wal.sh.j2 | 8 +++++++- 3 files changed, 11 insertions(+), 3 deletions(-) diff --git a/defaults/main.yml b/defaults/main.yml index bbf7747..1c4c1a0 100644 --- a/defaults/main.yml +++ b/defaults/main.yml @@ -31,11 +31,13 @@ postgresql_backup_keep: 30 __postgresql_pgdg_bin_dir: "{{ '/usr/pgsql-' ~ (postgresql_version | replace('.', '')) ~ '/bin' }}" postgresql_backup_command: >- - {{ postgresql_backup_local_dir | quote }}/bin/backup.py + {{ postgresql_backup_python_executable }} {{ postgresql_backup_local_dir | quote }}/bin/backup.py {{ '--rsync-connect-opts ' ~ (postgresql_backup_rsync_connect_opts | quote) if postgresql_backup_rsync_connect_opts else '' }} --rsync-backup-opts {{ postgresql_backup_rsync_backup_opts | regex_replace('^-', '\-') | quote }} --keep {{ postgresql_backup_keep | quote }} {{ '--pg-bin-dir ' ~ __postgresql_pgdg_bin_dir if ansible_os_family == 'RedHat' else '' }} --backup --clean-archive {{ postgresql_backup_dir | quote }} +postgresql_backup_python_executable: "python" + postgresql_default_auth_method: "{{ (postgresql_version is version('13', '>')) | ternary('scram-sha-256', 'md5') }}" diff --git a/files/backup.py b/files/backup.py index 09bf685..ed7d08d 100644 --- a/files/backup.py +++ b/files/backup.py @@ -109,7 +109,7 @@ def pg_major_version(self): def rsync_cmd(self): cmd = ['rsync'] if self._rsync_opts: - cmd.extend(shlex.split(rsync_opts)) + cmd.extend(shlex.split(self._rsync_opts)) return cmd @property diff --git a/templates/archive_wal.sh.j2 b/templates/archive_wal.sh.j2 index 925b32c..d6d4833 100644 --- a/templates/archive_wal.sh.j2 +++ b/templates/archive_wal.sh.j2 @@ -13,10 +13,16 @@ wal_archive_dir={{ (postgresql_backup_dir ~ '/wal_archive') | quote }} file_path="$1" 
file_name="$2" +{% if ":" in postgresql_backup_dir %} +empty=$(mktemp -d -t ansible-postgresql-empty.XXXXXX) +rsync {{ postgresql_backup_rsync_connect_opts }} "${empty}/" "$wal_archive_dir" +rmdir "$empty" +{% else %} mkdir -p "$wal_archive_dir" +{% endif %} # If rsync outputs anything to stdout, the destination already existed, which should not happen -if [ -n "$(rsync {{ postgresql_archive_wal_rsync_args }} "$file_path" "$wal_archive_dir")" ]; then +if [ -n "$(rsync {{ postgresql_backup_rsync_connect_opts }} {{ postgresql_archive_wal_rsync_args }} "$file_path" "$wal_archive_dir")" ]; then echo "ERROR: ${wal_archive_dir}/${file_name} already exists, overwriting is not allowed!" exit 1 fi From 625b196c74aebce76d89a169d7b351e71dffcb8f Mon Sep 17 00:00:00 2001 From: Nate Coraor Date: Thu, 23 May 2024 17:15:02 -0400 Subject: [PATCH 2/4] Allow psycopg2 import failure if not actually running backup --- defaults/main.yml | 2 +- files/backup.py | 7 ++++++- 2 files changed, 7 insertions(+), 2 deletions(-) diff --git a/defaults/main.yml b/defaults/main.yml index 1c4c1a0..e453a88 100644 --- a/defaults/main.yml +++ b/defaults/main.yml @@ -36,7 +36,7 @@ postgresql_backup_command: >- --rsync-backup-opts {{ postgresql_backup_rsync_backup_opts | regex_replace('^-', '\-') | quote }} --keep {{ postgresql_backup_keep | quote }} {{ '--pg-bin-dir ' ~ __postgresql_pgdg_bin_dir if ansible_os_family == 'RedHat' else '' }} - --backup --clean-archive {{ postgresql_backup_dir | quote }} + --backup {{ (':' in postgresql_backup_dir) | ternary('', '--clean-archive') }} {{ postgresql_backup_dir | quote }} postgresql_backup_python_executable: "python" diff --git a/files/backup.py b/files/backup.py index ed7d08d..338559a 100644 --- a/files/backup.py +++ b/files/backup.py @@ -30,7 +30,10 @@ except ImportError: from pipes import quote as shlex_quote -import psycopg2 +try: + import psycopg2 +except ImportError: + psycopg2 = None RSYNC_EXCLUDES = ( @@ -147,6 +150,8 @@ def parse_args(argv): 
parser.add_argument('-v', '--verbose', action='store_true', default=False, help='Verbose output') parser.add_argument('backup_path', help='Backup to location (rsync-compatible string)') args = parser.parse_args(argv) + if args.backup and psycopg2 is None: + parser.error('--backup specified but psycopg2 could not be imported') if args.clean_archive and ':' in args.backup_path: parser.error('--clean-archive cannot be used with remote backup directories') return args From 6453bfe14ff82cd28fa349c849188924459ce100 Mon Sep 17 00:00:00 2001 From: Nate Coraor Date: Thu, 23 May 2024 17:15:27 -0400 Subject: [PATCH 3/4] Update docs for remote backup --- README.md | 59 ++++++++++++++++++++++++++++++++++++++++++++----------- 1 file changed, 47 insertions(+), 12 deletions(-) diff --git a/README.md b/README.md index ea2403e..307b2ef 100644 --- a/README.md +++ b/README.md @@ -91,9 +91,21 @@ Role Variables ### Backups ### -- `postgresql_backup_dir`: If set, enables [PITR][postgresql_pitr] backups. Set this to a directory where your database - will be backed up (this can be any format supported by rsync, e.g. `user@host:/path`). The most recent backup will be - in a subdirectory named `current`. +This role can deploy and schedule the configuration and scripts to maintain PostgreSQL [PITR][postgresql_pitr] backups. + +Full backups will be made on the configured interval, whereas write-ahead-log (WAL) segments between full backups will +be archived to `{{ postgresql_backup_dir }}/wal_archive/` when instructed by the PostgreSQL server. WAL segments can be +removed from this directory once the oldest backup referencing them has been removed. This is done automatically for you +by the backup script if `postgresql_backup_dir` is mounted locally. + +When `postgresql_backup_dir` is a remote rsync path (containing a "`:`"), the backup script will still maintain backups +(including deleting older full backups) but cannot prune the `wal_archive/` directory automatically.
If you are able to +install the standard `pg_archivecleanup` utility from the PostgreSQL client package on your backup server, you can run +this role's backup script with the `--clean-archive` option directly on the backup server instead. + +- `postgresql_backup_dir`: If set, enables PITR backups. Set this to a directory where your database will be backed up + (this can be any format supported by rsync, e.g. `user@host:/path`). The most recent backup will be in a subdirectory + named `current`. - `postgresql_backup_local_dir`: Filesystem path on the PostgreSQL server where backup scripts will be placed. @@ -122,9 +134,8 @@ Standard install: Default `postgresql.conf`, `pg_hba.conf` and default version f --- - hosts: dbservers - remote_user: root roles: - - postgresql + - galaxyproject.postgresql ``` Use the pgdg packages on a Debian-based host: @@ -133,11 +144,10 @@ Use the pgdg packages on a Debian-based host: --- - hosts: dbservers - remote_user: root vars: postgresql_flavor: pgdg roles: - - postgresql + - galaxyproject.postgresql ``` Use the PostgreSQL 9.5 packages and set some `postgresql.conf` options and `pg_hba.conf` entries: @@ -146,7 +156,6 @@ Use the PostgreSQL 9.5 packages and set some `postgresql.conf` options and `pg_h --- - hosts: dbservers - remote_user: root vars: postgresql_version: 9.5 postgresql_conf: @@ -155,18 +164,44 @@ Use the PostgreSQL 9.5 packages and set some `postgresql.conf` options and `pg_h postgresql_pg_hba_conf: - host all all 10.0.0.0/8 md5 roles: - - postgresql + - galaxyproject.postgresql ``` -Enable backups to /archive +Enable backups to /archive: ```yaml - hosts: all - remote_user: root vars: postgresql_backup_dir: /archive roles: - - postgresql + - galaxyproject.postgresql +``` + +Enable backups to /archive on a remote server: + +```yaml +- hosts: dbservers + vars: + postgresql_backup_dir: backup.example.org:/archive + roles: + - galaxyproject.postgresql + +- hosts: backupservers + tasks: + - name: Install PostgreSQL scripts + 
ansible.builtin.apt: + name: postgresql-common + - name: Copy backup script + ansible.builtin.copy: + src: roles/galaxyproject.postgresql/files/backup.py + dest: /usr/local/bin/pgbackup.py + mode: "0755" + - name: Schedule WAL pruning + ansible.builtin.cron: + name: Prune PostgreSQL Archived WALs + hour: 22 + minute: 0 + job: /usr/local/bin/pgbackup.py --clean-archive /archive ``` License From 21fcdfc419ff48b6e948d0c9ee17da6985f571a8 Mon Sep 17 00:00:00 2001 From: Nate Coraor Date: Fri, 24 May 2024 10:49:29 -0400 Subject: [PATCH 4/4] Make a var for whether `postgresql_backup_dir` is remote and set it based on the first char being `/` rather than containing a `:`. --- defaults/main.yml | 5 ++++- tasks/backup.yml | 2 +- templates/archive_wal.sh.j2 | 2 +- 3 files changed, 6 insertions(+), 3 deletions(-) diff --git a/defaults/main.yml b/defaults/main.yml index e453a88..4ef9d6c 100644 --- a/defaults/main.yml +++ b/defaults/main.yml @@ -17,6 +17,9 @@ postgresql_backup_local_dir: >- '/var/lib/postgresql' if ansible_os_family == 'Debian' else '~postgres') }}/backup postgresql_create_backup_dir: true +# Controls whether the wal_archive directory is cleaned and whether postgresql_create_backup_dir can be used +postgresql_backup_dir_is_remote: "{{ postgresql_backup_dir[0] != '/' }}" + # Options used for the WAL archive command - do not change this unless you have read the PITR documentation and # understand how this command must work. 
postgresql_archive_wal_rsync_args: '--ignore-existing -ptg --info=skip1' @@ -36,7 +39,7 @@ postgresql_backup_command: >- --rsync-backup-opts {{ postgresql_backup_rsync_backup_opts | regex_replace('^-', '\-') | quote }} --keep {{ postgresql_backup_keep | quote }} {{ '--pg-bin-dir ' ~ __postgresql_pgdg_bin_dir if ansible_os_family == 'RedHat' else '' }} - --backup {{ (':' in postgresql_backup_dir) | ternary('', '--clean-archive') }} {{ postgresql_backup_dir | quote }} + --backup {{ postgresql_backup_dir_is_remote | ternary('', '--clean-archive') }} {{ postgresql_backup_dir | quote }} postgresql_backup_python_executable: "python" diff --git a/tasks/backup.yml b/tasks/backup.yml index f3d440d..40e5334 100644 --- a/tasks/backup.yml +++ b/tasks/backup.yml @@ -18,7 +18,7 @@ mode: 0750 state: directory path: "{{ postgresql_backup_dir }}" - when: postgresql_backup_dir[0] == '/' and postgresql_create_backup_dir + when: not postgresql_backup_dir_is_remote and postgresql_create_backup_dir - name: Install backup script templates template: diff --git a/templates/archive_wal.sh.j2 b/templates/archive_wal.sh.j2 index d6d4833..33d9941 100644 --- a/templates/archive_wal.sh.j2 +++ b/templates/archive_wal.sh.j2 @@ -13,7 +13,7 @@ wal_archive_dir={{ (postgresql_backup_dir ~ '/wal_archive') | quote }} file_path="$1" file_name="$2" -{% if ":" in postgresql_backup_dir %} +{% if postgresql_backup_dir_is_remote %} empty=$(mktemp -d -t ansible-postgresql-empty.XXXXXX) rsync {{ postgresql_backup_rsync_connect_opts }} "${empty}/" "$wal_archive_dir" rmdir "$empty"