From c71e5f20f2eba6caed9c147aad9fc751c06efa37 Mon Sep 17 00:00:00 2001 From: "Nathan J. Mehl" Date: Wed, 18 Sep 2019 21:19:10 -0400 Subject: [PATCH 1/3] better clusterIP filtering logic, add default services - pull the inline python one-liner out into a separate script so that we can: - apply better logic around filtering out service.spec.clusterIP: if the value is "None" that's a [headless service](https://kubernetes.io/docs/concepts/services-networking/service/#headless-services) and _not_ preserving the value will result in the creation of a NON-headless service, which is likely to be extremely wrong. - set `explicit_start=True` in the call to pyyaml.safe_dump() so that backup files can be safely concatenated together - add a few more default resource types to back up: - [backendconfigs](https://cloud.google.com/kubernetes-engine/docs/concepts/backendconfig) - [serviceaccount](https://kubernetes.io/docs/tasks/configure-pod-container/configure-service-account/) --- Dockerfile | 1 + backup.sh | 3 +++ entrypoint.sh | 6 +++--- reflow.py | 18 ++++++++++++++++++ 4 files changed, 25 insertions(+), 3 deletions(-) create mode 100755 backup.sh create mode 100755 reflow.py diff --git a/Dockerfile b/Dockerfile index e64face..3bf415b 100644 --- a/Dockerfile +++ b/Dockerfile @@ -38,5 +38,6 @@ RUN echo "${KUBECTL_SHA256} kubectl" | sha256sum -c - || exit 10 ENV PATH="/:${PATH}" COPY entrypoint.sh / +COPY reflow.py /bin/ USER backup ENTRYPOINT ["/entrypoint.sh"] diff --git a/backup.sh b/backup.sh new file mode 100755 index 0000000..4aca9e9 --- /dev/null +++ b/backup.sh @@ -0,0 +1,3 @@ +#!/bin/bash +mkdir -p /tmp/backup +DRY_RUN=true GIT_REPO_PATH=/tmp/backup ./entrypoint.sh diff --git a/entrypoint.sh b/entrypoint.sh index 04372ba..2e46c6a 100755 --- a/entrypoint.sh +++ b/entrypoint.sh @@ -4,7 +4,7 @@ if [ -z "$NAMESPACES" ]; then NAMESPACES=$(kubectl get ns -o jsonpath={.items[*].metadata.name}) fi -RESOURCETYPES="${RESOURCETYPES:-"ingress deployment configmap svc rc ds 
networkpolicy statefulset cronjob pvc"}" +RESOURCETYPES="${RESOURCETYPES:-"ingress deployment configmap svc rc ds networkpolicy statefulset cronjob pvc backendconfig serviceaccount"}" GLOBALRESOURCES="${GLOBALRESOURCES:-"namespace storageclass clusterrole clusterrolebinding customresourcedefinition"}" # Initialize git repo @@ -81,6 +81,7 @@ for namespace in $NAMESPACES; do continue fi + OUTFILE="$GIT_REPO_PATH/$GIT_PREFIX_PATH/${namespace}/${name}.${type}.yaml" kubectl --namespace="${namespace}" get -o=json "$type" "$name" | jq --sort-keys \ 'del( .metadata.annotations."control-plane.alpha.kubernetes.io/leader", @@ -90,9 +91,8 @@ for namespace in $NAMESPACES; do .metadata.resourceVersion, .metadata.selfLink, .metadata.uid, - .spec.clusterIP, .status - )' | python -c 'import sys, yaml, json; yaml.safe_dump(json.load(sys.stdin), sys.stdout, default_flow_style=False)' >"$GIT_REPO_PATH/$GIT_PREFIX_PATH/${namespace}/${name}.${type}.yaml" + )' | /bin/reflow.py >"${OUTFILE}" done done done diff --git a/reflow.py b/reflow.py new file mode 100755 index 0000000..6f6b23f --- /dev/null +++ b/reflow.py @@ -0,0 +1,18 @@ +#!/usr/bin/env python + +import sys +import yaml +import json + +data = json.load(sys.stdin) + +# we don't want to preserve 'clusterIP: ' because in +# a restore-from-scratch situation the value will be bogus, but +# if the value is "None", that indicates a headless service and +# we _do_ want to preserve that. +if 'spec' in data: + if 'clusterIP' in data['spec']: + if data['spec']['clusterIP'] != "None": + del(data['spec']['clusterIP']) + +yaml.safe_dump(data, sys.stdout, explicit_start=True, default_flow_style=False) From 65ddd4dd23c61cdc952af746d5918711451ac123 Mon Sep 17 00:00:00 2001 From: "Nathan J. 
Mehl" Date: Thu, 19 Sep 2019 11:03:55 -0400 Subject: [PATCH 2/3] address comments - remove backendconfig from default resource set; it's a gke-only thing presently - add "EXTRA" env vars for the resource list variables so that they can be appended to as well as overwritten. --- README.md | 6 ++++-- entrypoint.sh | 6 ++++-- 2 files changed, 8 insertions(+), 4 deletions(-) diff --git a/README.md b/README.md index 7fcb69f..b5c36d8 100644 --- a/README.md +++ b/README.md @@ -15,8 +15,10 @@ Define the following environment parameters: * `GIT_REPO` - GIT repo url. **Required** * `GIT_PREFIX_PATH` - Path to the subdirectory in your repository. Default: `.` * `NAMESPACES` - List of namespaces to export. Default: all - * `GLOBALRESOURCES` - List of global resource types to export. Default: `namespace` - * `RESOURCETYPES` - List of resource types to export. Default: `ingress deployment configmap svc rc ds networkpolicy statefulset storageclass cronjob`. Notice that `Secret` objects are intentionally not exported by default (see [git-crypt section](#git-crypt) for details). + * `DEFAULT_GLOBALRESOURCES` - Base list of global resource types to export. Default: `namespace storageclass clusterrole clusterrolebinding customresourcedefinition` + * `EXTRA_GLOBALRESOURCES` - List of additional global resource types to export, should you simply want to append to the default list rather than replacing it entirely. Optional. Default: empty. + * `DEFAULT_RESOURCETYPES` - Base list of resource types to export. Default: `ingress deployment configmap svc rc ds networkpolicy statefulset cronjob pvc serviceaccount`. Notice that `Secret` objects are intentionally not exported by default (see [git-crypt section](#git-crypt) for details). + * `EXTRA_RESOURCETYPES` - List of additional resource types to export, should you simply want to append to the default list rather than replacing it entirely. Optional. Default: empty. * `GIT_USERNAME` - Display name of git user. 
Default: `kube-backup` * `GIT_EMAIL` - Email address of git user. Default: `kube-backup@example.com` * `GIT_BRANCH` - Use a specific git branch . Default: `master` diff --git a/entrypoint.sh b/entrypoint.sh index 2e46c6a..2203a0a 100755 --- a/entrypoint.sh +++ b/entrypoint.sh @@ -4,8 +4,10 @@ if [ -z "$NAMESPACES" ]; then NAMESPACES=$(kubectl get ns -o jsonpath={.items[*].metadata.name}) fi -RESOURCETYPES="${RESOURCETYPES:-"ingress deployment configmap svc rc ds networkpolicy statefulset cronjob pvc backendconfig serviceaccount"}" -GLOBALRESOURCES="${GLOBALRESOURCES:-"namespace storageclass clusterrole clusterrolebinding customresourcedefinition"}" +DEFAULT_RESOURCETYPES="${DEFAULT_RESOURCETYPES:-"ingress deployment configmap svc rc ds networkpolicy statefulset cronjob pvc serviceaccount"}" +RESOURCETYPES="${DEFAULT_RESOURCETYPES} ${EXTRA_RESOURCETYPES}" +DEFAULT_GLOBALRESOURCES="${DEFAULT_GLOBALRESOURCES:-"namespace storageclass clusterrole clusterrolebinding customresourcedefinition"}" +GLOBALRESOURCES="${DEFAULT_GLOBALRESOURCES} ${EXTRA_GLOBALRESOURCES}" # Initialize git repo [ -z "$DRY_RUN" ] && [ -z "$GIT_REPO" ] && echo "Need to define GIT_REPO environment variable" && exit 1 From 93313f2b983e76d023854aac0e1f3821288abb67 Mon Sep 17 00:00:00 2001 From: "Nathan J. 
Mehl" Date: Mon, 23 Sep 2019 10:46:00 -0400 Subject: [PATCH 3/3] also use reflow script for global resources --- entrypoint.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/entrypoint.sh b/entrypoint.sh index 2203a0a..8d58046 100755 --- a/entrypoint.sh +++ b/entrypoint.sh @@ -61,7 +61,7 @@ for resource in $GLOBALRESOURCES; do .items[].metadata.resourceVersion, .items[].metadata.creationTimestamp, .items[].metadata.generation - )' | python -c 'import sys, yaml, json; yaml.safe_dump(json.load(sys.stdin), sys.stdout, default_flow_style=False)' >"$GIT_REPO_PATH/$GIT_PREFIX_PATH/${resource}.yaml" + )' | /bin/reflow.py >"$GIT_REPO_PATH/$GIT_PREFIX_PATH/${resource}.yaml" done for namespace in $NAMESPACES; do