From e12dc3a26337f8e614635a3418bc477025ab3e3b Mon Sep 17 00:00:00 2001 From: PhilexWong <142860658+PhilexWong@users.noreply.github.com> Date: Fri, 6 Sep 2024 11:32:30 -0700 Subject: [PATCH] [juno-node] Feat/upload juno data file (#130) * feat: add chart to backup juno data and upload to cloudflare r2 * feat: add chart to backup juno data and upload to cloudflare r2 - add new line at the end of file * feat: add chart to backup juno data and upload to cloudflare r2 - change readme and change typo. * feat: add chart to backup juno data and upload to cloudflare r2 - auto generate README.md and helm-docs v1.13.1 changed. * Fix: change dynamic name * Fix: enhance to replace pod with job. * Fix: remove tail space * Fix: add space * Fix: add space to correct syntax error * Fix: change rclone's secret from local file to secret managers * Fix: change schedule * Fix: add blank line and add externalsecret-common.yaml * Fix: add relative path ofdataFromKey * Fix: add relative path ofdataFromKey * Fix: change DB size * Revert "Fix: change DB size" This reverts commit 9e5871061f1b3fbea354187858091300b98d5107. 
* Fix: change DB size -v * Fix: test purpose to exclude sst files * Fix: test purpose to exclude sst files * Fix: test purpose to exclude sst files * Update externalsecret-common.yaml Signed-off-by: PhilexWong <142860658+PhilexWong@users.noreply.github.com> * Update externalsecret-common.yaml Signed-off-by: PhilexWong <142860658+PhilexWong@users.noreply.github.com> * Update externalsecret-common.yaml Signed-off-by: PhilexWong <142860658+PhilexWong@users.noreply.github.com> * Fix: test purpose to exclude sst files - revert * Fix: rename the jar file * Fix: rename the jar file -exclude sst file for testing * feat: add retention function * feat: add retention function * feat: add retention function - add values * feat: add retention function * feat: add retention function * Remove -z param during tar package * Fix: Remove -z param during tar package * Fix: add values.yaml files * Fix: Remove -z param during tar package * Fix: Remove -z param during tar package * Fix: add values.yaml files * Fix: change format. 
--------- Signed-off-by: PhilexWong <142860658+PhilexWong@users.noreply.github.com> --- charts/juno-node/Chart.yaml | 2 +- charts/juno-node/README.md | 14 +- .../templates/externalsecret-common.yaml | 20 ++ .../templates/juno-data-backup-cronjob.yaml | 309 ++++++++++++++++++ charts/juno-node/values.yaml | 36 +- 5 files changed, 376 insertions(+), 5 deletions(-) create mode 100644 charts/juno-node/templates/externalsecret-common.yaml create mode 100644 charts/juno-node/templates/juno-data-backup-cronjob.yaml diff --git a/charts/juno-node/Chart.yaml b/charts/juno-node/Chart.yaml index 5fab205e..f043b956 100644 --- a/charts/juno-node/Chart.yaml +++ b/charts/juno-node/Chart.yaml @@ -1,6 +1,6 @@ apiVersion: v2 name: juno-chart -version: 0.1.4 +version: 0.1.5 appVersion: "1" description: A Helm chart for deploying Juno service maintainers: diff --git a/charts/juno-node/README.md b/charts/juno-node/README.md index a6506beb..41794a0a 100644 --- a/charts/juno-node/README.md +++ b/charts/juno-node/README.md @@ -1,6 +1,6 @@ # juno-chart -![Version: 0.1.4](https://img.shields.io/badge/Version-0.1.4-informational?style=flat-square) ![AppVersion: 1](https://img.shields.io/badge/AppVersion-1-informational?style=flat-square) +![Version: 0.1.5](https://img.shields.io/badge/Version-0.1.5-informational?style=flat-square) ![AppVersion: 1](https://img.shields.io/badge/AppVersion-1-informational?style=flat-square) A Helm chart for deploying Juno service @@ -27,6 +27,15 @@ A Helm chart for deploying Juno service | args.--ws | string | `"true"` | | | args.--ws-host | string | `"0.0.0.0"` | | | args.--ws-port | string | `"6061"` | | +| backupJunoDataJob.backupSchedule | string | `"*/20 * * * *"` | | +| backupJunoDataJob.cleanupSchedule | string | `"*/40 * * * *"` | | +| backupJunoDataJob.dataSource | string | `"juno-sepolia-pv-ssd-juno-sepolia-0"` | | +| backupJunoDataJob.enabled | bool | `true` | | +| backupJunoDataJob.endpoint | string | `"https://12345543.r2.cloudflarestorage.com"` | | 
+| backupJunoDataJob.key | string | `"key-1234"` | | +| backupJunoDataJob.network | string | `"sepolia"` | | +| backupJunoDataJob.secret | string | `"secret-12345"` | | +| backupJunoDataJob.storageSize | string | `"200Gi"` | | | batchjob.enabled | bool | `false` | | | batchjob.schedule | string | `"* */1 * * *"` | | | deployment.healthCheck.enabled | bool | `false` | | @@ -84,6 +93,7 @@ A Helm chart for deploying Juno service | serviceAccount.enabled | bool | `false` | | | serviceAccount.gcpServiceAccount | string | `"monitoring-sa-euw1@juno-prod-nth.iam.gserviceaccount.com"` | | | serviceAccount.name | string | `"juno-pgo"` | | +| svc.externalTrafficPolicy | string | `""` | | | svc.globalStaticInternalIpName | string | `""` | | | svc.globalStaticIpName | string | `""` | | | svc.ingress.enabled | bool | `true` | | @@ -155,4 +165,4 @@ A Helm chart for deploying Juno service | taintsToleration.tolerations.network | string | `"juno"` | | ---------------------------------------------- -Autogenerated from chart metadata using [helm-docs v1.12.0](https://github.com/norwoodj/helm-docs/releases/v1.12.0) +Autogenerated from chart metadata using [helm-docs v1.13.1](https://github.com/norwoodj/helm-docs/releases/v1.13.1) diff --git a/charts/juno-node/templates/externalsecret-common.yaml b/charts/juno-node/templates/externalsecret-common.yaml new file mode 100644 index 00000000..c877ce3b --- /dev/null +++ b/charts/juno-node/templates/externalsecret-common.yaml @@ -0,0 +1,20 @@ +{{- if .Values.secret }} +{{- with .Values.secret.data }} +apiVersion: external-secrets.io/v1beta1 +kind: ExternalSecret +metadata: + name: {{ $.Values.deployment.projectName }}-external-secret + namespace: {{ $.Values.deployment.namespace }} +spec: + refreshInterval: {{ $.Values.secret.data.refreshInterval }} + secretStoreRef: + name: {{ $.Values.secret.data.secretStoreName }} + kind: {{ $.Values.secret.data.secretStoreKind }} + target: + name: {{ $.Values.secret.data.targetName }} + creationPolicy: {{ 
$.Values.secret.data.targetCreationPolicy }} + dataFrom: + - extract: + key: {{ $.Values.secret.data.dataFromKey }} # name of the secret in secret manager (GCP secret manager) +{{- end }} +{{- end }} diff --git a/charts/juno-node/templates/juno-data-backup-cronjob.yaml b/charts/juno-node/templates/juno-data-backup-cronjob.yaml new file mode 100644 index 00000000..22f91371 --- /dev/null +++ b/charts/juno-node/templates/juno-data-backup-cronjob.yaml @@ -0,0 +1,309 @@ +{{- if .Values.backupJunoDataJob.enabled -}} +# Service Account for the Backup Job +apiVersion: v1 +kind: ServiceAccount +metadata: + name: {{ .Values.deployment.namespace }}-backup-junodata-sa + namespace: {{ .Values.deployment.namespace }} +--- + +# Role for Backup Job with necessary permissions +apiVersion: rbac.authorization.k8s.io/v1 +kind: Role +metadata: + name: {{ .Values.deployment.namespace }}-backup-junodata-role + namespace: {{ .Values.deployment.namespace }} +rules: + - apiGroups: [ "", "apps","batch"] + resources: ["pods", "jobs", "persistentvolumeclaims"] + verbs: ["get", "list","create", "update", "patch", "delete"] +--- +# RoleBinding to bind Role with ServiceAccount +apiVersion: rbac.authorization.k8s.io/v1 +kind: RoleBinding +metadata: + name: {{ .Values.deployment.namespace }}-backup-junodata-rolebinding + namespace: {{ .Values.deployment.namespace }} +subjects: + - kind: ServiceAccount + name: {{ .Values.deployment.namespace }}-backup-junodata-sa + namespace: {{ .Values.deployment.namespace }} +roleRef: + apiGroup: rbac.authorization.k8s.io + kind: Role + name: {{ .Values.deployment.namespace }}-backup-junodata-role +--- + +# Secret holding the rclone remote config for R2 (no credentials stored here; env_auth = true makes rclone read them from the environment) +apiVersion: v1 +kind: Secret +metadata: + name: {{ .Values.deployment.namespace }}-rclone-config + namespace: {{ .Values.deployment.namespace }} +stringData: + rclone.conf: | + [R2] + type = s3 + provider = Cloudflare + env_auth = true + endpoint = {{ .Values.backupJunoDataJob.endpoint | default "https://d1cc7d59ae8f8dc2b1aa530c41b5c6ec.r2.cloudflarestorage.com" }} +--- +# 
ConfigMap for cloning disk manifest +apiVersion: v1 +kind: ConfigMap +metadata: + name: {{ .Values.deployment.namespace }}-cloning-disk-manifest + namespace: {{ .Values.deployment.namespace }} +data: + cloning-disk-manifest.yaml: | + apiVersion: v1 + kind: PersistentVolumeClaim + metadata: + name: {{ .Values.deployment.namespace }}-pv-ssd-snapshot + namespace: {{ .Values.deployment.namespace }} + spec: + dataSource: + name: {{ .Values.backupJunoDataJob.dataSource }} + kind: PersistentVolumeClaim + accessModes: + - ReadWriteOnce + storageClassName: premium-rwo + resources: + requests: + storage: {{ .Values.backupJunoDataJob.storageSize }} + --- + apiVersion: v1 + kind: PersistentVolumeClaim + metadata: + name: {{ .Values.deployment.namespace }}-juno-data-backup-pvc + namespace: {{ .Values.deployment.namespace }} + spec: + accessModes: + - ReadWriteOnce + storageClassName: premium-rwo + resources: + requests: + storage: {{ .Values.backupJunoDataJob.storageSize }} +--- +# ConfigMap for cloning juno manifest +apiVersion: v1 +kind: ConfigMap +metadata: + name: {{ .Values.deployment.namespace }}-cloning-juno-manifest + namespace: {{ .Values.deployment.namespace }} +data: + cloning-juno-manifest.yaml: | + apiVersion: batch/v1 + kind: Job + metadata: + name: {{ .Values.deployment.namespace }}-juno-data-archival-job + namespace: {{ .Values.deployment.namespace }} + spec: + ttlSecondsAfterFinished: 60 + template: + spec: + serviceAccountName: {{ .Values.deployment.namespace }}-backup-junodata-sa + volumes: + - name: juno-data-volume + persistentVolumeClaim: + claimName: {{ .Values.deployment.namespace }}-pv-ssd-snapshot + - name: {{ .Values.deployment.namespace }}-rclone-config + secret: + secretName: {{ .Values.deployment.namespace }}-rclone-config + - name: tar-backup-volume + persistentVolumeClaim: + claimName: {{ .Values.deployment.namespace }}-juno-data-backup-pvc + initContainers: + - name: juno-archival-tar + image: busybox + command: ["/bin/sh", "-c"] + args: + - | + 
rm -rf /mnt/juno-tar-backup/*.tar && + rm -rf /mnt/data/*.tar && + tar -cf /mnt/juno-tar-backup/juno_{{ .Values.backupJunoDataJob.network }}_{{ .Values.deployment.imagetag }}_$(date +\%Y\%m\%d).tar --exclude=./lost+found -C /mnt/data . && sleep 10 + volumeMounts: + - name: juno-data-volume + mountPath: /mnt/data + - name: tar-backup-volume + mountPath: /mnt/juno-tar-backup + containers: + - name: rclone-upload-container + image: rclone/rclone:latest + env: + - name: AWS_ACCESS_KEY_ID + valueFrom: + secretKeyRef: + name: {{ .Values.secret.data.targetName }} + key: r2_access_key_id + - name: AWS_SECRET_ACCESS_KEY + valueFrom: + secretKeyRef: + name: {{ .Values.secret.data.targetName }} + key: r2_secret_access_key + command: ["/bin/sh", "-c"] + args: + - | + apk add --no-cache curl && + apk add --no-cache jq && + latestBlockNumber=$(curl --location '{{ .Values.backupJunoDataJob.junoFreeEndpoint }}/{{ .Values.backupJunoDataJob.network }}-juno' --header 'Content-Type: application/json' --data '{ "jsonrpc": "2.0","method": "starknet_blockNumber", "id": 1}' | jq '.result') && + echo "latestBlockNumber is $latestBlockNumber" && + mv /mnt/juno-tar-backup/juno_{{ .Values.backupJunoDataJob.network }}_{{ .Values.deployment.imagetag }}*.tar /mnt/juno-tar-backup/juno_{{ .Values.backupJunoDataJob.network }}_{{ .Values.deployment.imagetag }}_$latestBlockNumber.tar && + echo "/mnt/juno-tar-backup/juno_{{ .Values.backupJunoDataJob.network }}_{{ .Values.deployment.imagetag }}_$latestBlockNumber.tar" && + rclone copy /mnt/juno-tar-backup/*.tar R2:/{{ .Values.backupJunoDataJob.bucketName }}/{{ .Values.backupJunoDataJob.network }} + volumeMounts: + - name: {{ .Values.deployment.namespace }}-rclone-config + mountPath: /config/rclone + - name: tar-backup-volume + mountPath: /mnt/juno-tar-backup + restartPolicy: OnFailure +--- +# CronJob for Backup Task +apiVersion: batch/v1 +kind: CronJob +metadata: + name: {{ .Values.deployment.namespace }}-backup-junodata-cronjob + namespace: {{ 
.Values.deployment.namespace }} +spec: + schedule: "{{ .Values.backupJunoDataJob.backupSchedule }}" + concurrencyPolicy: Forbid + successfulJobsHistoryLimit: 1 + failedJobsHistoryLimit: 1 + jobTemplate: + spec: + completions: 1 + ttlSecondsAfterFinished: 30 + template: + spec: + serviceAccountName: {{ .Values.deployment.namespace }}-backup-junodata-sa + restartPolicy: Never + initContainers: + - name: copy-disk-kubectl-container + image: bitnami/kubectl:latest + command: ["/bin/sh"] + args: ["-c", "kubectl apply -f /cloning-disk-manifest/cloning-disk-manifest.yaml"] + volumeMounts: + - name: cloning-disk-manifest-volume + mountPath: /cloning-disk-manifest + containers: + - name: clone-juno-kubectl-container + image: bitnami/kubectl:latest + command: ["/bin/sh"] + args: ["-c", "kubectl apply -f /cloning-juno-manifest/cloning-juno-manifest.yaml"] + volumeMounts: + - name: cloning-juno-manifest-volume + mountPath: /cloning-juno-manifest + volumes: + - name: cloning-disk-manifest-volume + configMap: + name: {{ .Values.deployment.namespace }}-cloning-disk-manifest + - name: cloning-juno-manifest-volume + configMap: + name: {{ .Values.deployment.namespace }}-cloning-juno-manifest +--- +# CronJob for Cleaning up Completed Pods and PVCs +apiVersion: batch/v1 +kind: CronJob +metadata: + name: {{ .Values.deployment.namespace }}-delete-used-pvc + namespace: {{ .Values.deployment.namespace }} +spec: + schedule: "{{ .Values.backupJunoDataJob.cleanupSchedule }}" + concurrencyPolicy: Forbid + successfulJobsHistoryLimit: 1 + failedJobsHistoryLimit: 1 + jobTemplate: + spec: + completions: 1 + ttlSecondsAfterFinished: 30 + template: + spec: + serviceAccountName: {{ .Values.deployment.namespace }}-backup-junodata-sa + restartPolicy: OnFailure + containers: + - name: kubectl-container + image: bitnami/kubectl:latest + command: + - "/bin/bash" + - "-c" + - | + # Delete PVC if not used + describe_output=$(kubectl describe pvc {{ .Values.deployment.namespace }}-pv-ssd-snapshot) + if echo 
"$describe_output" | grep -q "Used By:[[:space:]]*<none>"; then + echo "Deleting {{ .Values.deployment.namespace }}-pv-ssd-snapshot..." + kubectl delete pvc {{ .Values.deployment.namespace }}-pv-ssd-snapshot + sleep 30 + fi + describe_output=$(kubectl describe pvc {{ .Values.deployment.namespace }}-juno-data-backup-pvc) + if echo "$describe_output" | grep -q "Used By:[[:space:]]*<none>"; then + echo "Deleting {{ .Values.deployment.namespace }}-juno-data-backup-pvc..." + kubectl delete pvc {{ .Values.deployment.namespace }}-juno-data-backup-pvc + sleep 30 + fi +--- +# CronJob enforcing the retention limit on backups stored in the R2 bucket +apiVersion: batch/v1 +kind: CronJob +metadata: + name: {{ .Values.deployment.namespace }}-r2-retention-cronjob + namespace: {{ .Values.deployment.namespace }} +spec: + schedule: "0 0 * * */2" + jobTemplate: + spec: + completions: 1 + ttlSecondsAfterFinished: 30 + template: + spec: + restartPolicy: OnFailure + containers: + - name: {{ .Values.deployment.namespace }}-r2-retention + image: ubuntu:latest + env: + - name: RETENTION_LIMIT + value: "{{ .Values.backupJunoDataJob.retentionLimit }}" + - name: API_TOKEN + valueFrom: + secretKeyRef: + name: {{ .Values.secret.data.targetName }} + key: r2_api_token + - name: ACCOUNT_ID + value: "d1cc7d59ae8f8dc2b1aa530c41b5c6ec" + - name: BUCKET_NAME + value: "{{ .Values.backupJunoDataJob.bucketName }}" + command: + - /bin/sh + - -c + - | + #!/bin/sh + mkdir -p /var/lib/apt/lists/partial + apt-get update && apt-get install -y curl jq + # Constants + API_TOKEN="$API_TOKEN" + RETENTION_LIMIT="$RETENTION_LIMIT" + ACCOUNT_ID="$ACCOUNT_ID" + BUCKET_NAME="$BUCKET_NAME" + + # Construct the Cloudflare API URL with account ID and bucket name + CLOUDFLARE_API_URL="https://api.cloudflare.com/client/v4/accounts/$ACCOUNT_ID/r2/buckets/$BUCKET_NAME/objects?prefix={{ .Values.backupJunoDataJob.network }}/" + # Get the list of objects with the specified prefix + objects=$(curl -s -X GET "$CLOUDFLARE_API_URL" -H "Authorization: Bearer $API_TOKEN" 
| jq -r '.result') + + # Check if the number of objects exceeds the retention limit + object_count=$(echo "$objects" | jq length) + echo "total backup number is $object_count" + + if [ "$object_count" -le "$RETENTION_LIMIT" ]; then + echo "exiting...." + exit 0 + fi + delete_number=$((object_count - RETENTION_LIMIT)) + # Sort the objects by last_modified date and delete the oldest ones (split on a real tab; $'\t' is not portable under /bin/sh) + echo "$objects" | jq -r '.[] | [.key, .last_modified] | @tsv' | sort -k2 | head -n "$delete_number" | while IFS="$(printf '\t')" read -r key last_modified; do + delete_url="https://api.cloudflare.com/client/v4/accounts/$ACCOUNT_ID/r2/buckets/$BUCKET_NAME/objects/${key}" + echo "Deleting ${key} at $delete_url" + delete_response=$(curl -s -X DELETE "$delete_url" -H "Authorization: Bearer $API_TOKEN") + echo "Delete response: $delete_response" + done +--- +{{- end -}} \ No newline at end of file diff --git a/charts/juno-node/values.yaml b/charts/juno-node/values.yaml index cfb6b148..32e90ade 100644 --- a/charts/juno-node/values.yaml +++ b/charts/juno-node/values.yaml @@ -75,7 +75,7 @@ pgo: ENV: "juno-integration" URL: "http://localhost:6062/debug/pprof/profile" - ## cache warmup side container + ## cache warmup side container cache: enabled: false image: "us-east1-docker.pkg.dev/juno-stg-nth/juno-cache/cache:2.0" @@ -85,7 +85,7 @@ cache: memory: 512Mi requests: cpu: "100m" - memory: 100Mi + memory: 100Mi ### Service account serviceAccount: @@ -221,3 +221,35 @@ env: data: - name: NETWORK value: "juno" + +secret: + feederGateway: + refreshInterval: 10m + secretStoreName: juno-store + secretStoreKind: ClusterSecretStore + targetName: juno-goerli # name of the k8s secret to be created + targetCreationPolicy: Owner + key: feeder-gateway # name of the secret to target secret manager + property: testnet # name of the property to retrieve from secret manager + version: "1" # version of secret + secretKey: testnet # name of the secret data key + data: + refreshInterval: 10m + secretStoreName: 
juno-store # external store name (ClusterSecretStore), it is used to connect to a secret manager. + secretStoreKind: ClusterSecretStore # external store name + targetName: juno-sepolia-common # name of the k8s secret to be created + targetCreationPolicy: Owner + dataFromKey: secret-store # name of the secret in secret manager (GCP secret manager) + +### Back up juno data and upload to R2 cloud +backupJunoDataJob: + enabled: true + dataSource: "juno-sepolia-pv-ssd-juno-sepolia-0" + backupSchedule: "0 3 * * *" + cleanupSchedule: "0 10 * * *" + network: "sepolia" + storageSize: 250Gi + bucketName: "juno-snapshot" + enableRetention: false + retentionLimit: 10 + junoFreeEndpoint: "https://free-rpc-staging.nethermind.dev"