Prometheus backup
https://devopstales.github.io/kubernetes/backup-and-retore-prometheus/

# Big script example that might need some work
```
helm upgrade --install prometheus prometheus-community/prometheus \
  --set "server.extraFlags={web.enable-lifecycle,web.enable-admin-api}" \
  -f prometheus.values.yaml
```
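The `web.enable-admin-api` flag is what exposes the TSDB snapshot endpoint the backup function below relies on; `web.enable-lifecycle` additionally allows reloads over HTTP. A minimal sketch for triggering a snapshot from a workstation, assuming the namespace and deployment names used on this page:

```
# Reach the server locally, then ask it to write a snapshot under /data/snapshots/<id>
kubectl -n prometheus port-forward deploy/prometheus-server 9090:9090 &
sleep 2
curl -s -XPOST http://localhost:9090/api/v1/admin/tsdb/snapshot
# => {"status":"success","data":{"name":"20250201T090000Z-0123456789abcdef"}}  (name is illustrative)
kill %1
```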

```
backup(){
  # Pull the newest TSDB snapshot out of the server pod.
  # A snapshot must already exist under /data/snapshots (see the curl call above).
  LATEST_SNAPSHOT=$(kubectl exec -n prometheus deploy/prometheus-server -c prometheus-server -- ls -t /data/snapshots/ | head -n 1)
  POD_NAME=$(kubectl get pods -n prometheus -l app.kubernetes.io/component=server -l app.kubernetes.io/instance=prometheus -l app.kubernetes.io/name=prometheus -o jsonpath="{.items[0].metadata.name}")
  kubectl -n prometheus cp "$POD_NAME:/data/snapshots/$LATEST_SNAPSHOT" -c prometheus-server ./prometheus-latest-snapshot
}
```
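After `backup` runs, the local directory should contain ordinary TSDB block directories; a quick sanity check of the copy (block names below are illustrative):

```
ls ./prometheus-latest-snapshot
# 01HV0EXAMPLEBLOCKULID0000000  01HV1EXAMPLEBLOCKULID0000000
ls ./prometheus-latest-snapshot/01HV0EXAMPLEBLOCKULID0000000
# chunks  index  meta.json  tombstones
```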

```
restore(){
  # NOTE: scaling to 0 removes the pod that the cp/exec lines below target,
  # and /data is usually the volume mount point so it cannot simply be
  # renamed; in practice run these against a helper pod (see other() below).
  kubectl scale deployment prometheus-server --replicas=0 -n monitoring
  kubectl cp ./prometheus-latest-snapshot monitoring/<prometheus-pod-name>:/data/snapshots/<snapshot-id>
  kubectl exec -n monitoring <prometheus-pod-name> -- mv /data /data-old
  kubectl exec -n monitoring <prometheus-pod-name> -- mv /data-old/snapshots/<snapshot-id> /data
  kubectl scale deployment prometheus-server --replicas=1 -n monitoring
}
```
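Once the server is back up, it is worth confirming the restored data is actually queryable; a small check, assuming the same monitoring namespace:

```
kubectl -n monitoring port-forward deploy/prometheus-server 9090:9090 &
sleep 2
# Any query will do; "up" should return series if the TSDB came back
curl -s 'http://localhost:9090/api/v1/query?query=up'
kill %1
```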

```
az_snap(){
  # Cloud-level alternative: snapshot the Azure disk backing the Prometheus PVC.
  # myResourceGroup and myDisk are placeholders for your own resources.
  az snapshot create --resource-group myResourceGroup --source myDisk --name prometheus-snapshot
}
```
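To fill in the `--source` value, the disk behind the PVC can usually be read off the PersistentVolume; a sketch assuming the Azure Disk CSI driver and the PVC name used elsewhere on this page:

```
# Resolve PVC -> PV -> Azure disk resource ID (for CSI volumes, volumeHandle is the disk ID)
PV=$(kubectl get pvc prometheus-server -n prometheus -o jsonpath='{.spec.volumeName}')
kubectl get pv "$PV" -o jsonpath='{.spec.csi.volumeHandle}'
```

The resulting resource ID can be passed directly as `--source` to `az snapshot create`.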

```
check(){
  # List blocks, WAL and snapshots on the data volume (two levels deep).
  kubectl exec -n prometheus deploy/prometheus-server -c prometheus-server -- find /data -mindepth 1 -maxdepth 2
}
```
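Typical output on a healthy server looks roughly like the following; block names are ULIDs and the snapshots directory only exists after the admin API has been called (all names illustrative):

```
/data/wal
/data/wal/00000000
/data/chunks_head
/data/chunks_head/000001
/data/01HV0EXAMPLEBLOCKULID0000000
/data/01HV0EXAMPLEBLOCKULID0000000/chunks
/data/snapshots
/data/snapshots/20250201T090000Z-0123456789abcdef
```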

```
other(){
  # Restore ./prometheus-latest-snapshot onto the PVC via a temporary helper
  # pod, so the data can be replaced while Prometheus itself is scaled down.
  NAMESPACE="prometheus"
  DEPLOYMENT="prometheus-server"
  PVC_NAME="prometheus-server"
  LOCAL_BACKUP_DIR="./prometheus-latest-snapshot"

  echo "🔍 Finding Prometheus pod..."
  # Adjust the label selector to match your chart; the community chart uses
  # the app.kubernetes.io/* labels shown in backup() above.
  POD_NAME=$(kubectl get pods -n "$NAMESPACE" -l app=prometheus -o jsonpath="{.items[0].metadata.name}")

  if [[ -z "$POD_NAME" ]]; then
    echo "❌ Error: Prometheus pod not found!"
    exit 1
  fi

  echo "⏳ Scaling down Prometheus..."
  kubectl scale deployment "$DEPLOYMENT" --replicas=0 -n "$NAMESPACE"
  sleep 5

  echo "📦 Creating temporary restore pod..."
  # The pod mounts the Prometheus PVC at /data and just sleeps so we can cp into it.
  kubectl run restore-pod --restart=Never -n "$NAMESPACE" \
    --image=busybox --overrides='
  {
    "apiVersion": "v1",
    "kind": "Pod",
    "metadata": { "name": "restore-pod" },
    "spec": {
      "containers": [{
        "name": "restore-container",
        "image": "busybox",
        "command": ["sleep", "3600"],
        "volumeMounts": [{
          "name": "prometheus-data",
          "mountPath": "/data"
        }]
      }],
      "volumes": [{
        "name": "prometheus-data",
        "persistentVolumeClaim": { "claimName": "'"$PVC_NAME"'" }
      }]
    }
  }'

  echo "⏳ Waiting for restore pod to be ready..."
  kubectl wait --for=condition=Ready pod/restore-pod -n "$NAMESPACE" --timeout=60s

  echo "📤 Copying local snapshot to PVC..."
  # Note: this lands at /data/prometheus-latest-snapshot on the volume and
  # still has to be moved into place before Prometheus will read it.
  kubectl cp "$LOCAL_BACKUP_DIR" "$NAMESPACE"/restore-pod:/data

  echo "✅ Restore completed! Deleting restore pod..."
  kubectl delete pod restore-pod -n "$NAMESPACE" --force --grace-period=0

  echo "🚀 Scaling up Prometheus..."
  kubectl scale deployment "$DEPLOYMENT" --replicas=1 -n "$NAMESPACE"

  echo "🎉 Snapshot restore completed successfully!"
}
```
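These are shell functions rather than a standalone script, so they have to be sourced before use; a hypothetical session (the filename is made up):

```
source ./prometheus-backup-funcs.sh   # hypothetical file holding the functions above
backup    # pulls the newest snapshot to ./prometheus-latest-snapshot
check     # list what is on the volume afterwards
```

Note that backup() targets the prometheus namespace while restore() targets monitoring, mirroring the notes above; adjust both to wherever your server actually runs.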
The data directory is /data inside the Helm-deployed container, or /var/lib/prometheus on typical package-based installs.
Contents of backup-restore.sh (three alternative approaches):

```
# 1. Online backup: ask Prometheus for a consistent snapshot, then copy it out
kubectl exec -n monitoring <prometheus-pod-name> -- curl -X POST http://localhost:9090/api/v1/admin/tsdb/snapshot
kubectl cp monitoring/<prometheus-pod-name>:/data/snapshots ./prometheus-snapshots

# 2. Or shutdown: copy the whole data directory around a restart
#    (caveat: with --replicas=0 there is no pod left to cp from, so this
#    needs a helper pod such as the one in other() above)
kubectl scale deployment prometheus-server --replicas=0 -n monitoring
kubectl cp monitoring/<prometheus-pod-name>:/data ./prometheus-backup
kubectl scale deployment prometheus-server --replicas=1 -n monitoring

# 3. Or using a snapshot of the PVC's disk at the cloud layer
az snapshot create --resource-group myResourceGroup --source myDisk --name prometheus-snapshot
```
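To complete the PVC-snapshot route: an Azure disk snapshot can later be turned back into a managed disk, which can then be wired up to a new PV/PVC. A sketch using the placeholder names above:

```
# Create a new managed disk from the snapshot (all names are placeholders)
az disk create --resource-group myResourceGroup --name prometheus-restored-disk \
  --source prometheus-snapshot
```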