Prometheus backup
https://devopstales.github.io/kubernetes/backup-and-retore-prometheus/
Big script example that might need some work. First install the Helm chart with the lifecycle and admin APIs enabled, so the snapshot endpoint is available:
helm upgrade --install prometheus prometheus-community/prometheus \
  --set "server.extraFlags={web.enable-lifecycle,web.enable-admin-api}" \
  -f prometheus.values.yaml
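With web.enable-admin-api set, a snapshot can be triggered over HTTP before running the backup() function below; a minimal sketch, assuming the server answers on localhost:9090 inside the pod:

kubectl exec -n prometheus deploy/prometheus-server -c prometheus-server -- \
  curl -XPOST http://localhost:9090/api/v1/admin/tsdb/snapshot
# The stock prometheus image is busybox-based; if curl is missing, busybox wget works too:
# kubectl exec -n prometheus deploy/prometheus-server -c prometheus-server -- \
#   wget -qO- --post-data='' http://localhost:9090/api/v1/admin/tsdb/snapshot
# A successful call returns the snapshot name, which backup() then picks up
# as the newest entry under /data/snapshots/.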
#!/bin/bash
# Helper functions for backing up and restoring the Prometheus TSDB in Kubernetes.

# Copy the newest snapshot out of the server pod.
# Assumes a snapshot was already taken via the admin API (see above).
backup(){
  LATEST_SNAPSHOT=$(kubectl exec -n prometheus deploy/prometheus-server -c prometheus-server -- ls -t /data/snapshots/ | head -n 1)
  POD_NAME=$(kubectl get pods -n prometheus \
    -l app.kubernetes.io/component=server \
    -l app.kubernetes.io/instance=prometheus \
    -l app.kubernetes.io/name=prometheus \
    -o jsonpath="{.items[0].metadata.name}")
  kubectl -n prometheus cp $POD_NAME:/data/snapshots/$LATEST_SNAPSHOT -c prometheus-server ./prometheus-latest-snapshot
}

# Restore a previously copied snapshot. Note: after scaling the deployment
# to 0 there is no pod left to exec into, so run the cp/mv steps against a
# pod that still mounts the PVC (e.g. a helper pod, as in other() below).
restore(){
  kubectl scale deployment prometheus-server --replicas=0 -n monitoring
  kubectl cp ./prometheus-latest-snapshot monitoring/<prometheus-pod-name>:/data
  kubectl exec -n monitoring <prometheus-pod-name> -- mv /data /data-old
  # After the move above the snapshot lives under /data-old, not /data.
  kubectl exec -n monitoring <prometheus-pod-name> -- mv /data-old/snapshots/<snapshot-id> /data
  kubectl scale deployment prometheus-server --replicas=1 -n monitoring
}

# Snapshot the PVC's underlying Azure disk instead of copying files.
az_snap(){
  az snapshot create --resource-group myResourceGroup --source myDisk --name prometheus-snapshot
}

# List what is currently on the data volume.
check(){
  kubectl exec -n prometheus deploy/prometheus-server -c prometheus-server -- find /data -mindepth 1 -maxdepth 2
}

# Full restore via a temporary busybox pod that mounts the PVC directly,
# so Prometheus itself can stay scaled down while the files are replaced.
other(){
  NAMESPACE="prometheus"
  DEPLOYMENT="prometheus-server"
  PVC_NAME="prometheus-server"
  LOCAL_BACKUP_DIR="./prometheus-latest-snapshot"

  echo "🔍 Finding Prometheus pod..."
  POD_NAME=$(kubectl get pods -n $NAMESPACE -l app=prometheus -o jsonpath="{.items[0].metadata.name}")
  if [[ -z "$POD_NAME" ]]; then
    echo "❌ Error: Prometheus pod not found!"
    exit 1
  fi

  echo "⏳ Scaling down Prometheus..."
  kubectl scale deployment $DEPLOYMENT --replicas=0 -n $NAMESPACE
  sleep 5

  echo "📦 Creating temporary restore pod..."
  kubectl run restore-pod --restart=Never -n $NAMESPACE \
    --image=busybox --overrides='
  {
    "apiVersion": "v1",
    "kind": "Pod",
    "metadata": { "name": "restore-pod" },
    "spec": {
      "containers": [{
        "name": "restore-container",
        "image": "busybox",
        "command": ["sleep", "3600"],
        "volumeMounts": [{
          "name": "prometheus-data",
          "mountPath": "/data"
        }]
      }],
      "volumes": [{
        "name": "prometheus-data",
        "persistentVolumeClaim": { "claimName": "'"$PVC_NAME"'" }
      }]
    }
  }'

  echo "⏳ Waiting for restore pod to be ready..."
  kubectl wait --for=condition=Ready pod/restore-pod -n $NAMESPACE --timeout=60s

  echo "📤 Copying local snapshot to PVC..."
  # Copy the snapshot's contents into the root of /data, not into a subdirectory.
  kubectl cp $LOCAL_BACKUP_DIR/. $NAMESPACE/restore-pod:/data

  echo "✅ Restore completed! Deleting restore pod..."
  kubectl delete pod restore-pod -n $NAMESPACE --force --grace-period=0

  echo "🚀 Scaling up Prometheus..."
  kubectl scale deployment $DEPLOYMENT --replicas=1 -n $NAMESPACE

  echo "🎉 Snapshot restore completed successfully!"
}
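These functions can be collected into a file and sourced; a minimal usage sketch, assuming the file is saved as backup-restore.sh:

# Load the functions, run a backup, then verify what is on the volume.
source ./backup-restore.sh
backup
check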
The data directory depends on the install method: the Helm chart mounts the PVC at /data, while a package install typically uses /var/lib/prometheus.
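To confirm which directory a running server actually uses, the --storage.tsdb.path flag can be read off the live process; a sketch, assuming Prometheus runs as PID 1 in the container and a busybox shell is available:

kubectl exec -n prometheus deploy/prometheus-server -c prometheus-server -- \
  sh -c "tr '\0' ' ' < /proc/1/cmdline; echo"
# Look for --storage.tsdb.path=... in the output; the Helm chart sets /data.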
cat backup-restore.sh

# Option 1: online backup — snapshot via the admin API, then copy it out
kubectl exec -n monitoring <prometheus-pod-name> -- curl -X POST http://localhost:9090/api/v1/admin/tsdb/snapshot
kubectl cp monitoring/<prometheus-pod-name>:/data/snapshots ./prometheus-snapshots

# Option 2: shut down and copy the whole data directory
# (note: kubectl cp needs a running pod, so copy from a helper pod that
# mounts the PVC, or copy before scaling down and accept a fuzzier copy)
kubectl scale deployment prometheus-server --replicas=0 -n monitoring
kubectl cp monitoring/<prometheus-pod-name>:/data ./prometheus-backup
kubectl scale deployment prometheus-server --replicas=1 -n monitoring

# Option 3: snapshot the PVC's underlying disk (Azure example)
az snapshot create --resource-group myResourceGroup --source myDisk --name prometheus-snapshot
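After any of these restores it's worth checking that the server came back with the old data; a quick sketch using the TSDB status endpoint, assuming port 9090 answers inside the pod:

kubectl exec -n monitoring <prometheus-pod-name> -- \
  wget -qO- http://localhost:9090/api/v1/status/tsdb
# headStats.numSeries and the timestamps should reflect the restored data,
# not an empty, freshly created TSDB.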