Prometheus backup
Jump to navigation
Jump to search
https://devopstales.github.io/kubernetes/backup-and-retore-prometheus/
Below is a large example script; it works in outline but should be reviewed and adapted before production use.
# Install or upgrade Prometheus from the community Helm chart.
# The two extra server flags enable the lifecycle and admin HTTP APIs,
# which are required for the /api/v1/admin/tsdb/snapshot endpoint used below.
helm upgrade --install prometheus prometheus-community/prometheus \
--set "server.extraFlags={web.enable-lifecycle,web.enable-admin-api}" \
-f prometheus.values.yaml
#######################################
# Copy the newest TSDB snapshot out of the Prometheus server pod to
# ./prometheus-latest-snapshot on the local machine.
# Assumes a snapshot was already created via the admin API
# (POST /api/v1/admin/tsdb/snapshot) — TODO confirm with the caller.
# Globals:   none
# Arguments: none
# Returns:   0 on success, 1 if no snapshot or no server pod is found
#######################################
backup(){
  local ns="prometheus"
  local latest_snapshot pod_name
  # Newest directory entry under /data/snapshots (ls -t: mtime, newest first).
  latest_snapshot=$(kubectl exec -n "$ns" deploy/prometheus-server -c prometheus-server -- ls -t /data/snapshots/ | head -n 1)
  if [[ -z "$latest_snapshot" ]]; then
    echo "Error: no snapshots found in /data/snapshots" >&2
    return 1
  fi
  pod_name=$(kubectl get pods -n "$ns" \
    -l app.kubernetes.io/component=server \
    -l app.kubernetes.io/instance=prometheus \
    -l app.kubernetes.io/name=prometheus \
    -o jsonpath="{.items[0].metadata.name}")
  if [[ -z "$pod_name" ]]; then
    echo "Error: Prometheus server pod not found" >&2
    return 1
  fi
  kubectl -n "$ns" cp "$pod_name:/data/snapshots/$latest_snapshot" -c prometheus-server ./prometheus-latest-snapshot
}
#######################################
# Restore a previously-copied snapshot into the Prometheus data directory.
# Usage:     restore <pod-name> <snapshot-id>
# NOTE(review): the original used literal <prometheus-pod-name>/<snapshot-id>
# placeholders, which are invalid shell ('<' is redirection); they are now
# required positional parameters.
# NOTE(review): scaling the deployment to 0 removes the pod, so the
# subsequent cp/exec into it cannot work against a deployment-managed pod —
# this flow only makes sense for a pod that survives the scale-down (e.g. a
# helper pod mounting the same PVC). Verify before use.
# Returns:   0 on success; non-zero if arguments are missing
#######################################
restore(){
  local ns="monitoring"
  local pod="${1:?usage: restore <pod-name> <snapshot-id>}"
  local snap="${2:?usage: restore <pod-name> <snapshot-id>}"
  # Stop Prometheus so the TSDB is not being written while directories swap.
  kubectl scale deployment prometheus-server --replicas=0 -n "$ns"
  kubectl cp ./prometheus-latest-snapshot "$ns/$pod:/data"
  kubectl exec -n "$ns" "$pod" -- mv /data /data-old
  # Bug fix: after /data was renamed to /data-old, the snapshot lives under
  # /data-old/snapshots — the original referenced the no-longer-existing
  # /data/snapshots path.
  kubectl exec -n "$ns" "$pod" -- mv "/data-old/snapshots/$snap" /data
  kubectl scale deployment prometheus-server --replicas=1 -n "$ns"
}
# Alternative backup path: snapshot the Azure managed disk backing the
# Prometheus PVC at the infrastructure level.
az_snap(){
# NOTE(review): resource group and disk name are hard-coded example values —
# replace with the real disk backing the Prometheus PersistentVolume.
az snapshot create --resource-group myResourceGroup --source myDisk --name prometheus-snapshot
}
# List the Prometheus data directory two levels deep, to verify that
# snapshot directories exist under /data/snapshots before copying them out.
check(){
kubectl exec -n prometheus deploy/prometheus-server -c prometheus-server -- find /data -mindepth 1 -maxdepth 2
}
#######################################
# Restore ./prometheus-latest-snapshot onto the Prometheus PVC via a
# temporary busybox pod that mounts the PVC at /data, then restart Prometheus.
# Fixes vs. original: removed stray '#!/bin/bash' inside the function body;
# dropped the broken '--rm -it ... &' combination (interactive flags on a
# backgrounded run); replaced fixed 'sleep's with kubectl rollout/wait;
# 'return 1' instead of 'exit 1' (exit would kill a sourcing shell);
# quoted all expansions.
# Returns: 0 on success, 1 if the Prometheus pod cannot be found
#######################################
other(){
  local -r namespace="prometheus"
  local -r deployment="prometheus-server"
  local -r pvc_name="prometheus-server"
  local -r local_backup_dir="./prometheus-latest-snapshot"
  local pod_name

  echo "🔍 Finding Prometheus pod..."
  # NOTE(review): this selector (app=prometheus) differs from the
  # app.kubernetes.io/* labels used by backup() — verify against the chart.
  pod_name=$(kubectl get pods -n "$namespace" -l app=prometheus -o jsonpath="{.items[0].metadata.name}")
  if [[ -z "$pod_name" ]]; then
    echo "❌ Error: Prometheus pod not found!" >&2
    return 1
  fi

  echo "⏳ Scaling down Prometheus..."
  kubectl scale deployment "$deployment" --replicas=0 -n "$namespace"
  # Wait for the scale-down to complete so the PVC is released.
  kubectl rollout status deployment "$deployment" -n "$namespace" --timeout=60s

  echo "📦 Creating temporary restore pod..."
  kubectl run restore-pod --restart=Never -n "$namespace" \
    --image=busybox --overrides='
  {
    "apiVersion": "v1",
    "kind": "Pod",
    "metadata": {
      "name": "restore-pod"
    },
    "spec": {
      "containers": [{
        "name": "restore-container",
        "image": "busybox",
        "command": ["sleep", "3600"],
        "volumeMounts": [{
          "name": "prometheus-data",
          "mountPath": "/data"
        }]
      }],
      "volumes": [{
        "name": "prometheus-data",
        "persistentVolumeClaim": {
          "claimName": "'"$pvc_name"'"
        }
      }]
    }
  }'

  echo "⏳ Waiting for restore pod to be ready..."
  kubectl wait --for=condition=Ready "pod/restore-pod" -n "$namespace" --timeout=120s

  echo "📤 Copying local snapshot to PVC..."
  kubectl cp "$local_backup_dir" "$namespace/restore-pod:/data"

  echo "✅ Restore completed! Deleting restore pod..."
  kubectl delete pod restore-pod -n "$namespace" --force --grace-period=0

  echo "🚀 Scaling up Prometheus..."
  kubectl scale deployment "$deployment" --replicas=1 -n "$namespace"
  echo "🎉 Snapshot restore completed successfully!"
}
The Prometheus data directory is typically /data (Helm chart deployments) or /var/lib/prometheus (package-based installs).
Contents of backup-restore.sh, reconstructed for readability:

  # Option 1: create a TSDB snapshot via the admin API, then copy it out:
  kubectl exec -n monitoring <prometheus-pod-name> -- curl -X POST http://localhost:9090/api/v1/admin/tsdb/snapshot
  kubectl cp monitoring/<prometheus-pod-name>:/data/snapshots ./prometheus-snapshots

  # Option 2: shut Prometheus down and copy the whole data directory:
  kubectl scale deployment prometheus-server --replicas=0 -n monitoring
  kubectl cp monitoring/<prometheus-pod-name>:/data ./prometheus-backup
  kubectl scale deployment prometheus-server --replicas=1 -n monitoring

  # Option 3: snapshot the disk backing the PVC (Azure example):
  az snapshot create --resource-group myResourceGroup --source myDisk --name prometheus-snapshot