Prometheus backup
https://devopstales.github.io/kubernetes/backup-and-retore-prometheus/
# Big script example that might need some work
Install Prometheus with the lifecycle and admin HTTP APIs enabled; the admin API is what exposes the TSDB snapshot endpoint used below:

```
helm upgrade --install prometheus prometheus-community/prometheus \
  --set "server.extraFlags={web.enable-lifecycle,web.enable-admin-api}" \
  -f prometheus.values.yaml
```
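With the admin API on, a snapshot can be triggered over HTTP before running `backup()`. A minimal sketch using a port-forward so no extra tooling is needed inside the container; the namespace and deployment name are assumed to match the Helm release above:

```
# Forward the Prometheus port locally (assumed names from the chart above)
kubectl -n prometheus port-forward deploy/prometheus-server 9090:9090 &
PF_PID=$!
sleep 2

# POST to the admin snapshot endpoint; the response is JSON like
# {"status":"success","data":{"name":"20250201T090000Z-..."}}
curl -s -X POST http://localhost:9090/api/v1/admin/tsdb/snapshot

kill "$PF_PID"
```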
```
backup(){
  # Assumes a snapshot was already created via the admin API (see above);
  # picks the most recent entry under /data/snapshots/.
  LATEST_SNAPSHOT=$(kubectl exec -n prometheus deploy/prometheus-server -c prometheus-server -- ls -t /data/snapshots/ | head -n 1)
  POD_NAME=$(kubectl get pods -n prometheus -l app.kubernetes.io/component=server -l app.kubernetes.io/instance=prometheus -l app.kubernetes.io/name=prometheus -o jsonpath="{.items[0].metadata.name}")
  kubectl -n prometheus cp "$POD_NAME:/data/snapshots/$LATEST_SNAPSHOT" -c prometheus-server ./prometheus-latest-snapshot
}
```
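A quick local sanity check after `backup()` (a sketch; paths follow the function above): every TSDB block directory in the snapshot should contain a meta.json, an index, and a chunks/ directory.

```
# List block metadata files and the overall size of the local copy
find ./prometheus-latest-snapshot -maxdepth 2 -name meta.json
du -sh ./prometheus-latest-snapshot
```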
```
restore(){
  # Caveat: kubectl cp/exec need a running pod that mounts the PVC; after
  # scaling to zero there is none, so in practice run the copy and move
  # steps against a helper pod instead (see other() below).
  kubectl scale deployment prometheus-server --replicas=0 -n monitoring
  kubectl cp ./prometheus-latest-snapshot monitoring/<prometheus-pod-name>:/data
  kubectl exec -n monitoring <prometheus-pod-name> -- mv /data /data-old
  # After the move the copied snapshot sits under /data-old, not /data
  kubectl exec -n monitoring <prometheus-pod-name> -- mv /data-old/prometheus-latest-snapshot /data
  kubectl scale deployment prometheus-server --replicas=1 -n monitoring
}
```
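Once the pod is back up, a quick way to confirm the restored blocks were loaded is the TSDB status endpoint (a sketch; assumes the monitoring namespace used above):

```
# Port-forward and query TSDB status; headStats and block counts should
# reflect the restored data rather than an empty database.
kubectl -n monitoring port-forward deploy/prometheus-server 9090:9090 &
PF_PID=$!
sleep 2
curl -s http://localhost:9090/api/v1/status/tsdb
kill "$PF_PID"
```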
```
az_snap(){
  # Disk-level alternative: snapshot the Azure managed disk backing the PVC
  az snapshot create --resource-group myResourceGroup --source myDisk --name prometheus-snapshot
}
```
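The restore path for the disk-level approach is to create a new managed disk from the snapshot and point a PersistentVolume at it; a hedged sketch with placeholder resource names:

```
# Create a new managed disk from the snapshot (names are placeholders),
# then rebind the PV/PVC to the new disk.
az disk create \
  --resource-group myResourceGroup \
  --name prometheus-disk-restored \
  --source prometheus-snapshot
```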
```
check(){
  # List the first two levels under /data to verify blocks and snapshots
  kubectl exec -n prometheus deploy/prometheus-server -c prometheus-server -- find /data -mindepth 1 -maxdepth 2
}
```
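For orientation, the output of `check()` on a healthy server looks roughly like this (a sketch: block ULIDs and snapshot names are made up):

```
# /data/wal                                  write-ahead log
# /data/chunks_head                          head chunks spilled to disk
# /data/01HXXXXXXXXXXXXXXXXXXXXXXX           one directory per compacted block
# /data/snapshots/20250201T090000Z-XXXXXXXX  snapshots from the admin API
```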
```
other(){
  # Standalone restore via a temporary helper pod that mounts the PVC,
  # so the data can be written while the server is scaled to zero.
  NAMESPACE="prometheus"
  DEPLOYMENT="prometheus-server"
  PVC_NAME="prometheus-server"
  LOCAL_BACKUP_DIR="./prometheus-latest-snapshot"

  echo "🔍 Finding Prometheus pod..."
  # Same label selector as backup(), matching the Helm chart's labels
  POD_NAME=$(kubectl get pods -n "$NAMESPACE" -l app.kubernetes.io/component=server -l app.kubernetes.io/instance=prometheus -l app.kubernetes.io/name=prometheus -o jsonpath="{.items[0].metadata.name}")

  if [[ -z "$POD_NAME" ]]; then
    echo "❌ Error: Prometheus pod not found!"
    return 1
  fi

  echo "⏳ Scaling down Prometheus..."
  kubectl scale deployment "$DEPLOYMENT" --replicas=0 -n "$NAMESPACE"
  sleep 5

  echo "📦 Creating temporary restore pod..."
  # The overrides define the pod's command, so no trailing args are needed
  kubectl run restore-pod --restart=Never -n "$NAMESPACE" \
    --image=busybox --overrides='
  {
    "apiVersion": "v1",
    "kind": "Pod",
    "metadata": { "name": "restore-pod" },
    "spec": {
      "containers": [{
        "name": "restore-container",
        "image": "busybox",
        "command": ["sleep", "3600"],
        "volumeMounts": [{ "name": "prometheus-data", "mountPath": "/data" }]
      }],
      "volumes": [{
        "name": "prometheus-data",
        "persistentVolumeClaim": { "claimName": "'"$PVC_NAME"'" }
      }]
    }
  }'

  echo "⏳ Waiting for restore pod to be ready..."
  kubectl wait --for=condition=Ready pod/restore-pod -n "$NAMESPACE" --timeout=120s

  echo "📤 Copying local snapshot to PVC..."
  # Lands at /data/prometheus-latest-snapshot; move its contents up to /data
  # afterwards if Prometheus should load the blocks directly.
  kubectl cp "$LOCAL_BACKUP_DIR" "$NAMESPACE/restore-pod:/data"

  echo "✅ Restore completed! Deleting restore pod..."
  kubectl delete pod restore-pod -n "$NAMESPACE" --force --grace-period=0

  echo "🚀 Scaling up Prometheus..."
  kubectl scale deployment "$DEPLOYMENT" --replicas=1 -n "$NAMESPACE"

  echo "🎉 Snapshot restore completed successfully!"
}
```
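If these functions are saved to a file (e.g. the backup-restore.sh referenced at the end of this page), they can be sourced and run one step at a time; a hypothetical session:

```
# Hypothetical usage: source the functions, back up, verify, restore.
. ./backup-restore.sh
backup   # copy the latest snapshot out of the cluster
check    # inspect what is currently on the volume
other    # restore the local snapshot onto the PVC via a helper pod
```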
Depending on the installation, the TSDB data directory is /data (as in the Helm chart above) or /var/lib/prometheus (common for package installs).
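To confirm which path a given deployment actually uses, read the flag off the pod spec (a sketch; the jsonpath assumes the flags are passed as container args, as the Helm chart does):

```
# Print container args and filter for the TSDB path flag
kubectl get deploy prometheus-server -n prometheus \
  -o jsonpath='{.spec.template.spec.containers[*].args}' \
  | tr ',' '\n' | grep storage.tsdb.path
```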
Quick reference (backup-restore.sh):

```
# Snapshot via the admin API, then copy the snapshots out
kubectl exec -n monitoring <prometheus-pod-name> -- curl -X POST http://localhost:9090/api/v1/admin/tsdb/snapshot
kubectl cp monitoring/<prometheus-pod-name>:/data/snapshots ./prometheus-snapshots

# Or shut down and copy the whole data directory (needs a pod or helper
# that still mounts the PVC while the server is scaled down)
kubectl scale deployment prometheus-server --replicas=0 -n monitoring
kubectl cp monitoring/<prometheus-pod-name>:/data ./prometheus-backup
kubectl scale deployment prometheus-server --replicas=1 -n monitoring

# Or snapshot the PVC's underlying disk
az snapshot create --resource-group myResourceGroup --source myDisk --name prometheus-snapshot
```