Prometheus backup

From UVOO Tech Wiki
Revision as of 09:02, 1 February 2025 by Busk (talk | contribs)
(diff) ← Older revision | Latest revision (diff) | Newer revision → (diff)
Jump to navigation Jump to search

https://devopstales.github.io/kubernetes/backup-and-retore-prometheus/

Big script example that might need some work

# Install (or upgrade) the "prometheus" release. The extra server flags turn on
# the lifecycle and admin HTTP APIs; the snapshot call further down this page
# goes through the admin API.
helm upgrade prometheus prometheus-community/prometheus \
    --install \
    --set "server.extraFlags={web.enable-lifecycle,web.enable-admin-api}" \
    --values prometheus.values.yaml
# Copy the newest Prometheus TSDB snapshot out of the server pod into
# ./prometheus-latest-snapshot.
#
# Only copies a snapshot that already exists under /data/snapshots; it does not
# create one.
#
# Arguments: none
# Outputs:   error messages on stderr
# Returns:   1 when no server pod or no snapshot is found, otherwise the exit
#            status of `kubectl cp`
backup(){
  local namespace="prometheus"
  local container="prometheus-server"
  # All label requirements go into ONE comma-separated selector (logical AND);
  # repeated -l flags are not combined by kubectl.
  local selector="app.kubernetes.io/component=server,app.kubernetes.io/instance=prometheus,app.kubernetes.io/name=prometheus"
  local pod_name latest_snapshot

  # Declared separately from the assignment so a kubectl failure is not masked.
  pod_name=$(kubectl get pods -n "$namespace" -l "$selector" \
    -o jsonpath="{.items[0].metadata.name}") || pod_name=""
  if [[ -z "$pod_name" ]]; then
    echo "Error: Prometheus server pod not found in namespace ${namespace}" >&2
    return 1
  fi

  # List snapshots in the very pod we copy from; `ls -t` puts the newest first.
  latest_snapshot=$(kubectl exec -n "$namespace" "$pod_name" -c "$container" -- \
    ls -t /data/snapshots/ | head -n 1)
  if [[ -z "$latest_snapshot" ]]; then
    # Without this guard the copy below would target /data/snapshots/ itself.
    echo "Error: no snapshot found in ${pod_name}:/data/snapshots" >&2
    return 1
  fi

  kubectl -n "$namespace" cp "${pod_name}:/data/snapshots/${latest_snapshot}" \
    -c "$container" ./prometheus-latest-snapshot
}

# Sketch of an in-place restore of ./prometheus-latest-snapshot in the
# "monitoring" namespace. Not runnable as written.
#
# NOTE(review): <prometheus-pod-name> and <snapshot-id> are placeholders to be
# substituted by hand. Left as they are, the shell parses "<name" and ">..." as
# input/output redirections instead of passing them to kubectl.
# NOTE(review): this function uses namespace "monitoring" while backup() uses
# "prometheus" — confirm which one applies.
restore(){
  # Stop Prometheus by scaling the deployment to zero replicas.
  kubectl scale deployment prometheus-server --replicas=0 -n monitoring
  # NOTE(review): the deployment was just scaled to 0, so presumably no
  # Prometheus pod is left for the cp/exec calls below — confirm which pod
  # (e.g. a helper pod mounting the same volume) is meant here.
  kubectl cp ./prometheus-latest-snapshot monitoring/<prometheus-pod-name>:/data
  # Move the current data directory aside, then move the snapshot into place.
  # NOTE(review): if the first mv succeeds, /data no longer exists, yet the
  # second mv still reads from /data/snapshots — the source presumably needs
  # to be under /data-old; confirm.
  kubectl exec -n monitoring <prometheus-pod-name> -- mv /data /data-old
  kubectl exec -n monitoring <prometheus-pod-name> -- mv /data/snapshots/<snapshot-id> /data
  # Start Prometheus again.
  kubectl scale deployment prometheus-server --replicas=1 -n monitoring
}

# Create an Azure disk snapshot with `az snapshot create`.
#
# Arguments (all optional, defaults are the original hard-coded values):
#   $1 - resource group            (default: myResourceGroup)
#   $2 - source disk name or ID    (default: myDisk)
#   $3 - name of the new snapshot  (default: prometheus-snapshot)
# Returns: exit status of the az command
#
# NOTE(review): the defaults look like placeholders — pass the real resource
# group and disk when calling this.
az_snap(){
  local resource_group="${1:-myResourceGroup}"
  local source_disk="${2:-myDisk}"
  local snapshot_name="${3:-prometheus-snapshot}"

  az snapshot create \
    --resource-group "$resource_group" \
    --source "$source_disk" \
    --name "$snapshot_name"
}

# Print every entry one or two levels below /data inside the prometheus-server
# container of the prometheus-server deployment (namespace "prometheus").
check(){
  local -a exec_target=(-n prometheus deploy/prometheus-server -c prometheus-server)
  local -a list_cmd=(find /data -mindepth 1 -maxdepth 2)

  kubectl exec "${exec_target[@]}" -- "${list_cmd[@]}"
}

# Restore ./prometheus-latest-snapshot into the Prometheus PVC through a
# temporary busybox helper pod.
#
# Steps: locate the running server pod, scale the deployment to 0 and wait for
# that pod to disappear, start a helper pod that mounts the PVC at /data, copy
# the local snapshot into it, delete the helper pod and scale back to 1.
# Once Prometheus has been scaled down, the helper pod is always removed and
# the deployment is always scaled back up, even when a step in between fails.
#
# Arguments: none
# Outputs:   progress on stdout, errors on stderr
# Returns:   0 on success, 1 on any failure (uses return, not exit, so the
#            calling shell survives)
other(){
  local namespace="prometheus"
  local deployment="prometheus-server"
  local pvc_name="prometheus-server"
  local local_backup_dir="./prometheus-latest-snapshot"
  local helper_pod="restore-pod"
  local wait_timeout="120s"
  # Same labels backup() selects on, joined into one selector (logical AND).
  local selector="app.kubernetes.io/component=server,app.kubernetes.io/instance=prometheus,app.kubernetes.io/name=prometheus"
  local pod_name overrides
  local status=0

  # Fail before touching the cluster when there is nothing to restore.
  if [[ ! -e "$local_backup_dir" ]]; then
    echo "❌ Error: local snapshot $local_backup_dir not found!" >&2
    return 1
  fi

  echo "🔍 Finding Prometheus pod..."
  pod_name=$(kubectl get pods -n "$namespace" -l "$selector" \
    -o jsonpath="{.items[0].metadata.name}") || pod_name=""
  if [[ -z "$pod_name" ]]; then
    echo "❌ Error: Prometheus pod not found!" >&2
    return 1
  fi

  echo "⏳ Scaling down Prometheus..."
  kubectl scale deployment "$deployment" --replicas=0 -n "$namespace" || return 1
  # Wait for the old pod to actually go away instead of sleeping a fixed time,
  # so the helper pod does not compete with it for the volume.
  kubectl wait --for=delete "pod/${pod_name}" -n "$namespace" \
    --timeout="$wait_timeout" || status=1

  overrides='{
    "apiVersion": "v1",
    "kind": "Pod",
    "metadata": {
      "name": "'"${helper_pod}"'"
    },
    "spec": {
      "containers": [{
        "name": "restore-container",
        "image": "busybox",
        "command": ["sleep", "3600"],
        "volumeMounts": [{
          "name": "prometheus-data",
          "mountPath": "/data"
        }]
      }],
      "volumes": [{
        "name": "prometheus-data",
        "persistentVolumeClaim": {
          "claimName": "'"${pvc_name}"'"
        }
      }]
    }
  }'

  if (( status == 0 )); then
    echo "📦 Creating temporary restore pod..."
    # Created detached: no --rm/-it and no background job, because nothing
    # needs to attach to the pod; it is deleted explicitly below.
    kubectl run "$helper_pod" --restart=Never -n "$namespace" \
      --image=busybox --overrides="$overrides" -- sleep 3600 || status=1
  fi

  if (( status == 0 )); then
    echo "⏳ Waiting for restore pod to be ready..."
    kubectl wait --for=condition=Ready "pod/${helper_pod}" -n "$namespace" \
      --timeout="$wait_timeout" || status=1
  fi

  if (( status == 0 )); then
    echo "📤 Copying local snapshot to PVC..."
    # NOTE(review): /data already exists in the helper pod, so kubectl cp is
    # expected to place the snapshot in a sub-directory of /data rather than
    # merging its contents — confirm this is the layout Prometheus should see.
    kubectl cp "$local_backup_dir" "${namespace}/${helper_pod}:/data" || status=1
  fi

  if (( status == 0 )); then
    echo "✅ Restore completed! Deleting restore pod..."
  else
    echo "❌ Error: restore step failed, cleaning up..." >&2
  fi
  kubectl delete pod "$helper_pod" -n "$namespace" --force --grace-period=0 \
    --ignore-not-found || status=1

  echo "🚀 Scaling up Prometheus..."
  kubectl scale deployment "$deployment" --replicas=1 -n "$namespace" || status=1

  if (( status != 0 )); then
    echo "❌ Error: snapshot restore failed!" >&2
    return 1
  fi
  echo "🎉 Snapshot restore completed successfully!"
}

Prometheus data directory: /data or /var/lib/prometheus, depending on how it is deployed.

cat backup-restore.sh
# Ask Prometheus to create a TSDB snapshot through its admin HTTP API, then
# copy the whole snapshots directory out of the pod.
# Replace <prometheus-pod-name> with the real pod name before running; left
# as-is, the shell treats "<...>" as redirections.
# NOTE(review): this needs curl inside the container — confirm the image
# ships it.
kubectl exec -n monitoring <prometheus-pod-name> -- curl -X POST http://localhost:9090/api/v1/admin/tsdb/snapshot

kubectl cp monitoring/<prometheus-pod-name>:/data/snapshots ./prometheus-snapshots

Or shut Prometheus down and copy the whole data directory:

# Cold copy: scale Prometheus to zero, copy /data out, then scale back to one.
# Replace <prometheus-pod-name> with the real pod name before running.
kubectl scale deployment prometheus-server --replicas=0 -n monitoring

# NOTE(review): with the deployment at 0 replicas there is presumably no
# Prometheus pod left to copy from — confirm which pod is meant here.
kubectl cp monitoring/<prometheus-pod-name>:/data ./prometheus-backup

kubectl scale deployment prometheus-server --replicas=1 -n monitoring

Or snapshot the disk behind the PVC:

# Create an Azure snapshot named prometheus-snapshot from the given disk.
# NOTE(review): myResourceGroup and myDisk look like placeholders — substitute
# the real resource group and disk.
az snapshot create --resource-group myResourceGroup --source myDisk --name prometheus-snapshot