Difference between revisions of "Prometheus backup"

From UVOO Tech Wiki
Jump to navigation Jump to search
 
Line 1: Line 1:
 
https://devopstales.github.io/kubernetes/backup-and-retore-prometheus/
 
https://devopstales.github.io/kubernetes/backup-and-retore-prometheus/
 +
 +
 +
# Big script example that might need some work
 +
 +
```
 +
helm upgrade --install prometheus prometheus-community/prometheus \
 +
    --set "server.extraFlags={web.enable-lifecycle,web.enable-admin-api}" \
 +
    -f prometheus.values.yaml
 +
```
 +
 +
```
 +
backup(){
 +
  LATEST_SNAPSHOT=$(kubectl exec -n prometheus deploy/prometheus-server -c prometheus-server -- ls -t /data/snapshots/ | head -n 1)
 +
  POD_NAME=$(kubectl get pods -n prometheus -l app.kubernetes.io/component=server -l app.kubernetes.io/instance=prometheus -l app.kubernetes.io/name=prometheus -o jsonpath="{.items[0].metadata.name}")
 +
  kubectl -n prometheus cp $POD_NAME:/data/snapshots/$LATEST_SNAPSHOT -c prometheus-server ./prometheus-latest-snapshot
 +
}
 +
 +
restore(){
 +
  kubectl scale deployment prometheus-server --replicas=0 -n monitoring
 +
  kubectl cp ./prometheus-latest-snapshot monitoring/<prometheus-pod-name>:/data
 +
  kubectl exec -n monitoring <prometheus-pod-name> -- mv /data /data-old
 +
  kubectl exec -n monitoring <prometheus-pod-name> -- mv /data/snapshots/<snapshot-id> /data
 +
  kubectl scale deployment prometheus-server --replicas=1 -n monitoring
 +
}
 +
 +
az_snap(){
 +
  az snapshot create --resource-group myResourceGroup --source myDisk --name prometheus-snapshot
 +
}
 +
 +
check(){
 +
  kubectl exec -n prometheus deploy/prometheus-server -c prometheus-server -- find /data -mindepth 1 -maxdepth 2
 +
}
 +
 +
other(){
 +
  #!/bin/bash
 +
 +
  NAMESPACE="prometheus"
 +
  DEPLOYMENT="prometheus-server"
 +
  PVC_NAME="prometheus-server"
 +
  LOCAL_BACKUP_DIR="./prometheus-latest-snapshot"
 +
 +
  echo "🔍 Finding Prometheus pod..."
 +
  POD_NAME=$(kubectl get pods -n $NAMESPACE -l app=prometheus -o jsonpath="{.items[0].metadata.name}")
 +
 +
  if [[ -z "$POD_NAME" ]]; then
 +
      echo "❌ Error: Prometheus pod not found!"
 +
      exit 1
 +
  fi
 +
 +
  echo "⏳ Scaling down Prometheus..."
 +
  kubectl scale deployment $DEPLOYMENT --replicas=0 -n $NAMESPACE
 +
  sleep 5
 +
 +
  echo "📦 Creating temporary restore pod..."
 +
  kubectl run restore-pod --rm -it --restart=Never -n $NAMESPACE \
 +
    --image=busybox --overrides='
 +
  {
 +
    "apiVersion": "v1",
 +
    "kind": "Pod",
 +
    "metadata": {
 +
      "name": "restore-pod"
 +
    },
 +
    "spec": {
 +
      "containers": [{
 +
        "name": "restore-container",
 +
        "image": "busybox",
 +
        "command": ["sleep", "3600"],
 +
        "volumeMounts": [{
 +
          "name": "prometheus-data",
 +
          "mountPath": "/data"
 +
        }]
 +
      }],
 +
      "volumes": [{
 +
        "name": "prometheus-data",
 +
        "persistentVolumeClaim": {
 +
          "claimName": "'"$PVC_NAME"'"
 +
        }
 +
      }]
 +
    }
 +
  }' -- sleep 3600 &
 +
 +
  # Wait for the restore pod to be ready
 +
  echo "⏳ Waiting for restore pod to be ready..."
 +
  sleep 10
 +
 +
  echo "📤 Copying local snapshot to PVC..."
 +
  kubectl cp $LOCAL_BACKUP_DIR $NAMESPACE/restore-pod:/data
 +
 +
  echo "✅ Restore completed! Deleting restore pod..."
 +
  kubectl delete pod restore-pod -n $NAMESPACE --force --grace-period=0
 +
 +
  echo "🚀 Scaling up Prometheus..."
 +
  kubectl scale deployment $DEPLOYMENT --replicas=1 -n $NAMESPACE
 +
 +
  echo "🎉 Snapshot restore completed successfully!"
 +
 +
}
 +
 +
```
  
 
/data or /var/lib/prometheus
 
/data or /var/lib/prometheus

Latest revision as of 09:02, 1 February 2025

https://devopstales.github.io/kubernetes/backup-and-retore-prometheus/

Big script example that might need some work

# Install/upgrade Prometheus with the lifecycle and admin HTTP APIs enabled.
# web.enable-admin-api is required for the TSDB snapshot endpoint
# (POST /api/v1/admin/tsdb/snapshot) that the backup flow below relies on.
helm upgrade --install prometheus prometheus-community/prometheus \
    --set "server.extraFlags={web.enable-lifecycle,web.enable-admin-api}" \
    -f prometheus.values.yaml
backup(){
  # Copy the most recent TSDB snapshot out of the Prometheus server pod into
  # ./prometheus-latest-snapshot on the local machine.
  # NOTE(review): this only fetches the newest existing snapshot; a snapshot
  # must have been created first via the admin API -- confirm workflow.
  local latest_snapshot pod_name
  latest_snapshot=$(kubectl exec -n prometheus deploy/prometheus-server -c prometheus-server -- ls -t /data/snapshots/ | head -n 1)
  if [[ -z "$latest_snapshot" ]]; then
    echo "Error: no snapshots found in /data/snapshots/" >&2
    return 1
  fi
  pod_name=$(kubectl get pods -n prometheus -l app.kubernetes.io/component=server -l app.kubernetes.io/instance=prometheus -l app.kubernetes.io/name=prometheus -o jsonpath="{.items[0].metadata.name}")
  if [[ -z "$pod_name" ]]; then
    echo "Error: Prometheus server pod not found" >&2
    return 1
  fi
  # Quote the expansions so an unexpected value cannot word-split the command.
  kubectl -n prometheus cp "$pod_name:/data/snapshots/$latest_snapshot" -c prometheus-server ./prometheus-latest-snapshot
}

restore(){
  # Restore a previously downloaded snapshot into the Prometheus data volume,
  # then bring the server back up.
  # NOTE(review): with replicas=0 the server pod no longer exists, so the
  # cp/exec steps below must target a helper pod mounting the same PVC
  # (see other() for an example) -- confirm before use.
  kubectl scale deployment prometheus-server --replicas=0 -n monitoring
  kubectl cp ./prometheus-latest-snapshot monitoring/<prometheus-pod-name>:/data
  kubectl exec -n monitoring <prometheus-pod-name> -- mv /data /data-old
  # Bug fix: after the move above, the snapshot lives under /data-old, not
  # /data -- the original referenced /data/snapshots/<snapshot-id>, which no
  # longer exists at this point.
  kubectl exec -n monitoring <prometheus-pod-name> -- mv /data-old/snapshots/<snapshot-id> /data
  kubectl scale deployment prometheus-server --replicas=1 -n monitoring
}

az_snap(){
  # Snapshot the Azure managed disk that backs the Prometheus PVC.
  # NOTE(review): myResourceGroup/myDisk are placeholders -- replace with the
  # real resource-group and disk names before running.
  az snapshot create --resource-group myResourceGroup --source myDisk --name prometheus-snapshot
}

check(){
  # List the contents of the Prometheus data directory (two levels deep)
  # to verify snapshots/data are present inside the server container.
  kubectl exec -n prometheus deploy/prometheus-server -c prometheus-server -- find /data -mindepth 1 -maxdepth 2
}

other(){
  # End-to-end restore: scale Prometheus down, mount its PVC in a throwaway
  # busybox pod, copy the local snapshot onto the volume, then scale back up.
  # (The stray "#!/bin/bash" that sat inside this function body was removed:
  # a shebang is only meaningful on line 1 of a script.)
  local NAMESPACE="prometheus"
  local DEPLOYMENT="prometheus-server"
  local PVC_NAME="prometheus-server"
  local LOCAL_BACKUP_DIR="./prometheus-latest-snapshot"
  local POD_NAME

  echo "🔍 Finding Prometheus pod..."
  POD_NAME=$(kubectl get pods -n "$NAMESPACE" -l app=prometheus -o jsonpath="{.items[0].metadata.name}")

  if [[ -z "$POD_NAME" ]]; then
      echo "❌ Error: Prometheus pod not found!"
      exit 1
  fi

  echo "⏳ Scaling down Prometheus..."
  kubectl scale deployment "$DEPLOYMENT" --replicas=0 -n "$NAMESPACE"
  sleep 5

  echo "📦 Creating temporary restore pod..."
  # Run detached: the pod's own command (from the overrides) is "sleep 3600",
  # so the original backgrounded "--rm -it ... &" attach was racy and
  # unnecessary. We delete the pod explicitly below.
  kubectl run restore-pod --restart=Never -n "$NAMESPACE" \
    --image=busybox --overrides='
  {
    "apiVersion": "v1",
    "kind": "Pod",
    "metadata": {
      "name": "restore-pod"
    },
    "spec": {
      "containers": [{
        "name": "restore-container",
        "image": "busybox",
        "command": ["sleep", "3600"],
        "volumeMounts": [{
          "name": "prometheus-data",
          "mountPath": "/data"
        }]
      }],
      "volumes": [{
        "name": "prometheus-data",
        "persistentVolumeClaim": {
          "claimName": "'"$PVC_NAME"'"
        }
      }]
    }
  }'

  # Wait for the pod to actually be Ready instead of a fixed 10s sleep.
  echo "⏳ Waiting for restore pod to be ready..."
  kubectl wait --for=condition=Ready pod/restore-pod -n "$NAMESPACE" --timeout=120s

  echo "📤 Copying local snapshot to PVC..."
  kubectl cp "$LOCAL_BACKUP_DIR" "$NAMESPACE/restore-pod:/data"

  echo "✅ Restore completed! Deleting restore pod..."
  kubectl delete pod restore-pod -n "$NAMESPACE" --force --grace-period=0

  echo "🚀 Scaling up Prometheus..."
  kubectl scale deployment "$DEPLOYMENT" --replicas=1 -n "$NAMESPACE"

  echo "🎉 Snapshot restore completed successfully!"
}

The Prometheus data directory is `/data` or `/var/lib/prometheus`, depending on the installation.

cat backup-restore.sh
kubectl exec -n monitoring <prometheus-pod-name> -- curl -X POST http://localhost:9090/api/v1/admin/tsdb/snapshot

kubectl cp monitoring/<prometheus-pod-name>:/data/snapshots ./prometheus-snapshots

Or shut Prometheus down and copy the data directory directly:

kubectl scale deployment prometheus-server --replicas=0 -n monitoring

kubectl cp monitoring/<prometheus-pod-name>:/data ./prometheus-backup

kubectl scale deployment prometheus-server --replicas=1 -n monitoring

Or create a snapshot of the disk backing the PVC:

az snapshot create --resource-group myResourceGroup --source myDisk --name prometheus-snapshot