K8s monitor namespace pd

From UVOO Tech Wiki
Revision as of 10:57, 29 January 2025 by Busk (talk | contribs) (Created page with "``` # Use Golang as the base image FROM golang:1.20 AS builder # Set working directory WORKDIR /app # Copy Go modules and download dependencies COPY go.mod go.sum ./ RUN go...")
(diff) ← Older revision | Latest revision (diff) | Newer revision → (diff)
Jump to navigation Jump to search
# ---- Build stage: compile the health checker with the Go toolchain ----
FROM golang:1.20 AS builder

# All build steps run from /app
WORKDIR /app

# Copy only the module files first so the dependency download layer stays
# cached until go.mod or go.sum changes
COPY go.mod go.sum ./
RUN go mod download

# Copy the rest of the application code
COPY . .

# Build a statically linked binary. The builder image is glibc-based while the
# runtime image below is musl-based Alpine; with cgo enabled the binary would
# be dynamically linked against glibc and fail to start there.
RUN CGO_ENABLED=0 go build -o health-checker main.go

# ---- Runtime stage: minimal image that only carries the binary ----
# Pinned to a release tag so rebuilds are reproducible (":latest" floats)
FROM alpine:3.19

# CA certificates are needed for the HTTPS call to PagerDuty
RUN apk --no-cache add ca-certificates

# Set working directory
WORKDIR /root/

# Copy the built binary from the builder stage
COPY --from=builder /app/health-checker .

# Run the application
CMD ["./health-checker"]

Go

package main

import (
    "bytes"
    "context"
    "encoding/json"
    "fmt"
    "log"
    "net/http"
    "os"
    "time"

    metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
    "k8s.io/client-go/kubernetes"
    "k8s.io/client-go/rest"
)

// pagerDutyURL is the endpoint that sendPagerDutyAlert POSTs its JSON event
// to. The path suggests the PagerDuty Events API v2 enqueue endpoint.
const pagerDutyURL = "https://events.pagerduty.com/v2/enqueue"

// PagerDutyPayload carries the alert details that are serialized under the
// "payload" key of a PagerDutyEvent.
type PagerDutyPayload struct {
    Summary   string `json:"summary"`
    Severity  string `json:"severity"`
    Source    string `json:"source"`
    Component string `json:"component"`
}

// PagerDutyEvent is the JSON document sent to PagerDuty: the routing key that
// selects the receiving integration, the action to perform, and the alert
// details.
type PagerDutyEvent struct {
    RoutingKey  string           `json:"routing_key"`
    EventAction string           `json:"event_action"`
    Payload     PagerDutyPayload `json:"payload"`
}

// Send alert to PagerDuty
func sendPagerDutyAlert(nodeName string) {
    pdKey := os.Getenv("PAGERDUTY_KEY")
    if pdKey == "" {
        log.Println("PagerDuty API key not set")
        return
    }

    event := PagerDutyEvent{
        RoutingKey:  pdKey,
        EventAction: "trigger",
    }
    event.Payload.Summary = fmt.Sprintf("Node %s is unhealthy", nodeName)
    event.Payload.Severity = "critical"
    event.Payload.Source = "Kubernetes"
    event.Payload.Component = nodeName

    data, err := json.Marshal(event)
    if err != nil {
        log.Printf("Error marshaling JSON: %v\n", err)
        return
    }

    resp, err := http.Post(pagerDutyURL, "application/json", bytes.NewBuffer(data))
    if err != nil {
        log.Printf("Error sending alert to PagerDuty: %v\n", err)
        return
    }
    defer resp.Body.Close()

    log.Println("PagerDuty alert sent successfully")
}

// Check Kubernetes node health
func checkNodeHealth() {
    config, err := rest.InClusterConfig()
    if err != nil {
        log.Fatalf("Error getting Kubernetes config: %v\n", err)
    }

    clientset, err := kubernetes.NewForConfig(config)
    if err != nil {
        log.Fatalf("Error creating Kubernetes client: %v\n", err)
    }

    nodes, err := clientset.CoreV1().Nodes().List(context.TODO(), metav1.ListOptions{})
    if err != nil {
        log.Fatalf("Error listing nodes: %v\n", err)
    }

    for _, node := range nodes.Items {
        for _, condition := range node.Status.Conditions {
            if condition.Type == "Ready" && condition.Status != "True" {
                log.Printf("Node %s is unhealthy\n", node.Name)
                sendPagerDutyAlert(node.Name)
            }
        }
    }
}

// main runs the node health check in an endless loop, pausing for a fixed
// interval after each pass. It never returns on its own.
func main() {
    const pollInterval = 60 * time.Second

    for {
        checkNodeHealth()
        // Wait out the interval before the next pass over the cluster.
        time.Sleep(pollInterval)
    }
}

```