Sumo jira example

From UVOO Tech Wiki
Revision as of 20:23, 11 November 2024 by Busk (talk | contribs)
(diff) ← Older revision | Latest revision (diff) | Newer revision → (diff)
Jump to navigation Jump to search

Sumo Logic, PagerDuty, and Jira automation based on log counts per time window

Requirements

pip install requests

Service via Systemd

/etc/systemd/system/sumo-monitor.service

[Unit]
Description=Sumo Logic Monitor Service
# The script calls remote HTTP APIs as soon as it starts, so wait until the
# network is actually usable rather than merely configured.
Wants=network-online.target
After=network-online.target

[Service]
ExecStart=/usr/bin/python3 /path/to/sumo-monitor.py
# Unbuffered stdout so the script's print() output reaches the journal promptly.
Environment=PYTHONUNBUFFERED=1
Restart=always
# Pause between restarts so repeated failures do not trip systemd's start rate limit.
RestartSec=30
User=nobody
Group=nogroup

[Install]
WantedBy=multi-user.target

PagerDuty and Sumo Logic variant

import requests
import time
from requests.auth import HTTPBasicAuth

# Configuration
# NOTE: the "your_..." values below look like placeholders — replace them
# with real settings before running.

# Base URL for collector lookups; check_collector_health() appends COLLECTOR_ID.
SUMO_API_URL = "https://api.sumologic.com/api/v1/collectors"
# Endpoint queried by check_log_count().
SUMO_SEARCH_URL = "https://api.sumologic.com/api/v1/logs/search"
# Credentials used for the Sumo Logic requests.
SUMO_ACCESS_ID = "your_sumo_access_id"
SUMO_ACCESS_KEY = "your_sumo_access_key"
# Identifier of the collector whose health is checked.
COLLECTOR_ID = "your_collector_id"
# Sent as the 'q' parameter of the search request.
SEARCH_QUERY = '_sourceCategory=your_source_category | count by _sourceHost'
# Seconds main() sleeps between monitoring passes. The search window in
# check_log_count() ('now-10m') is hard-coded separately and is not derived
# from this value.
CHECK_INTERVAL = 600  # 600 seconds

# Endpoint that create_pagerduty_alert() posts trigger events to.
PAGERDUTY_API_URL = "https://events.pagerduty.com/v2/enqueue"
# Sent as "routing_key" in the event body.
PAGERDUTY_ROUTING_KEY = "your_pagerduty_routing_key"

def check_collector_health():
    """Return whether the configured Sumo Logic collector reports itself alive.

    Issues a GET to ``{SUMO_API_URL}/{COLLECTOR_ID}`` authenticated with HTTP
    Basic credentials built from ``SUMO_ACCESS_ID`` and ``SUMO_ACCESS_KEY``.

    Returns:
        The ``alive`` value of the ``collector`` object in the JSON response
        when the API answers 200. ``False`` when the request fails, another
        status code is returned, or the response lacks the expected fields.
    """
    try:
        response = requests.get(
            f"{SUMO_API_URL}/{COLLECTOR_ID}",
            headers={'Content-Type': 'application/json'},
            # HTTPBasicAuth base64-encodes "id:key"; a hand-built
            # "Basic id:key" header is not valid Basic authentication.
            auth=HTTPBasicAuth(SUMO_ACCESS_ID, SUMO_ACCESS_KEY),
            # Without a timeout a stalled connection blocks the monitor forever.
            timeout=30,
        )
    except requests.RequestException as exc:
        print(f"Failed to get collector health: {exc}")
        return False

    if response.status_code != 200:
        print(f"Failed to get collector health: {response.status_code}")
        return False

    try:
        return response.json()['collector']['alive']
    except (ValueError, KeyError, TypeError) as exc:
        # Body was not JSON or did not have the collector/alive structure.
        print(f"Failed to get collector health: unexpected response ({exc!r})")
        return False

def check_log_count():
    """Return whether the log count for the last 10 minutes exceeds 10.

    Sends ``SEARCH_QUERY`` to ``SUMO_SEARCH_URL`` for the window
    ``now-10m`` .. ``now``, authenticated with HTTP Basic credentials built
    from ``SUMO_ACCESS_ID`` and ``SUMO_ACCESS_KEY``.

    Returns:
        ``True`` when the response's ``count`` field is greater than 10.
        ``False`` when it is not, and also when the request fails, a non-200
        status is returned, or the response cannot be interpreted — so
        callers cannot tell "too few logs" apart from "check failed".
    """
    params = {
        'q': SEARCH_QUERY,
        'from': 'now-10m',
        'to': 'now'
    }
    try:
        response = requests.get(
            SUMO_SEARCH_URL,
            headers={'Content-Type': 'application/json'},
            params=params,
            # HTTPBasicAuth base64-encodes "id:key"; a hand-built
            # "Basic id:key" header is not valid Basic authentication.
            auth=HTTPBasicAuth(SUMO_ACCESS_ID, SUMO_ACCESS_KEY),
            # Without a timeout a stalled connection blocks the monitor forever.
            timeout=30,
        )
    except requests.RequestException as exc:
        print(f"Failed to get log count: {exc}")
        return False

    if response.status_code != 200:
        print(f"Failed to get log count: {response.status_code}")
        return False

    try:
        return response.json()['count'] > 10
    except (ValueError, KeyError, TypeError) as exc:
        # Body was not JSON, had no 'count', or 'count' was not comparable.
        print(f"Failed to get log count: unexpected response ({exc!r})")
        return False

def create_pagerduty_alert():
    """Send a critical "log count below threshold" trigger event to PagerDuty.

    Posts a trigger event carrying ``PAGERDUTY_ROUTING_KEY`` to
    ``PAGERDUTY_API_URL`` and prints the outcome. A 202 response is treated
    as success. Network errors are reported and swallowed so that a
    PagerDuty outage does not stop the monitoring loop.

    Returns:
        None.
    """
    data = {
        "routing_key": PAGERDUTY_ROUTING_KEY,
        "event_action": "trigger",
        "payload": {
            "summary": "Log count below threshold",
            "severity": "critical",
            "source": "sumo_monitor.py",
            "component": "log_monitor",
            "group": "log_group",
            "class": "log_class",
            "custom_details": {
                "description": "The log count is below 10 in the last 600 seconds."
            }
        }
    }
    try:
        response = requests.post(
            PAGERDUTY_API_URL,
            headers={'Content-Type': 'application/json'},
            json=data,
            # Without a timeout a stalled connection blocks the monitor forever.
            timeout=30,
        )
    except requests.RequestException as exc:
        print(f"Failed to create PagerDuty alert: {exc}")
        return

    if response.status_code == 202:
        print("PagerDuty alert created successfully.")
    else:
        print(f"Failed to create PagerDuty alert: {response.status_code} - {response.text}")

def main():
    """Run the monitoring loop forever.

    Each pass runs both checks, prints a status line, raises a PagerDuty
    alert when the log-count check did not pass, and then sleeps for
    ``CHECK_INTERVAL`` seconds. A failing collector-health check on its own
    is only printed, not alerted on.
    """
    while True:
        # Both checks always run, health first, before any decision is made.
        collector_health = check_collector_health()
        log_count_ok = check_log_count()

        if not (collector_health and log_count_ok):
            print("Collector health or log count check failed.")
            if not log_count_ok:
                create_pagerduty_alert()
        else:
            print("Collector health is good and log count is sufficient.")

        time.sleep(CHECK_INTERVAL)


if __name__ == "__main__":
    main()

Without PagerDuty (Jira ticket variant)

# Make systemd re-read unit files so the new service definition is picked up.
sudo systemctl daemon-reload
# Start the service automatically at boot.
sudo systemctl enable sumo-monitor.service
# Start the service now.
sudo systemctl start sumo-monitor.service
# Show the service's current state.
sudo systemctl status sumo-monitor.service

sumo-monitor.py

import requests
import time
from requests.auth import HTTPBasicAuth

# Configuration
# NOTE: the "your_..." values below look like placeholders — replace them
# with real settings before running.

# Base URL for collector lookups; check_collector_health() appends COLLECTOR_ID.
SUMO_API_URL = "https://api.sumologic.com/api/v1/collectors"
# Endpoint queried by check_log_count().
SUMO_SEARCH_URL = "https://api.sumologic.com/api/v1/logs/search"
# Credentials used for the Sumo Logic requests.
SUMO_ACCESS_ID = "your_sumo_access_id"
SUMO_ACCESS_KEY = "your_sumo_access_key"
# Identifier of the collector whose health is checked.
COLLECTOR_ID = "your_collector_id"
# Sent as the 'q' parameter of the search request.
SEARCH_QUERY = '_sourceCategory=your_source_category | count by _sourceHost'
# Seconds main() sleeps between monitoring passes. The search window in
# check_log_count() ('now-10m') is hard-coded separately and is not derived
# from this value.
CHECK_INTERVAL = 600  # 600 seconds

# Issue-creation endpoint that create_jira_ticket() posts to.
JIRA_API_URL = "https://your_jira_instance.atlassian.net/rest/api/2/issue"
# Basic-auth credentials for the Jira request.
JIRA_USERNAME = "your_jira_username"
JIRA_API_TOKEN = "your_jira_api_token"
# Project key and issue type placed in the created issue's fields.
JIRA_PROJECT_KEY = "your_project_key"
JIRA_ISSUE_TYPE = "Task"

def check_collector_health():
    """Return whether the configured Sumo Logic collector reports itself alive.

    Issues a GET to ``{SUMO_API_URL}/{COLLECTOR_ID}`` authenticated with HTTP
    Basic credentials built from ``SUMO_ACCESS_ID`` and ``SUMO_ACCESS_KEY``.

    Returns:
        The ``alive`` value of the ``collector`` object in the JSON response
        when the API answers 200. ``False`` when the request fails, another
        status code is returned, or the response lacks the expected fields.
    """
    try:
        response = requests.get(
            f"{SUMO_API_URL}/{COLLECTOR_ID}",
            headers={'Content-Type': 'application/json'},
            # HTTPBasicAuth base64-encodes "id:key"; a hand-built
            # "Basic id:key" header is not valid Basic authentication.
            auth=HTTPBasicAuth(SUMO_ACCESS_ID, SUMO_ACCESS_KEY),
            # Without a timeout a stalled connection blocks the monitor forever.
            timeout=30,
        )
    except requests.RequestException as exc:
        print(f"Failed to get collector health: {exc}")
        return False

    if response.status_code != 200:
        print(f"Failed to get collector health: {response.status_code}")
        return False

    try:
        return response.json()['collector']['alive']
    except (ValueError, KeyError, TypeError) as exc:
        # Body was not JSON or did not have the collector/alive structure.
        print(f"Failed to get collector health: unexpected response ({exc!r})")
        return False

def check_log_count():
    """Return whether the log count for the last 10 minutes exceeds 10.

    Sends ``SEARCH_QUERY`` to ``SUMO_SEARCH_URL`` for the window
    ``now-10m`` .. ``now``, authenticated with HTTP Basic credentials built
    from ``SUMO_ACCESS_ID`` and ``SUMO_ACCESS_KEY``.

    Returns:
        ``True`` when the response's ``count`` field is greater than 10.
        ``False`` when it is not, and also when the request fails, a non-200
        status is returned, or the response cannot be interpreted — so
        callers cannot tell "too few logs" apart from "check failed".
    """
    params = {
        'q': SEARCH_QUERY,
        'from': 'now-10m',
        'to': 'now'
    }
    try:
        response = requests.get(
            SUMO_SEARCH_URL,
            headers={'Content-Type': 'application/json'},
            params=params,
            # HTTPBasicAuth base64-encodes "id:key"; a hand-built
            # "Basic id:key" header is not valid Basic authentication.
            auth=HTTPBasicAuth(SUMO_ACCESS_ID, SUMO_ACCESS_KEY),
            # Without a timeout a stalled connection blocks the monitor forever.
            timeout=30,
        )
    except requests.RequestException as exc:
        print(f"Failed to get log count: {exc}")
        return False

    if response.status_code != 200:
        print(f"Failed to get log count: {response.status_code}")
        return False

    try:
        return response.json()['count'] > 10
    except (ValueError, KeyError, TypeError) as exc:
        # Body was not JSON, had no 'count', or 'count' was not comparable.
        print(f"Failed to get log count: unexpected response ({exc!r})")
        return False

def create_jira_ticket():
    """Create a Jira issue reporting that the log count is below threshold.

    Posts an issue for ``JIRA_PROJECT_KEY`` of type ``JIRA_ISSUE_TYPE`` to
    ``JIRA_API_URL`` using HTTP Basic authentication
    (``JIRA_USERNAME`` / ``JIRA_API_TOKEN``) and prints the outcome. A 201
    response is treated as success. Network errors are reported and
    swallowed so that a Jira outage does not stop the monitoring loop.

    Returns:
        None.
    """
    data = {
        "fields": {
            "project": {
                "key": JIRA_PROJECT_KEY
            },
            "summary": "Log count below threshold",
            "description": "The log count is below 10 in the last 600 seconds.",
            "issuetype": {
                "name": JIRA_ISSUE_TYPE
            }
        }
    }
    try:
        response = requests.post(
            JIRA_API_URL,
            headers={'Content-Type': 'application/json'},
            auth=HTTPBasicAuth(JIRA_USERNAME, JIRA_API_TOKEN),
            json=data,
            # Without a timeout a stalled connection blocks the monitor forever.
            timeout=30,
        )
    except requests.RequestException as exc:
        print(f"Failed to create Jira ticket: {exc}")
        return

    if response.status_code == 201:
        print("Jira ticket created successfully.")
    else:
        print(f"Failed to create Jira ticket: {response.status_code} - {response.text}")

def main():
    """Run the monitoring loop forever.

    Each pass runs both checks, prints a status line, opens a Jira ticket
    when the log-count check did not pass, and then sleeps for
    ``CHECK_INTERVAL`` seconds. A failing collector-health check on its own
    is only printed, not ticketed.
    """
    while True:
        # Both checks always run, health first, before any decision is made.
        collector_health = check_collector_health()
        log_count_ok = check_log_count()

        if not (collector_health and log_count_ok):
            print("Collector health or log count check failed.")
            if not log_count_ok:
                create_jira_ticket()
        else:
            print("Collector health is good and log count is sufficient.")

        time.sleep(CHECK_INTERVAL)


if __name__ == "__main__":
    main()