Add periodic resource alerts
This commit is contained in:
@@ -12,6 +12,12 @@ thresholds:
|
|||||||
disk_warn: 80
|
disk_warn: 80
|
||||||
load_warn: 2.0
|
load_warn: 2.0
|
||||||
|
|
||||||
|
alerts:
|
||||||
|
enabled: true
|
||||||
|
interval_sec: 60
|
||||||
|
cooldown_sec: 900
|
||||||
|
notify_recovery: true
|
||||||
|
|
||||||
docker:
|
docker:
|
||||||
# If true, discover containers by name/label
|
# If true, discover containers by name/label
|
||||||
autodiscovery: true
|
autodiscovery: true
|
||||||
|
|||||||
3
main.py
3
main.py
@@ -4,6 +4,7 @@ from datetime import datetime
|
|||||||
from app import bot, dp, cfg, ADMIN_ID
|
from app import bot, dp, cfg, ADMIN_ID
|
||||||
from keyboards import menu_kb
|
from keyboards import menu_kb
|
||||||
from services.docker import discover_containers, docker_watchdog
|
from services.docker import discover_containers, docker_watchdog
|
||||||
|
from services.alerts import monitor_resources
|
||||||
from services.notify import notify
|
from services.notify import notify
|
||||||
import state
|
import state
|
||||||
import handlers.menu
|
import handlers.menu
|
||||||
@@ -29,6 +30,8 @@ async def main():
|
|||||||
state.DOCKER_MAP.update(await discover_containers(cfg))
|
state.DOCKER_MAP.update(await discover_containers(cfg))
|
||||||
if cfg.get("docker", {}).get("watchdog", True):
|
if cfg.get("docker", {}).get("watchdog", True):
|
||||||
asyncio.create_task(docker_watchdog(state.DOCKER_MAP, notify, bot, ADMIN_ID))
|
asyncio.create_task(docker_watchdog(state.DOCKER_MAP, notify, bot, ADMIN_ID))
|
||||||
|
if cfg.get("alerts", {}).get("enabled", True):
|
||||||
|
asyncio.create_task(monitor_resources(cfg, notify, bot, ADMIN_ID))
|
||||||
await notify_start()
|
await notify_start()
|
||||||
await dp.start_polling(bot)
|
await dp.start_polling(bot)
|
||||||
|
|
||||||
|
|||||||
54
services/alerts.py
Normal file
54
services/alerts.py
Normal file
@@ -0,0 +1,54 @@
|
|||||||
|
import asyncio
|
||||||
|
import time
|
||||||
|
import psutil
|
||||||
|
from services.system import worst_disk_usage
|
||||||
|
|
||||||
|
|
||||||
|
async def monitor_resources(cfg, notify, bot, chat_id):
    """Poll disk usage and the 1-minute load average forever, sending alerts.

    Every ``alerts.interval_sec`` seconds the worst disk usage and the first
    value of ``psutil.getloadavg()`` are compared against
    ``thresholds.disk_warn`` and ``thresholds.load_warn``. An alert is sent
    as soon as a condition becomes active and is repeated at most once per
    ``alerts.cooldown_sec`` while it stays active. When a condition clears,
    a recovery message is sent if ``alerts.notify_recovery`` is true.

    Args:
        cfg: Mapping-like configuration; read via ``.get`` for the optional
            ``"alerts"`` and ``"thresholds"`` sections.
        notify: Coroutine function awaited as ``notify(bot, chat_id, text)``.
        bot: Passed through unchanged to ``notify``.
        chat_id: Passed through unchanged to ``notify``.

    This coroutine never returns normally; cancel the task to stop it.
    """
    # Local import keeps this block self-contained; only used to report
    # failures of a single polling round.
    import logging

    log = logging.getLogger(__name__)

    alerts_cfg = cfg.get("alerts", {})
    thresholds = cfg.get("thresholds", {})

    # Clamp to one second so a zero/negative setting cannot turn the loop
    # into a busy spin.
    interval = max(1, int(alerts_cfg.get("interval_sec", 60)))
    cooldown = int(alerts_cfg.get("cooldown_sec", 900))
    notify_recovery = bool(alerts_cfg.get("notify_recovery", True))

    disk_warn = int(thresholds.get("disk_warn", 80))
    load_warn = float(thresholds.get("load_warn", 2.0))

    # Timestamp of the last alert per condition. Only consulted while the
    # matching state flag is True, i.e. after it has been set at least once.
    last_sent = {"disk": 0.0, "load": 0.0, "disk_na": 0.0}
    state = {"disk_high": False, "load_high": False, "disk_na": False}

    while True:
        # Monotonic clock: cooldown arithmetic must not be affected by
        # wall-clock adjustments (NTP steps, manual changes).
        now = time.monotonic()

        try:
            # worst_disk_usage() yields (usage, mount); usage is None when
            # no reading is available.
            usage, mount = worst_disk_usage()
            if usage is None:
                if not state["disk_na"] or now - last_sent["disk_na"] >= cooldown:
                    await notify(bot, chat_id, "⚠️ Disk usage n/a")
                    state["disk_na"] = True
                    last_sent["disk_na"] = now
                # The last known "high" state is stale once readings stop;
                # forgetting it makes the first real reading re-evaluate
                # from scratch (and avoids a duplicate recovery message).
                state["disk_high"] = False
            else:
                disk_high = usage >= disk_warn
                if disk_high:
                    if not state["disk_high"] or now - last_sent["disk"] >= cooldown:
                        await notify(bot, chat_id, f"🟡 Disk usage {usage}% ({mount})")
                        last_sent["disk"] = now
                elif notify_recovery and (state["disk_high"] or state["disk_na"]):
                    # One recovery message covers both "was high" and
                    # "was unavailable"; never claim OK while above the
                    # warning threshold.
                    await notify(bot, chat_id, f"🟢 Disk usage OK ({usage}% {mount})")
                # Flags are updated only after notify succeeded, so a failed
                # send is retried on the next round.
                state["disk_high"] = disk_high
                state["disk_na"] = False

            load = psutil.getloadavg()[0]
            load_high = load >= load_warn
            if load_high:
                if not state["load_high"] or now - last_sent["load"] >= cooldown:
                    await notify(bot, chat_id, f"🟡 Load high: {load:.2f}")
                    last_sent["load"] = now
            elif notify_recovery and state["load_high"]:
                await notify(bot, chat_id, f"🟢 Load OK: {load:.2f}")
            state["load_high"] = load_high
        except asyncio.CancelledError:
            # Never swallow cancellation (it is an Exception subclass on
            # older Python versions).
            raise
        except Exception:
            # A transient failure (e.g. notify could not deliver) must not
            # kill the background monitor; log it and try again next round.
            log.exception("Resource monitor iteration failed")

        await asyncio.sleep(interval)
|
||||||
Reference in New Issue
Block a user