diff --git a/config.example.yaml b/config.example.yaml
index 2b360d8..243adc2 100644
--- a/config.example.yaml
+++ b/config.example.yaml
@@ -17,6 +17,10 @@ alerts:
   interval_sec: 60
   cooldown_sec: 900
   notify_recovery: true
+  smart_enabled: true
+  smart_interval_sec: 3600
+  smart_cooldown_sec: 21600
+  smart_temp_warn: 50
 
 docker:
   # If true, discover containers by name/label
diff --git a/main.py b/main.py
index f5c1b1b..d8dc96e 100644
--- a/main.py
+++ b/main.py
@@ -4,7 +4,7 @@ from datetime import datetime
 from app import bot, dp, cfg, ADMIN_ID
 from keyboards import menu_kb
 from services.docker import discover_containers, docker_watchdog
-from services.alerts import monitor_resources
+from services.alerts import monitor_resources, monitor_smart
 from services.notify import notify
 import state
 import handlers.menu
@@ -32,6 +32,8 @@ async def main():
         asyncio.create_task(docker_watchdog(state.DOCKER_MAP, notify, bot, ADMIN_ID))
     if cfg.get("alerts", {}).get("enabled", True):
         asyncio.create_task(monitor_resources(cfg, notify, bot, ADMIN_ID))
+    if cfg.get("alerts", {}).get("smart_enabled", True):
+        asyncio.create_task(monitor_smart(cfg, notify, bot, ADMIN_ID))
 
     await notify_start()
     await dp.start_polling(bot)
diff --git a/services/alerts.py b/services/alerts.py
index 94a28da..69ce2d6 100644
--- a/services/alerts.py
+++ b/services/alerts.py
@@ -1,6 +1,7 @@
 import asyncio
 import time
 import psutil
+from system_checks import list_disks, smart_health, disk_temperature
 
 from services.system import worst_disk_usage
 
@@ -52,3 +53,71 @@ async def monitor_resources(cfg, notify, bot, chat_id):
             state["load_high"] = False
 
         await asyncio.sleep(interval)
+
+
+async def monitor_smart(cfg, notify, bot, chat_id):
+    """Poll SMART health and temperature of every disk and send alerts.
+
+    Loops forever. Each cycle walks ``list_disks()`` and sends at most one
+    alert per device: a 🔴 alert when the health string contains ``"FAILED"``,
+    otherwise a 🟡 alert when the temperature parses to an integer at or above
+    ``smart_temp_warn``.
+
+    The cooldown is tracked per (device, alert kind), so a reading that merely
+    fluctuates (51°C -> 52°C) does not re-alert before ``smart_cooldown_sec``
+    has elapsed, and the bookkeeping stays bounded by the number of disks.
+
+    Args:
+        cfg: Mapping with an optional ``"alerts"`` section providing
+            ``smart_interval_sec`` (default 3600), ``smart_cooldown_sec``
+            (default 21600) and ``smart_temp_warn`` (default 50).
+        notify: Awaited as ``notify(bot, chat_id, text)`` for every alert.
+        bot: Passed through to ``notify`` unchanged.
+        chat_id: Passed through to ``notify`` unchanged.
+    """
+    alerts_cfg = cfg.get("alerts", {})
+    interval = int(alerts_cfg.get("smart_interval_sec", 3600))
+    cooldown = int(alerts_cfg.get("smart_cooldown_sec", 6 * 3600))
+    temp_warn = int(alerts_cfg.get("smart_temp_warn", 50))
+
+    # (device, alert kind) -> time.monotonic() of the last alert sent.
+    last_sent = {}
+
+    def cooldown_elapsed(key, now):
+        # True when this alert was never sent or its cooldown has run out.
+        sent_at = last_sent.get(key)
+        return sent_at is None or now - sent_at >= cooldown
+
+    while True:
+        # NOTE(review): the three helpers below are called synchronously; if
+        # they shell out they stall the event loop -- confirm, and consider
+        # running them in a worker thread.
+        for dev in list_disks():
+            health = smart_health(dev)
+            temp = disk_temperature(dev)
+            # Monotonic clock: the cooldown must not be affected by wall-clock
+            # adjustments.
+            now = time.monotonic()
+
+            if "FAILED" in health:
+                key = (dev, "fail")
+                if cooldown_elapsed(key, now):
+                    await notify(bot, chat_id, f"🔴 SMART FAIL {dev}: {health}, 🌡 {temp}")
+                    last_sent[key] = now
+                # The failure alert already carries the temperature.
+                continue
+
+            if temp == "n/a":
+                continue
+            try:
+                degrees = int(temp.replace("°C", ""))
+            except ValueError:
+                # Unparseable reading: nothing to compare against.
+                continue
+
+            key = (dev, "hot")
+            if degrees >= temp_warn and cooldown_elapsed(key, now):
+                await notify(bot, chat_id, f"🟡 SMART HOT {dev}: {health}, 🌡 {temp}")
+                last_sent[key] = now
+
+        await asyncio.sleep(interval)