Add SMART alert scheduler

This commit is contained in:
2026-02-07 22:46:31 +03:00
parent e3a1321d3f
commit 745a5171a1
3 changed files with 44 additions and 1 deletions

View File

@@ -17,6 +17,10 @@ alerts:
interval_sec: 60 interval_sec: 60
cooldown_sec: 900 cooldown_sec: 900
notify_recovery: true notify_recovery: true
smart_enabled: true
smart_interval_sec: 3600
smart_cooldown_sec: 21600
smart_temp_warn: 50
docker: docker:
# If true, discover containers by name/label # If true, discover containers by name/label

View File

@@ -4,7 +4,7 @@ from datetime import datetime
from app import bot, dp, cfg, ADMIN_ID from app import bot, dp, cfg, ADMIN_ID
from keyboards import menu_kb from keyboards import menu_kb
from services.docker import discover_containers, docker_watchdog from services.docker import discover_containers, docker_watchdog
from services.alerts import monitor_resources from services.alerts import monitor_resources, monitor_smart
from services.notify import notify from services.notify import notify
import state import state
import handlers.menu import handlers.menu
@@ -32,6 +32,8 @@ async def main():
asyncio.create_task(docker_watchdog(state.DOCKER_MAP, notify, bot, ADMIN_ID)) asyncio.create_task(docker_watchdog(state.DOCKER_MAP, notify, bot, ADMIN_ID))
if cfg.get("alerts", {}).get("enabled", True): if cfg.get("alerts", {}).get("enabled", True):
asyncio.create_task(monitor_resources(cfg, notify, bot, ADMIN_ID)) asyncio.create_task(monitor_resources(cfg, notify, bot, ADMIN_ID))
if cfg.get("alerts", {}).get("smart_enabled", True):
asyncio.create_task(monitor_smart(cfg, notify, bot, ADMIN_ID))
await notify_start() await notify_start()
await dp.start_polling(bot) await dp.start_polling(bot)

View File

@@ -1,6 +1,7 @@
import asyncio import asyncio
import time import time
import psutil import psutil
from system_checks import list_disks, smart_health, disk_temperature
from services.system import worst_disk_usage from services.system import worst_disk_usage
@@ -52,3 +53,39 @@ async def monitor_resources(cfg, notify, bot, chat_id):
state["load_high"] = False state["load_high"] = False
await asyncio.sleep(interval) await asyncio.sleep(interval)
async def monitor_smart(cfg, notify, bot, chat_id):
    """Poll SMART status of every disk forever and send rate-limited alerts.

    Settings are read once from ``cfg["alerts"]``:

    * ``smart_interval_sec`` -- pause between polling rounds (default 3600).
    * ``smart_cooldown_sec`` -- minimum time between two alerts of the same
      kind for the same device (default 6 hours).
    * ``smart_temp_warn`` -- temperature at or above which a "hot" alert is
      sent (default 50).

    Args:
        cfg: Configuration mapping; only the ``"alerts"`` section is read.
        notify: Awaitable callable invoked as ``notify(bot, chat_id, text)``.
        bot: Passed through to ``notify`` unchanged.
        chat_id: Passed through to ``notify`` unchanged.

    This coroutine never returns; it is meant to run as a background task.
    """
    alerts_cfg = cfg.get("alerts", {})
    interval = int(alerts_cfg.get("smart_interval_sec", 3600))
    cooldown = int(alerts_cfg.get("smart_cooldown_sec", 6 * 3600))
    temp_warn = int(alerts_cfg.get("smart_temp_warn", 50))

    # (device, alert kind) -> monotonic timestamp of the last alert sent.
    # The key deliberately excludes the current reading: keying on the
    # temperature value would let every one-degree fluctuation bypass the
    # cooldown and would grow this dict without bound.
    last_sent = {}

    while True:
        # NOTE(review): the three system_checks helpers are called
        # synchronously; if they shell out to smartctl they will block the
        # event loop for the duration of the call -- confirm and consider
        # running them in an executor.
        for dev in list_disks():
            health = smart_health(dev)
            temp = disk_temperature(dev)

            if "FAILED" in health:
                kind = "fail"
                text = f"🔴 SMART FAIL {dev}: {health}, 🌡 {temp}"
            else:
                degrees = None
                if temp != "n/a":
                    try:
                        degrees = int(temp.replace("°C", ""))
                    except ValueError:
                        # Unparseable reading: treat as "no temperature".
                        degrees = None
                if degrees is None or degrees < temp_warn:
                    continue
                kind = "hot"
                text = f"🟡 SMART HOT {dev}: {health}, 🌡 {temp}"

            key = (dev, kind)
            # Monotonic clock: wall-clock adjustments must not stretch or
            # shrink the cooldown window.
            now = time.monotonic()
            previous = last_sent.get(key)
            if previous is not None and now - previous < cooldown:
                continue

            await notify(bot, chat_id, text)
            last_sent[key] = now

        await asyncio.sleep(interval)