"""Background monitors that push disk, load and SMART alerts through a notifier."""

import asyncio
import time

import psutil

from system_checks import list_disks, smart_health, disk_temperature
from services.system import worst_disk_usage
from services.disk_report import build_disk_report


def _load_level(load, load_warn, high_warn):
    """Classify a load average: 0 = below thresholds, 1 = warning, 2 = critical."""
    if load >= high_warn:
        return 2
    if load >= load_warn:
        return 1
    return 0


def _parse_temperature(temp):
    """Return a temperature string such as ``"47°C"`` as an int.

    Returns ``None`` for the ``"n/a"`` placeholder or when the remaining text
    is not an integer.
    """
    if temp == "n/a":
        return None
    try:
        return int(temp.replace("°C", ""))
    except ValueError:
        return None


async def monitor_resources(cfg, notify, bot, chat_id):
    """Poll disk usage and the 1-minute load average forever, sending alerts.

    Args:
        cfg: Mapping with optional ``alerts``, ``thresholds`` and
            ``disk_report`` sub-mappings; missing keys fall back to the
            defaults visible below.
        notify: Awaitable callable invoked as
            ``notify(bot, chat_id, text, level=..., key=..., category=...)``.
        bot: Passed through to ``notify`` unchanged.
        chat_id: Passed through to ``notify`` unchanged.

    The coroutine never returns; it sleeps ``alerts.interval_sec`` seconds
    between polls. Repeated alerts of the same kind are throttled by
    ``alerts.cooldown_sec``; a change of state alerts immediately.
    """
    alerts_cfg = cfg.get("alerts", {})
    thresholds = cfg.get("thresholds", {})
    report_cfg = cfg.get("disk_report", {})

    interval = int(alerts_cfg.get("interval_sec", 60))
    cooldown = int(alerts_cfg.get("cooldown_sec", 900))
    notify_recovery = bool(alerts_cfg.get("notify_recovery", True))
    load_only_critical = bool(alerts_cfg.get("load_only_critical", False))

    disk_warn = int(thresholds.get("disk_warn", 80))
    load_warn = float(thresholds.get("load_warn", 2.0))
    high_warn = float(thresholds.get("high_load_warn", load_warn * 1.5))

    # The snapshot threshold defaults to the disk warning threshold.
    snapshot_warn = int(report_cfg.get("threshold", disk_warn))
    snapshot_cooldown = int(report_cfg.get("cooldown_sec", 21600))

    # NOTE(review): despite its name, load_only_critical also suppresses the
    # disk recovery messages below. Behaviour kept as-is; confirm it is intended.
    send_recovery = notify_recovery and not load_only_critical

    # Timestamp of the last alert per kind; 0.0 means "never sent".
    last_sent = {"disk": 0.0, "load": 0.0, "disk_na": 0.0, "disk_report": 0.0}
    state = {"disk_high": False, "disk_na": False, "load_level": 0}

    while True:
        now = time.time()

        # ---- disk usage ----
        usage, mount = worst_disk_usage()
        if usage is None:
            if not state["disk_na"] or now - last_sent["disk_na"] >= cooldown:
                await notify(
                    bot,
                    chat_id,
                    "⚠️ Disk usage n/a",
                    level="warn",
                    key="disk_na",
                    category="disk",
                )
                state["disk_na"] = True
                last_sent["disk_na"] = now
        else:
            # NOTE(review): this "OK" message is sent when readings come back,
            # even if usage is above disk_warn (a high-usage alert may follow).
            if state["disk_na"] and send_recovery:
                await notify(
                    bot,
                    chat_id,
                    f"🟢 Disk usage OK ({usage}% {mount})",
                    level="info",
                    key="disk_ok",
                    category="disk",
                )
            state["disk_na"] = False

            if usage >= disk_warn:
                if not state["disk_high"] or now - last_sent["disk"] >= cooldown:
                    await notify(
                        bot,
                        chat_id,
                        f"🟡 Disk usage {usage}% ({mount})",
                        level="warn",
                        key="disk_high",
                        category="disk",
                    )
                    state["disk_high"] = True
                    last_sent["disk"] = now
            else:
                if state["disk_high"] and send_recovery:
                    await notify(
                        bot,
                        chat_id,
                        f"🟢 Disk usage OK ({usage}% {mount})",
                        level="info",
                        key="disk_ok",
                        category="disk",
                    )
                state["disk_high"] = False

            # The snapshot report has its own threshold and cooldown.
            snapshot_due = now - last_sent["disk_report"] >= snapshot_cooldown
            if usage >= snapshot_warn and snapshot_due:
                report = await build_disk_report(cfg, mount or "/", usage)
                await notify(
                    bot,
                    chat_id,
                    f"📦 Disk snapshot\n\n{report}",
                    level="info",
                    key="disk_snapshot",
                    category="disk",
                )
                last_sent["disk_report"] = now

        # ---- load average ----
        load = psutil.getloadavg()[0]
        level = _load_level(load, load_warn, high_warn)
        if load_only_critical and level == 1:
            # In critical-only mode a warning-level load is treated as normal.
            level = 0

        if level == 0:
            if state["load_level"] > 0 and send_recovery:
                await notify(
                    bot,
                    chat_id,
                    f"🟢 Load OK: {load:.2f}",
                    level="info",
                    key="load_ok",
                    category="load",
                )
            state["load_level"] = 0
        else:
            # Alert at once when the level changes, otherwise once per cooldown.
            if level != state["load_level"] or now - last_sent["load"] >= cooldown:
                critical = level == 2
                await notify(
                    bot,
                    chat_id,
                    f"{'🔴' if critical else '🟡'} Load high: {load:.2f}",
                    level="critical" if critical else "warn",
                    key="load_high_crit" if critical else "load_high_warn",
                    category="load",
                )
                last_sent["load"] = now
            state["load_level"] = level

        await asyncio.sleep(interval)


async def monitor_smart(cfg, notify, bot, chat_id):
    """Poll SMART health and temperature of every disk forever, sending alerts.

    Args:
        cfg: Mapping with an optional ``alerts`` sub-mapping providing
            ``smart_interval_sec``, ``smart_cooldown_sec`` and
            ``smart_temp_warn``.
        notify: Awaitable callable invoked as
            ``notify(bot, chat_id, text, level=..., key=..., category=...)``.
        bot: Passed through to ``notify`` unchanged.
        chat_id: Passed through to ``notify`` unchanged.

    A health string containing ``"FAILED"`` raises a critical alert. Otherwise
    a temperature at or above ``smart_temp_warn`` raises a warning. The
    coroutine never returns.
    """
    alerts_cfg = cfg.get("alerts", {})
    interval = int(alerts_cfg.get("smart_interval_sec", 3600))
    cooldown = int(alerts_cfg.get("smart_cooldown_sec", 6 * 3600))
    temp_warn = int(alerts_cfg.get("smart_temp_warn", 50))

    # Last send time per alert identity ("smart_fail:<dev>" / "smart_hot:<dev>").
    # Keying by device and alert kind, not by the raw health/temperature
    # readings, keeps the cooldown effective when the temperature fluctuates
    # and bounds the dict to two entries per disk.
    last_sent = {}

    while True:
        for dev in list_disks():
            health = smart_health(dev)
            temp = disk_temperature(dev)

            if "FAILED" in health:
                alert_key = f"smart_fail:{dev}"
                level = "critical"
                text = f"🔴 SMART FAIL {dev}: {health}, 🌡 {temp}"
            else:
                degrees = _parse_temperature(temp)
                if degrees is None or degrees < temp_warn:
                    continue
                alert_key = f"smart_hot:{dev}"
                level = "warn"
                text = f"🟡 SMART HOT {dev}: {health}, 🌡 {temp}"

            now = time.time()
            if last_sent.get(alert_key, 0) + cooldown > now:
                continue

            await notify(
                bot,
                chat_id,
                text,
                level=level,
                key=alert_key,
                category="smart",
            )
            last_sent[alert_key] = now

        await asyncio.sleep(interval)