Add quiet hours, health checks, and logging
This commit is contained in:
@@ -27,27 +27,27 @@ async def monitor_resources(cfg, notify, bot, chat_id):
|
||||
usage, mount = worst_disk_usage()
|
||||
if usage is None:
|
||||
if not state["disk_na"] or now - last_sent["disk_na"] >= cooldown:
|
||||
await notify(bot, chat_id, "⚠️ Disk usage n/a")
|
||||
await notify(bot, chat_id, "⚠️ Disk usage n/a", level="warn", key="disk_na")
|
||||
state["disk_na"] = True
|
||||
last_sent["disk_na"] = now
|
||||
else:
|
||||
if state["disk_na"] and notify_recovery:
|
||||
await notify(bot, chat_id, f"🟢 Disk usage OK ({usage}% {mount})")
|
||||
await notify(bot, chat_id, f"🟢 Disk usage OK ({usage}% {mount})", level="info", key="disk_ok")
|
||||
state["disk_na"] = False
|
||||
|
||||
if usage >= disk_warn:
|
||||
if not state["disk_high"] or now - last_sent["disk"] >= cooldown:
|
||||
await notify(bot, chat_id, f"🟡 Disk usage {usage}% ({mount})")
|
||||
await notify(bot, chat_id, f"🟡 Disk usage {usage}% ({mount})", level="warn", key="disk_high")
|
||||
state["disk_high"] = True
|
||||
last_sent["disk"] = now
|
||||
else:
|
||||
if state["disk_high"] and notify_recovery:
|
||||
await notify(bot, chat_id, f"🟢 Disk usage OK ({usage}% {mount})")
|
||||
await notify(bot, chat_id, f"🟢 Disk usage OK ({usage}% {mount})", level="info", key="disk_ok")
|
||||
state["disk_high"] = False
|
||||
|
||||
if usage >= snapshot_warn and now - last_sent["disk_report"] >= snapshot_cooldown:
|
||||
report = await build_disk_report(cfg, mount or "/", usage)
|
||||
await notify(bot, chat_id, f"📦 Disk snapshot\n\n{report}")
|
||||
await notify(bot, chat_id, f"📦 Disk snapshot\n\n{report}", level="info", key="disk_snapshot")
|
||||
last_sent["disk_report"] = now
|
||||
|
||||
load = psutil.getloadavg()[0]
|
||||
@@ -60,12 +60,14 @@ async def monitor_resources(cfg, notify, bot, chat_id):
|
||||
|
||||
if level == 0:
|
||||
if state["load_level"] > 0 and notify_recovery:
|
||||
await notify(bot, chat_id, f"🟢 Load OK: {load:.2f}")
|
||||
await notify(bot, chat_id, f"🟢 Load OK: {load:.2f}", level="info", key="load_ok")
|
||||
state["load_level"] = 0
|
||||
else:
|
||||
if level != state["load_level"] or now - last_sent["load"] >= cooldown:
|
||||
icon = "🔴" if level == 2 else "🟡"
|
||||
await notify(bot, chat_id, f"{icon} Load high: {load:.2f}")
|
||||
level_name = "critical" if level == 2 else "warn"
|
||||
key = "load_high_crit" if level == 2 else "load_high_warn"
|
||||
await notify(bot, chat_id, f"{icon} Load high: {load:.2f}", level=level_name, key=key)
|
||||
last_sent["load"] = now
|
||||
state["load_level"] = level
|
||||
|
||||
@@ -91,7 +93,13 @@ async def monitor_smart(cfg, notify, bot, chat_id):
|
||||
continue
|
||||
|
||||
if "FAILED" in health:
|
||||
await notify(bot, chat_id, f"🔴 SMART FAIL {dev}: {health}, 🌡 {temp}")
|
||||
await notify(
|
||||
bot,
|
||||
chat_id,
|
||||
f"🔴 SMART FAIL {dev}: {health}, 🌡 {temp}",
|
||||
level="critical",
|
||||
key=f"smart_fail:{dev}",
|
||||
)
|
||||
last_sent[key] = now
|
||||
continue
|
||||
|
||||
@@ -101,7 +109,13 @@ async def monitor_smart(cfg, notify, bot, chat_id):
|
||||
except ValueError:
|
||||
t = None
|
||||
if t is not None and t >= temp_warn:
|
||||
await notify(bot, chat_id, f"🟡 SMART HOT {dev}: {health}, 🌡 {temp}")
|
||||
await notify(
|
||||
bot,
|
||||
chat_id,
|
||||
f"🟡 SMART HOT {dev}: {health}, 🌡 {temp}",
|
||||
level="warn",
|
||||
key=f"smart_hot:{dev}",
|
||||
)
|
||||
last_sent[key] = now
|
||||
continue
|
||||
|
||||
|
||||
Reference in New Issue
Block a user