Add dedicated RAID alert category and monitor
This commit is contained in:
@@ -1,7 +1,7 @@
|
||||
import asyncio
|
||||
import time
|
||||
import psutil
|
||||
from system_checks import list_disks, smart_health, disk_temperature
|
||||
from system_checks import list_disks, smart_health, disk_temperature, list_md_arrays, md_array_status
|
||||
from services.system import worst_disk_usage
|
||||
from services.disk_report import build_disk_report
|
||||
|
||||
@@ -130,3 +130,54 @@ async def monitor_smart(cfg, notify, bot, chat_id):
|
||||
continue
|
||||
|
||||
await asyncio.sleep(interval)
|
||||
|
||||
|
||||
def _classify_md_status(status):
    """Map an md array status string to a ``(level, key_suffix)`` pair.

    Matching is case-insensitive substring search; "inactive" takes
    precedence over "degraded".

    Returns:
        ``("critical", "inactive")``, ``("warn", "degraded")``, or
        ``(None, None)`` when neither keyword is present.
    """
    lowered = status.lower()
    if "inactive" in lowered:
        return "critical", "inactive"
    if "degraded" in lowered:
        return "warn", "degraded"
    return None, None


async def monitor_raid(cfg, notify, bot, chat_id):
    """Poll md RAID arrays forever and send alerts on problems and recovery.

    Each cycle, every device returned by ``list_md_arrays()`` is checked via
    ``md_array_status(dev)``. A status containing "inactive" produces a
    "critical" alert, one containing "degraded" produces a "warn" alert, and
    anything else counts as healthy.

    An alert is sent immediately when a device's condition changes (healthy
    to bad, or degraded to inactive and vice versa) and is repeated at most
    once per cooldown period while the condition stays the same. When a
    previously bad device becomes healthy, an "info" message is sent if
    recovery notifications are enabled.

    Args:
        cfg: Mapping with an optional ``"alerts"`` sub-mapping. Keys read
            from it: ``raid_interval_sec`` (default 300),
            ``raid_cooldown_sec`` (default 1800) and ``notify_recovery``
            (default True).
        notify: Awaitable callable invoked as
            ``notify(bot, chat_id, text, level=..., key=..., category="raid")``.
        bot: Passed through to ``notify`` unchanged.
        chat_id: Passed through to ``notify`` unchanged.

    This coroutine never returns on its own.
    """
    alerts_cfg = cfg.get("alerts", {})
    interval = int(alerts_cfg.get("raid_interval_sec", 300))
    cooldown = int(alerts_cfg.get("raid_cooldown_sec", 1800))
    notify_recovery = bool(alerts_cfg.get("notify_recovery", True))

    # Monotonic timestamp of the last alert sent per device.
    last_sent: dict[str, float] = {}
    # Condition ("inactive"/"degraded") last alerted per device; a device is
    # absent from this mapping while it is healthy.
    active_problem: dict[str, str] = {}

    while True:
        # Monotonic clock: the cooldown must not be affected by wall-clock
        # adjustments (NTP steps, manual changes).
        now = time.monotonic()
        # NOTE(review): list_md_arrays()/md_array_status() are called
        # synchronously; if they block (e.g. spawn a subprocess) they stall
        # the event loop for that long - confirm this is acceptable.
        for dev in list_md_arrays():
            status = md_array_status(dev)
            level, key_suffix = _classify_md_status(status)

            if level is None:
                # Healthy: always clear the remembered problem so the next
                # failure alerts immediately, and announce recovery if asked.
                was_bad = active_problem.pop(dev, None) is not None
                if was_bad and notify_recovery:
                    await notify(
                        bot,
                        chat_id,
                        f"🟢 RAID {dev}: {status}",
                        level="info",
                        key=f"raid_ok:{dev}",
                        category="raid",
                    )
                continue

            # Alert right away on any change of condition (including a
            # degraded -> inactive escalation); otherwise throttle repeats.
            changed = active_problem.get(dev) != key_suffix
            elapsed = now - last_sent.get(dev, float("-inf"))
            if changed or elapsed >= cooldown:
                icon = "🔴" if level == "critical" else "🟡"
                await notify(
                    bot,
                    chat_id,
                    f"{icon} RAID {dev}: {status}",
                    level=level,
                    key=f"raid_{key_suffix}:{dev}",
                    category="raid",
                )
                last_sent[dev] = now
                active_problem[dev] = key_suffix

        await asyncio.sleep(interval)
|
||||
|
||||
Reference in New Issue
Block a user