diff --git a/CONFIG.en.md b/CONFIG.en.md index 28995e1..5a2782e 100644 --- a/CONFIG.en.md +++ b/CONFIG.en.md @@ -29,6 +29,14 @@ This project uses `config.yaml`. Start from `config.example.yaml`. - `smart_cooldown_sec` (int): SMART alert cooldown. - `smart_temp_warn` (int): SMART temperature warning (C). +## disk_report + +- `threshold` (int): Disk usage threshold (%) for auto snapshot; defaults to `thresholds.disk_warn`. +- `cooldown_sec` (int): Cooldown between snapshots (seconds). +- `top_dirs` (int): How many directories to show. +- `docker_dir` (string): Path to docker data. +- `logs_dir` (string): Path to logs. + ## audit - `enabled` (bool): Enable audit logging. diff --git a/CONFIG.md b/CONFIG.md index a6aa387..a15ec48 100644 --- a/CONFIG.md +++ b/CONFIG.md @@ -29,6 +29,14 @@ - `smart_cooldown_sec` (int): кулдаун SMART. - `smart_temp_warn` (int): порог температуры (C). +## disk_report + +- `threshold` (int): порог заполнения диска (%) для авто‑снимка; по умолчанию — `thresholds.disk_warn`. +- `cooldown_sec` (int): кулдаун между снимками (сек). +- `top_dirs` (int): сколько директорий показывать. +- `docker_dir` (string): путь к docker данным. +- `logs_dir` (string): путь к логам. + ## audit - `enabled` (bool): включить аудит. 
diff --git a/config.example.yaml b/config.example.yaml index 90e21ad..27863c1 100644 --- a/config.example.yaml +++ b/config.example.yaml @@ -23,6 +23,13 @@ alerts: smart_cooldown_sec: 21600 smart_temp_warn: 50 +disk_report: + threshold: 90 + cooldown_sec: 21600 + top_dirs: 8 + docker_dir: "/var/lib/docker" + logs_dir: "/var/log" + audit: enabled: true path: "/var/server-bot/audit.log" diff --git a/services/alerts.py b/services/alerts.py index 5150396..c4cb8af 100644 --- a/services/alerts.py +++ b/services/alerts.py @@ -3,6 +3,7 @@ import time import psutil from system_checks import list_disks, smart_health, disk_temperature from services.system import worst_disk_usage +from services.disk_report import build_disk_report async def monitor_resources(cfg, notify, bot, chat_id): @@ -12,10 +13,12 @@ async def monitor_resources(cfg, notify, bot, chat_id): notify_recovery = bool(alerts_cfg.get("notify_recovery", True)) disk_warn = int(cfg.get("thresholds", {}).get("disk_warn", 80)) + snapshot_warn = int(cfg.get("disk_report", {}).get("threshold", disk_warn)) + snapshot_cooldown = int(cfg.get("disk_report", {}).get("cooldown_sec", 21600)) load_warn = float(cfg.get("thresholds", {}).get("load_warn", 2.0)) high_warn = float(cfg.get("thresholds", {}).get("high_load_warn", load_warn * 1.5)) - last_sent = {"disk": 0.0, "load": 0.0, "disk_na": 0.0} + last_sent = {"disk": 0.0, "load": 0.0, "disk_na": 0.0, "disk_report": 0.0} state = {"disk_high": False, "disk_na": False, "load_level": 0} while True: @@ -42,6 +45,11 @@ async def monitor_resources(cfg, notify, bot, chat_id): await notify(bot, chat_id, f"🟢 Disk usage OK ({usage}% {mount})") state["disk_high"] = False + if usage >= snapshot_warn and now - last_sent["disk_report"] >= snapshot_cooldown: + report = await build_disk_report(cfg, mount or "/", usage) + await notify(bot, chat_id, f"📦 Disk snapshot\n\n{report}") + last_sent["disk_report"] = now + load = psutil.getloadavg()[0] if load >= high_warn: level = 2 diff --git 
import os
import shlex
from typing import Any

from services.runner import run_cmd


def _top_dirs_cmd(path: str, limit: int) -> list[str]:
    """Build the shell command that lists the largest entries under *path*.

    The pipeline runs ``du`` one level deep on a single filesystem, sorts the
    human-readable sizes ascending and keeps the last *limit* lines.

    Args:
        path: Directory (or mount point) to inspect.
        limit: Number of lines to keep from the sorted ``du`` output.

    Returns:
        An argv list invoking ``bash -lc`` with the pipeline as one string.
    """
    # The path is spliced into a shell string, so it must be quoted: a path
    # containing spaces or shell metacharacters would otherwise break the
    # command or be interpreted by bash. "--" stops a path that starts with
    # "-" from being parsed as a du option.
    quoted_path = shlex.quote(str(path))
    pipeline = (
        f"du -xhd1 -- {quoted_path} 2>/dev/null | sort -h | tail -n {int(limit)}"
    )
    return ["bash", "-lc", pipeline]


async def _dir_listing(path: str, limit: int) -> str:
    """Return the stripped ``du`` listing for *path*, or "" if unusable.

    ``run_cmd`` is unpacked as an ``(rc, out)`` pair; a non-zero ``rc`` is
    treated as "no data" (presumably it is the process exit status).
    """
    rc, out = await run_cmd(_top_dirs_cmd(path, limit), timeout=30)
    if rc != 0:
        return ""
    return out.strip()


async def build_disk_report(cfg: dict[str, Any], mount: str, usage: int) -> str:
    """Compose a plain-text disk usage snapshot.

    The report always starts with a header and the usage line for *mount*.
    It then adds a "Top directories" section for *mount*, followed by
    sections for the configured docker and logs directories. A section is
    omitted when its directory does not exist or its listing is empty.

    Args:
        cfg: Application config; the optional ``disk_report`` mapping supplies
            ``top_dirs``, ``docker_dir`` and ``logs_dir``.
        mount: Mount point whose usage triggered the report.
        usage: Usage value shown next to *mount* with a ``%`` suffix.

    Returns:
        The report as a newline-joined string.
    """
    # A "disk_report:" key with no value may load as None; treat it like a
    # missing section instead of failing on None.get().
    report_cfg = cfg.get("disk_report") or {}
    limit = int(report_cfg.get("top_dirs", 8))

    lines = ["🧱 Disk report", f"💽 {mount}: {usage}%"]

    listing = await _dir_listing(mount, limit)
    if listing:
        lines += ["", "Top directories:", listing]

    extra_dirs = (
        ("Docker dir", report_cfg.get("docker_dir", "/var/lib/docker")),
        ("Logs dir", report_cfg.get("logs_dir", "/var/log")),
    )
    for label, directory in extra_dirs:
        # An empty value in the config disables that section.
        if not directory or not os.path.exists(directory):
            continue
        listing = await _dir_listing(directory, limit)
        if listing:
            lines += ["", f"{label}: {directory}", listing]

    return "\n".join(lines)