Add disk usage snapshot reports

This commit is contained in:
2026-02-08 02:21:15 +03:00
parent 3df9db3bf7
commit c34a142698
5 changed files with 70 additions and 1 deletions

View File

@@ -29,6 +29,14 @@ This project uses `config.yaml`. Start from `config.example.yaml`.
- `smart_cooldown_sec` (int): SMART alert cooldown. - `smart_cooldown_sec` (int): SMART alert cooldown.
- `smart_temp_warn` (int): SMART temperature warning (C). - `smart_temp_warn` (int): SMART temperature warning (C).
## disk_report
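- `threshold` (int): Disk usage percentage at or above which an automatic snapshot report is sent. Defaults to `thresholds.disk_warn`.
- `cooldown_sec` (int): Minimum number of seconds between automatic snapshots.
- `top_dirs` (int): Number of largest directories to list in each section of the report.
- `docker_dir` (string): Path to the Docker data directory; the section is skipped if the path does not exist.
- `logs_dir` (string): Path to the logs directory; the section is skipped if the path does not exist.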
## audit ## audit
- `enabled` (bool): Enable audit logging. - `enabled` (bool): Enable audit logging.

View File

@@ -29,6 +29,14 @@
- `smart_cooldown_sec` (int): кулдаун SMART. - `smart_cooldown_sec` (int): кулдаун SMART.
- `smart_temp_warn` (int): порог температуры (C). - `smart_temp_warn` (int): порог температуры (C).
## disk_report
- `threshold` (int): порог диска для авто‑снимка.
- `cooldown_sec` (int): кулдаун между снимками.
- `top_dirs` (int): сколько директорий показывать.
- `docker_dir` (string): путь к docker данным.
- `logs_dir` (string): путь к логам.
## audit ## audit
- `enabled` (bool): включить аудит. - `enabled` (bool): включить аудит.

View File

@@ -23,6 +23,13 @@ alerts:
smart_cooldown_sec: 21600 smart_cooldown_sec: 21600
smart_temp_warn: 50 smart_temp_warn: 50
disk_report:
threshold: 90
cooldown_sec: 21600
top_dirs: 8
docker_dir: "/var/lib/docker"
logs_dir: "/var/log"
audit: audit:
enabled: true enabled: true
path: "/var/server-bot/audit.log" path: "/var/server-bot/audit.log"

View File

@@ -3,6 +3,7 @@ import time
import psutil import psutil
from system_checks import list_disks, smart_health, disk_temperature from system_checks import list_disks, smart_health, disk_temperature
from services.system import worst_disk_usage from services.system import worst_disk_usage
from services.disk_report import build_disk_report
async def monitor_resources(cfg, notify, bot, chat_id): async def monitor_resources(cfg, notify, bot, chat_id):
@@ -12,10 +13,12 @@ async def monitor_resources(cfg, notify, bot, chat_id):
notify_recovery = bool(alerts_cfg.get("notify_recovery", True)) notify_recovery = bool(alerts_cfg.get("notify_recovery", True))
disk_warn = int(cfg.get("thresholds", {}).get("disk_warn", 80)) disk_warn = int(cfg.get("thresholds", {}).get("disk_warn", 80))
snapshot_warn = int(cfg.get("disk_report", {}).get("threshold", disk_warn))
snapshot_cooldown = int(cfg.get("disk_report", {}).get("cooldown_sec", 21600))
load_warn = float(cfg.get("thresholds", {}).get("load_warn", 2.0)) load_warn = float(cfg.get("thresholds", {}).get("load_warn", 2.0))
high_warn = float(cfg.get("thresholds", {}).get("high_load_warn", load_warn * 1.5)) high_warn = float(cfg.get("thresholds", {}).get("high_load_warn", load_warn * 1.5))
last_sent = {"disk": 0.0, "load": 0.0, "disk_na": 0.0} last_sent = {"disk": 0.0, "load": 0.0, "disk_na": 0.0, "disk_report": 0.0}
state = {"disk_high": False, "disk_na": False, "load_level": 0} state = {"disk_high": False, "disk_na": False, "load_level": 0}
while True: while True:
@@ -42,6 +45,11 @@ async def monitor_resources(cfg, notify, bot, chat_id):
await notify(bot, chat_id, f"🟢 Disk usage OK ({usage}% {mount})") await notify(bot, chat_id, f"🟢 Disk usage OK ({usage}% {mount})")
state["disk_high"] = False state["disk_high"] = False
if usage >= snapshot_warn and now - last_sent["disk_report"] >= snapshot_cooldown:
report = await build_disk_report(cfg, mount or "/", usage)
await notify(bot, chat_id, f"📦 Disk snapshot\n\n{report}")
last_sent["disk_report"] = now
load = psutil.getloadavg()[0] load = psutil.getloadavg()[0]
if load >= high_warn: if load >= high_warn:
level = 2 level = 2

38
services/disk_report.py Normal file
View File

@@ -0,0 +1,38 @@
import os
from typing import Any
from services.runner import run_cmd
def _top_dirs_cmd(path: str, limit: int) -> list[str]:
return ["bash", "-lc", f"du -xhd1 {path} 2>/dev/null | sort -h | tail -n {limit}"]
async def build_disk_report(cfg: dict[str, Any], mount: str, usage: int) -> str:
    """Assemble a plain-text disk usage snapshot for *mount*.

    The report always starts with a header and the mount's usage percentage.
    Up to three sections follow, each holding the output of the top-directories
    command: one for the mount itself, one for the configured Docker directory
    and one for the configured logs directory. A section is omitted when its
    command returns a non-zero code or prints nothing; the Docker and logs
    sections are also omitted when their path is empty or does not exist.

    Args:
        cfg: Application configuration; the optional ``disk_report`` mapping
            supplies ``top_dirs``, ``docker_dir`` and ``logs_dir``.
        mount: Mount point whose usage triggered the report.
        usage: Usage value shown next to the mount (rendered with ``%``).

    Returns:
        The report as a single newline-joined string.
    """
    # A `disk_report:` key with no children loads as None from YAML; fall back
    # to an empty mapping instead of failing on `.get`.
    report_cfg = cfg.get("disk_report") or {}
    limit = int(report_cfg.get("top_dirs", 8))
    docker_dir = report_cfg.get("docker_dir", "/var/lib/docker")
    logs_dir = report_cfg.get("logs_dir", "/var/log")

    lines = ["🧱 Disk report", f"💽 {mount}: {usage}%"]

    # (path, heading, must_exist): the mount is always scanned, while the
    # configurable directories are only scanned when they are present on disk.
    sections = (
        (mount, "Top directories:", False),
        (docker_dir, f"Docker dir: {docker_dir}", True),
        (logs_dir, f"Logs dir: {logs_dir}", True),
    )
    for path, heading, must_exist in sections:
        if must_exist and not (path and os.path.exists(path)):
            continue
        rc, out = await run_cmd(_top_dirs_cmd(path, limit), timeout=30)
        body = out.strip()
        if rc == 0 and body:
            # Blank line separates each section from what precedes it.
            lines.extend(["", heading, body])

    return "\n".join(lines)