Add disk usage snapshot reports
This commit is contained in:
@@ -29,6 +29,14 @@ This project uses `config.yaml`. Start from `config.example.yaml`.
|
|||||||
- `smart_cooldown_sec` (int): SMART alert cooldown.
|
- `smart_cooldown_sec` (int): SMART alert cooldown.
|
||||||
- `smart_temp_warn` (int): SMART temperature warning (C).
|
- `smart_temp_warn` (int): SMART temperature warning (C).
|
||||||
|
|
||||||
|
## disk_report
|
||||||
|
|
||||||
|
- `threshold` (int): Disk usage percentage at or above which a snapshot report is sent automatically. Defaults to `thresholds.disk_warn`.
|
||||||
|
- `cooldown_sec` (int): Minimum interval, in seconds, between automatic snapshots (default 21600).
|
||||||
|
- `top_dirs` (int): How many of the largest directories to show in each section of the report (default 8).
|
||||||
|
- `docker_dir` (string): Path to the Docker data directory (default `/var/lib/docker`); the section is skipped if the path does not exist.
|
||||||
|
- `logs_dir` (string): Path to logs.
|
||||||
|
|
||||||
## audit
|
## audit
|
||||||
|
|
||||||
- `enabled` (bool): Enable audit logging.
|
- `enabled` (bool): Enable audit logging.
|
||||||
|
|||||||
@@ -29,6 +29,14 @@
|
|||||||
- `smart_cooldown_sec` (int): кулдаун SMART.
|
- `smart_cooldown_sec` (int): кулдаун SMART.
|
||||||
- `smart_temp_warn` (int): порог температуры (C).
|
- `smart_temp_warn` (int): порог температуры (C).
|
||||||
|
|
||||||
|
## disk_report
|
||||||
|
|
||||||
|
- `threshold` (int): порог диска для авто‑снимка.
|
||||||
|
- `cooldown_sec` (int): кулдаун между снимками.
|
||||||
|
- `top_dirs` (int): сколько директорий показывать.
|
||||||
|
- `docker_dir` (string): путь к docker данным.
|
||||||
|
- `logs_dir` (string): путь к логам.
|
||||||
|
|
||||||
## audit
|
## audit
|
||||||
|
|
||||||
- `enabled` (bool): включить аудит.
|
- `enabled` (bool): включить аудит.
|
||||||
|
|||||||
@@ -23,6 +23,13 @@ alerts:
|
|||||||
smart_cooldown_sec: 21600
|
smart_cooldown_sec: 21600
|
||||||
smart_temp_warn: 50
|
smart_temp_warn: 50
|
||||||
|
|
||||||
|
disk_report:
|
||||||
|
threshold: 90
|
||||||
|
cooldown_sec: 21600
|
||||||
|
top_dirs: 8
|
||||||
|
docker_dir: "/var/lib/docker"
|
||||||
|
logs_dir: "/var/log"
|
||||||
|
|
||||||
audit:
|
audit:
|
||||||
enabled: true
|
enabled: true
|
||||||
path: "/var/server-bot/audit.log"
|
path: "/var/server-bot/audit.log"
|
||||||
|
|||||||
@@ -3,6 +3,7 @@ import time
|
|||||||
import psutil
|
import psutil
|
||||||
from system_checks import list_disks, smart_health, disk_temperature
|
from system_checks import list_disks, smart_health, disk_temperature
|
||||||
from services.system import worst_disk_usage
|
from services.system import worst_disk_usage
|
||||||
|
from services.disk_report import build_disk_report
|
||||||
|
|
||||||
|
|
||||||
async def monitor_resources(cfg, notify, bot, chat_id):
|
async def monitor_resources(cfg, notify, bot, chat_id):
|
||||||
@@ -12,10 +13,12 @@ async def monitor_resources(cfg, notify, bot, chat_id):
|
|||||||
notify_recovery = bool(alerts_cfg.get("notify_recovery", True))
|
notify_recovery = bool(alerts_cfg.get("notify_recovery", True))
|
||||||
|
|
||||||
disk_warn = int(cfg.get("thresholds", {}).get("disk_warn", 80))
|
disk_warn = int(cfg.get("thresholds", {}).get("disk_warn", 80))
|
||||||
|
snapshot_warn = int(cfg.get("disk_report", {}).get("threshold", disk_warn))
|
||||||
|
snapshot_cooldown = int(cfg.get("disk_report", {}).get("cooldown_sec", 21600))
|
||||||
load_warn = float(cfg.get("thresholds", {}).get("load_warn", 2.0))
|
load_warn = float(cfg.get("thresholds", {}).get("load_warn", 2.0))
|
||||||
high_warn = float(cfg.get("thresholds", {}).get("high_load_warn", load_warn * 1.5))
|
high_warn = float(cfg.get("thresholds", {}).get("high_load_warn", load_warn * 1.5))
|
||||||
|
|
||||||
last_sent = {"disk": 0.0, "load": 0.0, "disk_na": 0.0}
|
last_sent = {"disk": 0.0, "load": 0.0, "disk_na": 0.0, "disk_report": 0.0}
|
||||||
state = {"disk_high": False, "disk_na": False, "load_level": 0}
|
state = {"disk_high": False, "disk_na": False, "load_level": 0}
|
||||||
|
|
||||||
while True:
|
while True:
|
||||||
@@ -42,6 +45,11 @@ async def monitor_resources(cfg, notify, bot, chat_id):
|
|||||||
await notify(bot, chat_id, f"🟢 Disk usage OK ({usage}% {mount})")
|
await notify(bot, chat_id, f"🟢 Disk usage OK ({usage}% {mount})")
|
||||||
state["disk_high"] = False
|
state["disk_high"] = False
|
||||||
|
|
||||||
|
if usage >= snapshot_warn and now - last_sent["disk_report"] >= snapshot_cooldown:
|
||||||
|
report = await build_disk_report(cfg, mount or "/", usage)
|
||||||
|
await notify(bot, chat_id, f"📦 Disk snapshot\n\n{report}")
|
||||||
|
last_sent["disk_report"] = now
|
||||||
|
|
||||||
load = psutil.getloadavg()[0]
|
load = psutil.getloadavg()[0]
|
||||||
if load >= high_warn:
|
if load >= high_warn:
|
||||||
level = 2
|
level = 2
|
||||||
|
|||||||
38
services/disk_report.py
Normal file
38
services/disk_report.py
Normal file
@@ -0,0 +1,38 @@
|
|||||||
|
import os
|
||||||
|
from typing import Any
|
||||||
|
|
||||||
|
from services.runner import run_cmd
|
||||||
|
|
||||||
|
|
||||||
|
def _top_dirs_cmd(path: str, limit: int) -> list[str]:
|
||||||
|
return ["bash", "-lc", f"du -xhd1 {path} 2>/dev/null | sort -h | tail -n {limit}"]
|
||||||
|
|
||||||
|
|
||||||
|
async def build_disk_report(cfg: dict[str, Any], mount: str, usage: int) -> str:
    """Compose a plain-text disk usage report for *mount*.

    The report starts with a header and the usage line for the mount, then
    appends up to three sections produced by ``_top_dirs_cmd``: the mount
    itself, the configured Docker directory and the configured logs
    directory. A section is added only when its command exits with code 0
    and prints something; the Docker and logs sections are additionally
    skipped when their path is empty or does not exist.

    Args:
        cfg: Application configuration; the optional ``disk_report`` mapping
            supplies ``top_dirs``, ``docker_dir`` and ``logs_dir``.
        mount: Mount point the report is about.
        usage: Usage value shown next to the mount, rendered with a ``%``.

    Returns:
        The report as a single newline-joined string.
    """
    # A `disk_report` key that is present but empty yields None rather than a
    # mapping (e.g. an empty YAML section), so fall back to {} instead of
    # crashing on `.get`.
    report_cfg = cfg.get("disk_report") or {}
    limit = int(report_cfg.get("top_dirs", 8))

    lines = ["🧱 Disk report", f"💽 {mount}: {usage}%"]

    docker_dir = report_cfg.get("docker_dir", "/var/lib/docker")
    logs_dir = report_cfg.get("logs_dir", "/var/log")

    # (path, heading, must_exist): the mount is always scanned, the extra
    # directories only when they are configured and present on disk.
    targets = (
        (mount, "Top directories:", False),
        (docker_dir, f"Docker dir: {docker_dir}", True),
        (logs_dir, f"Logs dir: {logs_dir}", True),
    )

    for path, heading, must_exist in targets:
        if must_exist and not (path and os.path.exists(path)):
            continue
        rc, out = await run_cmd(_top_dirs_cmd(path, limit), timeout=30)
        listing = out.strip()
        if rc == 0 and listing:
            # Blank line separates each section from the previous content.
            lines.extend(("", heading, listing))

    return "\n".join(lines)
|
||||||
Reference in New Issue
Block a user