import os
import subprocess

import psutil

from app import RESTIC_ENV
from services.system import worst_disk_usage

# Upper bound, in seconds, for each external command run by the health check,
# so a hung restic repository or docker daemon cannot stall the report.
_CMD_TIMEOUT_S = 10


def _containers_from_cfg(cfg) -> dict:
    """Return the ``docker.containers`` mapping from *cfg*, or ``{}`` if absent."""
    return cfg.get("docker", {}).get("containers", {})


def _container_state(name) -> str:
    """Return the stripped ``docker inspect`` state of container *name*.

    The command is run with an argument list (no shell), so the container
    name is never interpreted by a shell.  Only stdout is read, so warnings
    docker prints on stderr do not pollute the status.  An empty string is
    returned when docker cannot be launched or does not answer within
    ``_CMD_TIMEOUT_S`` seconds.
    """
    try:
        proc = subprocess.run(
            ["docker", "inspect", "-f", "{{.State.Status}}", str(name)],
            capture_output=True,
            text=True,
            timeout=_CMD_TIMEOUT_S,
        )
    except (OSError, subprocess.SubprocessError):
        # docker binary missing / not executable, or the probe timed out.
        return ""
    return proc.stdout.strip()


def health(cfg, container_map: dict | None = None) -> str:
    """Build a multi-line, human-readable health report.

    The report contains, in order: backup repository reachability (via
    ``restic snapshots``), one line per container, disk usage as reported by
    ``worst_disk_usage()``, and the first value of ``psutil.getloadavg()``.

    Args:
        cfg: Configuration mapping.  ``cfg["thresholds"]["disk_warn"]`` and
            ``cfg["thresholds"]["load_warn"]`` are read; the container list
            is taken from ``cfg["docker"]["containers"]`` when
            *container_map* is ``None``.
        container_map: Optional ``alias -> container name`` mapping that
            overrides the one in *cfg*.  An empty mapping is honoured (no
            containers are checked).

    Returns:
        The report lines joined with newlines.

    Raises:
        KeyError: If a required threshold is missing from *cfg* (the
            ``disk_warn`` lookup is skipped when disk usage is ``None``).
    """
    lines = ["🩺 Health check\n"]

    try:
        env = os.environ.copy()
        env.update(RESTIC_ENV)
        subprocess.check_output(
            ["restic", "snapshots"], timeout=_CMD_TIMEOUT_S, env=env
        )
    except Exception:
        # Deliberately broad: any failure here (missing binary, non-zero
        # exit, timeout, bad environment) means the repo is unreachable,
        # and the health check itself must not crash.
        lines.append("🔴 Backup repo unreachable")
    else:
        lines.append("🟢 Backup repo reachable")

    containers = (
        container_map if container_map is not None else _containers_from_cfg(cfg)
    )
    for alias, real in containers.items():
        if _container_state(real) == "running":
            lines.append(f"🟢 {alias} OK")
        else:
            lines.append(f"🔴 {alias} down")

    usage, mount = worst_disk_usage()
    if usage is None:
        lines.append("⚠️ Disk n/a")
    elif usage > cfg["thresholds"]["disk_warn"]:
        lines.append(f"🟡 Disk {usage}% ({mount})")
    else:
        lines.append(f"🟢 Disk {usage}% ({mount})")

    load = psutil.getloadavg()[0]
    lines.append(
        f"{'🟢' if load < cfg['thresholds']['load_warn'] else '🟡'} Load {load}"
    )

    return "\n".join(lines)