import os
import ssl
import subprocess
from http.client import HTTPException
from urllib.error import HTTPError, URLError
from urllib.request import Request, urlopen

import psutil

from app import RESTIC_ENV
from services.system import worst_disk_usage

# Health endpoints probed on the Gitea base URL, in order; a 404/405 answer
# makes the probe fall through to the next path.
_GITEA_HEALTH_PATHS = ("/api/healthz", "/api/v1/healthz")

# Upper bound (seconds) for a single `docker inspect` call so that a hung
# docker daemon cannot block the whole report.
_DOCKER_TIMEOUT = 10


def _containers_from_cfg(cfg) -> dict:
    """Return the ``docker.containers`` mapping from *cfg* (empty if absent)."""
    return cfg.get("docker", {}).get("containers", {})


def _request_status(url: str, verify_tls: bool) -> int | None:
    """Fetch *url* and return the HTTP status code, or ``None`` on failure.

    HTTP error responses (4xx/5xx) are returned as their status code. Any
    transport-level problem -- DNS/connect failure, TLS error, timeout while
    waiting for the response, connection reset, malformed response, or an
    unparsable URL -- yields ``None`` instead of raising, so a single broken
    endpoint cannot abort the caller.

    When *verify_tls* is false, certificate verification is disabled.
    """
    context = None
    if not verify_tls:
        context = ssl._create_unverified_context()  # nosec - config-controlled
    try:
        # Request() itself raises ValueError for an unknown URL type, so it
        # has to live inside the try block as well.
        req = Request(url, headers={"User-Agent": "tg-admin-bot"})
        with urlopen(req, timeout=8, context=context) as resp:
            return int(resp.status)
    except HTTPError as e:
        return int(e.code)
    except (URLError, OSError, HTTPException, ValueError):
        # URLError covers connect-time failures; errors raised while reading
        # the response (timeouts, resets) surface as plain OSError, and
        # protocol violations as HTTPException.
        return None


def _npm_api_base(cfg) -> str | None:
    """Return the NPMplus API base URL from *cfg*, or ``None`` if unset.

    Trailing slashes are stripped and ``/api`` is appended unless the
    configured URL already ends with it.
    """
    npm_cfg = cfg.get("npmplus", {})
    base = (npm_cfg.get("base_url") or "").rstrip("/")
    if not base:
        return None
    if not base.endswith("/api"):
        base = f"{base}/api"
    return base


def _backup_line() -> str:
    """Return the report line for the restic repository reachability check."""
    try:
        env = os.environ.copy()
        env.update(RESTIC_ENV)
        subprocess.check_output(["restic", "snapshots"], timeout=10, env=env)
    except Exception:
        # Deliberate best-effort boundary: a missing binary, a timeout, a
        # non-zero exit or a bad environment all mean "unreachable".
        return "🔴 Backup repo unreachable"
    return "🟢 Backup repo reachable"


def _container_is_running(name) -> bool:
    """Return True if ``docker inspect`` reports *name* as ``running``.

    The command is run without a shell (argument list), so the container
    name is never interpreted by a shell. Only stdout is compared, so
    warnings docker prints on stderr do not affect the result. A missing
    docker binary, a timeout or any other failure counts as not running.
    """
    try:
        proc = subprocess.run(
            ["docker", "inspect", "-f", "{{.State.Status}}", str(name)],
            capture_output=True,
            text=True,
            timeout=_DOCKER_TIMEOUT,
            check=False,
        )
    except (OSError, subprocess.SubprocessError):
        return False
    return proc.stdout.strip() == "running"


def _api_line(label: str, status: int | None) -> str:
    """Format the report line for an HTTP API probe result."""
    if status == 200:
        return f"🟢 {label} API OK"
    if status is None:
        return f"🔴 {label} API unreachable"
    return f"🟡 {label} API HTTP {status}"


def _gitea_status(base: str, verify_tls: bool) -> int | None:
    """Probe the Gitea health endpoints and return the decisive status.

    Paths are tried in order; a 404/405 moves on to the next path. If every
    path answers 404/405 the last such code is returned (the host did
    answer, so it is not reported as unreachable). ``None`` means the
    request itself failed.
    """
    status = None
    for path in _GITEA_HEALTH_PATHS:
        status = _request_status(f"{base}{path}", verify_tls)
        if status not in (404, 405):
            break
    return status


def health(cfg, container_map: dict | None = None) -> str:
    """Build a multi-line, human-readable health report.

    The report contains, in order: restic repository reachability, the state
    of each docker container, the NPMplus API and Gitea API status (each only
    if a ``base_url`` is configured), the worst disk usage and the 1-minute
    load average.

    Args:
        cfg: Configuration mapping. Reads ``thresholds.disk_warn`` (default
            80), ``thresholds.load_warn`` (default 2.0), ``docker.containers``,
            ``npmplus`` and ``gitea`` (``base_url``, ``verify_tls``).
        container_map: Optional ``alias -> container name`` mapping that
            overrides ``docker.containers`` from *cfg* when not ``None``.

    Returns:
        The report lines joined with newlines.
    """
    lines = ["🩺 Health check\n"]

    thresholds = cfg.get("thresholds", {})
    disk_warn = int(thresholds.get("disk_warn", 80))
    load_warn = float(thresholds.get("load_warn", 2.0))

    lines.append(_backup_line())

    containers = (
        container_map if container_map is not None else _containers_from_cfg(cfg)
    )
    for alias, real in containers.items():
        if _container_is_running(real):
            lines.append(f"🟢 {alias} OK")
        else:
            lines.append(f"🔴 {alias} down")

    npm_cfg = cfg.get("npmplus", {})
    npm_base = _npm_api_base(cfg)
    if npm_base:
        npm_status = _request_status(npm_base, npm_cfg.get("verify_tls", True))
        lines.append(_api_line("NPMplus", npm_status))

    g_cfg = cfg.get("gitea", {})
    g_base = (g_cfg.get("base_url") or "").rstrip("/")
    if g_base:
        g_status = _gitea_status(g_base, g_cfg.get("verify_tls", True))
        lines.append(_api_line("Gitea", g_status))

    usage, mount = worst_disk_usage()
    if usage is None:
        lines.append("⚠️ Disk n/a")
    elif usage > disk_warn:
        lines.append(f"🟡 Disk {usage}% ({mount})")
    else:
        lines.append(f"🟢 Disk {usage}% ({mount})")

    load = psutil.getloadavg()[0]  # 1-minute load average
    lines.append(f"{'🟢' if load < load_warn else '🟡'} Load {load}")

    return "\n".join(lines)