103 lines
3.3 KiB
Python
103 lines
3.3 KiB
Python
import os
|
|
import ssl
|
|
import subprocess
|
|
import psutil
|
|
from urllib.error import HTTPError, URLError
|
|
from urllib.request import Request, urlopen
|
|
from app import RESTIC_ENV
|
|
from services.system import worst_disk_usage
|
|
|
|
|
|
def _containers_from_cfg(cfg) -> dict:
|
|
return cfg.get("docker", {}).get("containers", {})
|
|
|
|
|
|
def _request_status(url: str, verify_tls: bool) -> int | None:
|
|
context = None
|
|
if not verify_tls:
|
|
context = ssl._create_unverified_context() # nosec - config-controlled
|
|
req = Request(url, headers={"User-Agent": "tg-admin-bot"})
|
|
try:
|
|
with urlopen(req, timeout=8, context=context) as resp:
|
|
return int(resp.status)
|
|
except HTTPError as e:
|
|
return int(e.code)
|
|
except URLError:
|
|
return None
|
|
|
|
|
|
def _npm_api_base(cfg) -> str | None:
|
|
npm_cfg = cfg.get("npmplus", {})
|
|
base = (npm_cfg.get("base_url") or "").rstrip("/")
|
|
if not base:
|
|
return None
|
|
if not base.endswith("/api"):
|
|
base = f"{base}/api"
|
|
return base
|
|
|
|
|
|
def health(cfg, container_map: dict | None = None) -> str:
    """Run all health probes and return a multi-line, emoji-tagged report.

    Probes, in order: the restic backup repository, each configured Docker
    container, the NPMplus API (if a base URL is configured), the Gitea API
    (if a base URL is configured), disk usage and system load.

    Args:
        cfg: Configuration mapping. ``cfg["thresholds"]["disk_warn"]`` and
            ``cfg["thresholds"]["load_warn"]`` are required; the ``docker``,
            ``npmplus`` and ``gitea`` sections are optional.
        container_map: Optional ``alias -> container name`` mapping. When
            None, the mapping is taken from *cfg*.

    Returns:
        The report lines joined with newlines.

    Raises:
        KeyError: If the required ``thresholds`` entries are missing.
    """
    lines = ["🩺 Health check\n"]

    # Backup repository: any failure (missing binary, non-zero exit, timeout)
    # is deliberately reported as "unreachable" instead of aborting the report.
    try:
        env = os.environ.copy()
        env.update(RESTIC_ENV)
        subprocess.check_output(["restic", "snapshots"], timeout=10, env=env)
        lines.append("🟢 Backup repo reachable")
    except Exception:
        lines.append("🔴 Backup repo unreachable")

    # Docker containers.
    containers = container_map if container_map is not None else _containers_from_cfg(cfg)
    for alias, real in containers.items():
        if _container_running(real):
            lines.append(f"🟢 {alias} OK")
        else:
            lines.append(f"🔴 {alias} down")

    # NPMplus API.
    npm_cfg = cfg.get("npmplus") or {}
    npm_base = _npm_api_base(cfg)
    if npm_base:
        npm_status = _request_status(npm_base, npm_cfg.get("verify_tls", True))
        lines.append(_api_status_line("NPMplus", npm_status))

    # Gitea API: try each known health path in turn.
    g_cfg = cfg.get("gitea") or {}
    g_base = (g_cfg.get("base_url") or "").rstrip("/")
    if g_base:
        g_status = None
        for path in ("/api/healthz", "/api/v1/healthz"):
            g_status = _request_status(f"{g_base}{path}", g_cfg.get("verify_tls", True))
            # 404/405 means "this path does not exist here": try the next one.
            # Anything else (200, another code, or None) is the final answer.
            # If every path gives 404/405 the last code is kept, so the report
            # shows the real HTTP status rather than "unreachable".
            if g_status not in (404, 405):
                break
        lines.append(_api_status_line("Gitea", g_status))

    # Disk usage.
    usage, mount = worst_disk_usage()
    if usage is None:
        lines.append("⚠️ Disk n/a")
    elif usage > cfg["thresholds"]["disk_warn"]:
        lines.append(f"🟡 Disk {usage}% ({mount})")
    else:
        lines.append(f"🟢 Disk {usage}% ({mount})")

    # System load: first value of psutil.getloadavg() (the 1-minute average).
    load = psutil.getloadavg()[0]
    lines.append(f"{'🟢' if load < cfg['thresholds']['load_warn'] else '🟡'} Load {load}")

    return "\n".join(lines)


def _api_status_line(label: str, status: int | None) -> str:
    """Format the report line for one HTTP API probe result."""
    if status == 200:
        return f"🟢 {label} API OK"
    if status is None:
        return f"🔴 {label} API unreachable"
    return f"🟡 {label} API HTTP {status}"


def _container_running(name: str) -> bool:
    """Return True if ``docker inspect`` reports the container as "running".

    The command is passed as an argument list, so the container name is never
    interpreted by a shell. A missing ``docker`` binary, a timeout or any
    other failure to run the command counts as "not running".
    """
    try:
        proc = subprocess.run(
            ["docker", "inspect", "-f", "{{.State.Status}}", str(name)],
            capture_output=True,
            text=True,
            timeout=10,
        )
    except (OSError, subprocess.SubprocessError):
        return False
    # Only stdout is compared; warnings on stderr must not turn a running
    # container into a reported failure.
    return proc.stdout.strip() == "running"
|