Add safe config fallbacks for app init and health checks

This commit is contained in:
2026-02-15 01:16:58 +03:00
parent 6d5fb9c258
commit b54a094185
3 changed files with 25 additions and 20 deletions

2
app.py
View File

@@ -15,7 +15,7 @@ else:
paths_cfg = cfg.get("paths", {})
runtime_state.configure(paths_cfg.get("runtime_state", "/var/server-bot/runtime.json"))
ARTIFACT_STATE = paths_cfg["artifact_state"]
ARTIFACT_STATE = paths_cfg.get("artifact_state", "/opt/tg-bot/state.json")
RESTIC_ENV = load_env(paths_cfg.get("restic_env", "/etc/restic/restic.env"))
DISK_WARN = int(cfg.get("thresholds", {}).get("disk_warn", 80))

View File

@@ -9,7 +9,9 @@ def validate_cfg(cfg: dict[str, Any]) -> Tuple[List[str], List[str]]:
tg = cfg.get("telegram", {})
if not tg.get("token"):
errors.append("telegram.token is missing")
if not tg.get("admin_id"):
admin_ids = tg.get("admin_ids")
has_admin_ids = isinstance(admin_ids, list) and len(admin_ids) > 0
if not tg.get("admin_id") and not has_admin_ids:
errors.append("telegram.admin_id is missing")
thresholds = cfg.get("thresholds", {})

View File

@@ -1,4 +1,4 @@
import os
import os
import ssl
import subprocess
import psutil
@@ -37,15 +37,17 @@ def _npm_api_base(cfg) -> str | None:
def health(cfg, container_map: dict | None = None) -> str:
lines = ["🩺 Health check\n"]
lines = ["рџ©є Health check\n"]
thresholds = cfg.get("thresholds", {})
disk_warn = int(thresholds.get("disk_warn", 80))
load_warn = float(thresholds.get("load_warn", 2.0))
try:
env = os.environ.copy()
env.update(RESTIC_ENV)
subprocess.check_output(["restic", "snapshots"], timeout=10, env=env)
lines.append("🟢 Backup repo reachable")
lines.append("рџџў Backup repo reachable")
except Exception:
lines.append("🔴 Backup repo unreachable")
lines.append("🔴 Backup repo unreachable")
containers = container_map if container_map is not None else _containers_from_cfg(cfg)
for alias, real in containers.items():
@@ -53,20 +55,20 @@ def health(cfg, container_map: dict | None = None) -> str:
f"docker inspect -f '{{{{.State.Status}}}}' {real}"
)
if out.strip() != "running":
lines.append(f"🔴 {alias} down")
lines.append(f"🔴 {alias} down")
else:
lines.append(f"🟢 {alias} OK")
lines.append(f"рџџў {alias} OK")
npm_cfg = cfg.get("npmplus", {})
npm_base = _npm_api_base(cfg)
if npm_base:
npm_status = _request_status(npm_base, npm_cfg.get("verify_tls", True))
if npm_status == 200:
lines.append("🟢 NPMplus API OK")
lines.append("рџџў NPMplus API OK")
elif npm_status is None:
lines.append("🔴 NPMplus API unreachable")
lines.append("🔴 NPMplus API unreachable")
else:
lines.append(f"🟡 NPMplus API HTTP {npm_status}")
lines.append(f"рџџЎ NPMplus API HTTP {npm_status}")
g_cfg = cfg.get("gitea", {})
g_base = (g_cfg.get("base_url") or "").rstrip("/")
@@ -82,21 +84,22 @@ def health(cfg, container_map: dict | None = None) -> str:
g_status = status
break
if g_status == 200:
lines.append("🟢 Gitea API OK")
lines.append("рџџў Gitea API OK")
elif g_status is None:
lines.append("🔴 Gitea API unreachable")
lines.append("🔴 Gitea API unreachable")
else:
lines.append(f"🟡 Gitea API HTTP {g_status}")
lines.append(f"рџџЎ Gitea API HTTP {g_status}")
usage, mount = worst_disk_usage()
if usage is None:
lines.append("⚠️ Disk n/a")
elif usage > cfg["thresholds"]["disk_warn"]:
lines.append(f"🟡 Disk {usage}% ({mount})")
lines.append("вљ пёЏ Disk n/a")
elif usage > disk_warn:
lines.append(f"рџџЎ Disk {usage}% ({mount})")
else:
lines.append(f"🟢 Disk {usage}% ({mount})")
lines.append(f"рџџў Disk {usage}% ({mount})")
load = psutil.getloadavg()[0]
lines.append(f"{'🟢' if load < cfg['thresholds']['load_warn'] else '🟡'} Load {load}")
lines.append(f"{'рџџў' if load < load_warn else 'рџџЎ'} Load {load}")
return "\n".join(lines)