Add safe config fallbacks for app init and health checks
This commit is contained in:
2
app.py
2
app.py
@@ -15,7 +15,7 @@ else:
|
|||||||
|
|
||||||
paths_cfg = cfg.get("paths", {})
|
paths_cfg = cfg.get("paths", {})
|
||||||
runtime_state.configure(paths_cfg.get("runtime_state", "/var/server-bot/runtime.json"))
|
runtime_state.configure(paths_cfg.get("runtime_state", "/var/server-bot/runtime.json"))
|
||||||
ARTIFACT_STATE = paths_cfg["artifact_state"]
|
ARTIFACT_STATE = paths_cfg.get("artifact_state", "/opt/tg-bot/state.json")
|
||||||
RESTIC_ENV = load_env(paths_cfg.get("restic_env", "/etc/restic/restic.env"))
|
RESTIC_ENV = load_env(paths_cfg.get("restic_env", "/etc/restic/restic.env"))
|
||||||
|
|
||||||
DISK_WARN = int(cfg.get("thresholds", {}).get("disk_warn", 80))
|
DISK_WARN = int(cfg.get("thresholds", {}).get("disk_warn", 80))
|
||||||
|
|||||||
@@ -9,7 +9,9 @@ def validate_cfg(cfg: dict[str, Any]) -> Tuple[List[str], List[str]]:
|
|||||||
tg = cfg.get("telegram", {})
|
tg = cfg.get("telegram", {})
|
||||||
if not tg.get("token"):
|
if not tg.get("token"):
|
||||||
errors.append("telegram.token is missing")
|
errors.append("telegram.token is missing")
|
||||||
if not tg.get("admin_id"):
|
admin_ids = tg.get("admin_ids")
|
||||||
|
has_admin_ids = isinstance(admin_ids, list) and len(admin_ids) > 0
|
||||||
|
if not tg.get("admin_id") and not has_admin_ids:
|
||||||
errors.append("telegram.admin_id is missing")
|
errors.append("telegram.admin_id is missing")
|
||||||
|
|
||||||
thresholds = cfg.get("thresholds", {})
|
thresholds = cfg.get("thresholds", {})
|
||||||
|
|||||||
@@ -1,4 +1,4 @@
|
|||||||
import os
|
import os
|
||||||
import ssl
|
import ssl
|
||||||
import subprocess
|
import subprocess
|
||||||
import psutil
|
import psutil
|
||||||
@@ -37,15 +37,17 @@ def _npm_api_base(cfg) -> str | None:
|
|||||||
|
|
||||||
|
|
||||||
def health(cfg, container_map: dict | None = None) -> str:
|
def health(cfg, container_map: dict | None = None) -> str:
|
||||||
lines = ["🩺 Health check\n"]
|
lines = ["🩺 Health check\n"]
|
||||||
|
thresholds = cfg.get("thresholds", {})
|
||||||
|
disk_warn = int(thresholds.get("disk_warn", 80))
|
||||||
|
load_warn = float(thresholds.get("load_warn", 2.0))
|
||||||
try:
|
try:
|
||||||
env = os.environ.copy()
|
env = os.environ.copy()
|
||||||
env.update(RESTIC_ENV)
|
env.update(RESTIC_ENV)
|
||||||
subprocess.check_output(["restic", "snapshots"], timeout=10, env=env)
|
subprocess.check_output(["restic", "snapshots"], timeout=10, env=env)
|
||||||
lines.append("🟢 Backup repo reachable")
|
lines.append("🟢 Backup repo reachable")
|
||||||
except Exception:
|
except Exception:
|
||||||
lines.append("🔴 Backup repo unreachable")
|
lines.append("🔴 Backup repo unreachable")
|
||||||
|
|
||||||
containers = container_map if container_map is not None else _containers_from_cfg(cfg)
|
containers = container_map if container_map is not None else _containers_from_cfg(cfg)
|
||||||
for alias, real in containers.items():
|
for alias, real in containers.items():
|
||||||
@@ -53,20 +55,20 @@ def health(cfg, container_map: dict | None = None) -> str:
|
|||||||
f"docker inspect -f '{{{{.State.Status}}}}' {real}"
|
f"docker inspect -f '{{{{.State.Status}}}}' {real}"
|
||||||
)
|
)
|
||||||
if out.strip() != "running":
|
if out.strip() != "running":
|
||||||
lines.append(f"🔴 {alias} down")
|
lines.append(f"🔴 {alias} down")
|
||||||
else:
|
else:
|
||||||
lines.append(f"🟢 {alias} OK")
|
lines.append(f"🟢 {alias} OK")
|
||||||
|
|
||||||
npm_cfg = cfg.get("npmplus", {})
|
npm_cfg = cfg.get("npmplus", {})
|
||||||
npm_base = _npm_api_base(cfg)
|
npm_base = _npm_api_base(cfg)
|
||||||
if npm_base:
|
if npm_base:
|
||||||
npm_status = _request_status(npm_base, npm_cfg.get("verify_tls", True))
|
npm_status = _request_status(npm_base, npm_cfg.get("verify_tls", True))
|
||||||
if npm_status == 200:
|
if npm_status == 200:
|
||||||
lines.append("🟢 NPMplus API OK")
|
lines.append("🟢 NPMplus API OK")
|
||||||
elif npm_status is None:
|
elif npm_status is None:
|
||||||
lines.append("🔴 NPMplus API unreachable")
|
lines.append("🔴 NPMplus API unreachable")
|
||||||
else:
|
else:
|
||||||
lines.append(f"🟡 NPMplus API HTTP {npm_status}")
|
lines.append(f"🟡 NPMplus API HTTP {npm_status}")
|
||||||
|
|
||||||
g_cfg = cfg.get("gitea", {})
|
g_cfg = cfg.get("gitea", {})
|
||||||
g_base = (g_cfg.get("base_url") or "").rstrip("/")
|
g_base = (g_cfg.get("base_url") or "").rstrip("/")
|
||||||
@@ -82,21 +84,22 @@ def health(cfg, container_map: dict | None = None) -> str:
|
|||||||
g_status = status
|
g_status = status
|
||||||
break
|
break
|
||||||
if g_status == 200:
|
if g_status == 200:
|
||||||
lines.append("🟢 Gitea API OK")
|
lines.append("🟢 Gitea API OK")
|
||||||
elif g_status is None:
|
elif g_status is None:
|
||||||
lines.append("🔴 Gitea API unreachable")
|
lines.append("🔴 Gitea API unreachable")
|
||||||
else:
|
else:
|
||||||
lines.append(f"🟡 Gitea API HTTP {g_status}")
|
lines.append(f"🟡 Gitea API HTTP {g_status}")
|
||||||
|
|
||||||
usage, mount = worst_disk_usage()
|
usage, mount = worst_disk_usage()
|
||||||
if usage is None:
|
if usage is None:
|
||||||
lines.append("⚠️ Disk n/a")
|
lines.append("⚠️ Disk n/a")
|
||||||
elif usage > cfg["thresholds"]["disk_warn"]:
|
elif usage > disk_warn:
|
||||||
lines.append(f"🟡 Disk {usage}% ({mount})")
|
lines.append(f"🟡 Disk {usage}% ({mount})")
|
||||||
else:
|
else:
|
||||||
lines.append(f"🟢 Disk {usage}% ({mount})")
|
lines.append(f"🟢 Disk {usage}% ({mount})")
|
||||||
|
|
||||||
load = psutil.getloadavg()[0]
|
load = psutil.getloadavg()[0]
|
||||||
lines.append(f"{'🟢' if load < cfg['thresholds']['load_warn'] else '🟡'} Load {load}")
|
lines.append(f"{'🟢' if load < load_warn else '🟡'} Load {load}")
|
||||||
|
|
||||||
return "\n".join(lines)
|
return "\n".join(lines)
|
||||||
|
|
||||||
|
|||||||
Reference in New Issue
Block a user