Add runtime state, auto-mute schedules, and backup retries

This commit is contained in:
2026-02-09 01:14:37 +03:00
parent 9399be4168
commit b0a4413671
14 changed files with 312 additions and 17 deletions

View File

@@ -11,6 +11,7 @@ This project uses `config.yaml`. Start from `config.example.yaml`.
## paths ## paths
- `artifact_state` (string): JSON file for artifact state. - `artifact_state` (string): JSON file for artifact state.
- `runtime_state` (string): JSON file for persisted runtime state (mutes, queue metrics, etc.).
- `restic_env` (string): Path to a file with RESTIC_* environment variables. - `restic_env` (string): Path to a file with RESTIC_* environment variables.
## thresholds ## thresholds
@@ -31,6 +32,10 @@ This project uses `config.yaml`. Start from `config.example.yaml`.
- `start` (string): Start time `HH:MM` (e.g. `23:00`). - `start` (string): Start time `HH:MM` (e.g. `23:00`).
- `end` (string): End time `HH:MM` (e.g. `08:00`). - `end` (string): End time `HH:MM` (e.g. `08:00`).
- `allow_critical` (bool): Allow critical alerts during quiet hours. - `allow_critical` (bool): Allow critical alerts during quiet hours.
- `auto_mute` (list): Per-category auto mutes by time window.
- `category` (string): load/disk/smart/ssl/docker/test.
- `start` (string): Start `HH:MM`.
- `end` (string): End `HH:MM` (can wrap over midnight).
- `notify_recovery` (bool): Send recovery notifications. - `notify_recovery` (bool): Send recovery notifications.
- `smart_enabled` (bool): Enable SMART health polling. - `smart_enabled` (bool): Enable SMART health polling.
- `smart_interval_sec` (int): SMART poll interval. - `smart_interval_sec` (int): SMART poll interval.

View File

@@ -11,6 +11,7 @@
## paths ## paths
- `artifact_state` (string): JSON файл состояния артефактов. - `artifact_state` (string): JSON файл состояния артефактов.
- `runtime_state` (string): файл с runtime-состоянием (мьюты, метрики и т.п.).
- `restic_env` (string): путь к файлу с RESTIC_* переменными. - `restic_env` (string): путь к файлу с RESTIC_* переменными.
## thresholds ## thresholds
@@ -31,6 +32,10 @@
- `start` (string): начало, формат `HH:MM` (например `23:00`). - `start` (string): начало, формат `HH:MM` (например `23:00`).
- `end` (string): конец, формат `HH:MM` (например `08:00`). - `end` (string): конец, формат `HH:MM` (например `08:00`).
- `allow_critical` (bool): слать критичные алерты в тишину. - `allow_critical` (bool): слать критичные алерты в тишину.
- `auto_mute` (list): авто‑мьюты по категориям и времени.
- `category` (string): load/disk/smart/ssl/docker/test.
- `start` (string): начало `HH:MM`.
- `end` (string): конец `HH:MM` (интервал может пересекать ночь).
- `notify_recovery` (bool): уведомлять о восстановлении. - `notify_recovery` (bool): уведомлять о восстановлении.
- `smart_enabled` (bool): SMART проверки. - `smart_enabled` (bool): SMART проверки.
- `smart_interval_sec` (int): интервал SMART. - `smart_interval_sec` (int): интервал SMART.

7
app.py
View File

@@ -1,5 +1,6 @@
from aiogram import Bot, Dispatcher from aiogram import Bot, Dispatcher
from config import load_cfg, load_env from config import load_cfg, load_env
from services import runtime_state
cfg = load_cfg() cfg = load_cfg()
@@ -12,8 +13,10 @@ else:
ADMIN_ID = int(cfg["telegram"]["admin_id"]) ADMIN_ID = int(cfg["telegram"]["admin_id"])
ADMIN_IDS = [ADMIN_ID] ADMIN_IDS = [ADMIN_ID]
ARTIFACT_STATE = cfg["paths"]["artifact_state"] paths_cfg = cfg.get("paths", {})
RESTIC_ENV = load_env(cfg["paths"].get("restic_env", "/etc/restic/restic.env")) runtime_state.configure(paths_cfg.get("runtime_state", "/var/server-bot/runtime.json"))
ARTIFACT_STATE = paths_cfg["artifact_state"]
RESTIC_ENV = load_env(paths_cfg.get("restic_env", "/etc/restic/restic.env"))
DISK_WARN = int(cfg.get("thresholds", {}).get("disk_warn", 80)) DISK_WARN = int(cfg.get("thresholds", {}).get("disk_warn", 80))
LOAD_WARN = float(cfg.get("thresholds", {}).get("load_warn", 2.0)) LOAD_WARN = float(cfg.get("thresholds", {}).get("load_warn", 2.0))

View File

@@ -8,6 +8,7 @@ telegram:
paths: paths:
# JSON state file for artifacts # JSON state file for artifacts
artifact_state: "/opt/tg-bot/state.json" artifact_state: "/opt/tg-bot/state.json"
runtime_state: "/var/server-bot/runtime.json"
# Optional env file with RESTIC_* variables # Optional env file with RESTIC_* variables
restic_env: "/etc/restic/restic.env" restic_env: "/etc/restic/restic.env"
@@ -24,6 +25,11 @@ alerts:
notify_cooldown_sec: 900 notify_cooldown_sec: 900
# If true, only critical load alerts are sent (no warn/OK) # If true, only critical load alerts are sent (no warn/OK)
load_only_critical: false load_only_critical: false
# Optional auto-mute windows per category
auto_mute:
- category: "load"
start: "23:00"
end: "08:00"
quiet_hours: quiet_hours:
enabled: false enabled: false
start: "23:00" start: "23:00"

View File

@@ -63,6 +63,17 @@ def _format_backup_result(rc: int, out: str) -> str:
return base return base
def _tail(path: str, lines: int = 120) -> str:
    """Return the last ``lines`` lines of a text file as a single string.

    Args:
        path: File to read; decoded as UTF-8 with undecodable bytes replaced.
        lines: Maximum number of trailing lines to keep. Zero or a negative
            value yields no lines.

    Returns:
        The stripped tail of the file, ``"(empty)"`` when there is nothing to
        show, or a message starting with ``⚠️`` when the file is missing or
        cannot be read.
    """
    # Local import keeps this helper self-contained.
    from collections import deque

    if not os.path.exists(path):
        return f"⚠️ Log not found: {path}"
    try:
        with open(path, "r", encoding="utf-8", errors="replace") as f:
            # A bounded deque streams the file and retains only the tail, so a
            # large log is never held in memory in full. It also avoids the
            # ``readlines()[-0:]`` pitfall, which returns the whole file.
            data = deque(f, maxlen=max(0, lines))
    except OSError as e:
        return f"⚠️ Failed to read log: {e}"
    return "".join(data).strip() or "(empty)"
def _load_json(raw: str, label: str) -> tuple[bool, object | None, str]: def _load_json(raw: str, label: str) -> tuple[bool, object | None, str]:
if not raw or not raw.strip(): if not raw or not raw.strip():
return False, None, f"? {label} returned empty output" return False, None, f"? {label} returned empty output"
@@ -225,6 +236,10 @@ async def cmd_backup_status(msg: Message):
async def cmd_backup_now(msg: Message): async def cmd_backup_now(msg: Message):
await schedule_backup(msg)
async def schedule_backup(msg: Message):
async def job(): async def job():
if cfg.get("safety", {}).get("dry_run", False): if cfg.get("safety", {}).get("dry_run", False):
await msg.answer("🧪 Dry-run: backup skipped", reply_markup=backup_kb) await msg.answer("🧪 Dry-run: backup skipped", reply_markup=backup_kb)
@@ -241,7 +256,14 @@ async def cmd_backup_now(msg: Message):
use_restic_env=True, use_restic_env=True,
timeout=6 * 3600, timeout=6 * 3600,
) )
await msg.answer(_format_backup_result(rc, out), reply_markup=backup_kb) kb = backup_kb
if rc != 0:
kb = InlineKeyboardMarkup(
inline_keyboard=[
[InlineKeyboardButton(text="🔁 Retry backup", callback_data="backup:retry")]
]
)
await msg.answer(_format_backup_result(rc, out), reply_markup=kb)
finally: finally:
release_lock("backup") release_lock("backup")
@@ -352,7 +374,7 @@ async def br(msg: Message):
@dp.message(F.text == "/backup_run")
async def br_cmd(msg: Message):
    """Handle ``/backup_run``: queue a backup when the sender is an admin."""
    if not is_admin_msg(msg):
        return
    await schedule_backup(msg)
@dp.message(F.text == "🧪 Restic check") @dp.message(F.text == "🧪 Restic check")
@@ -367,7 +389,14 @@ async def rc(msg: Message):
use_restic_env=True, use_restic_env=True,
timeout=6 * 3600, timeout=6 * 3600,
) )
await msg.answer(("✅ OK\n" if rc2 == 0 else "❌ FAIL\n") + out, reply_markup=backup_kb) kb = backup_kb
if rc2 != 0:
kb = InlineKeyboardMarkup(
inline_keyboard=[
[InlineKeyboardButton(text="🔁 Retry restic check", callback_data="backup:retry_check")]
]
)
await msg.answer(("✅ OK\n" if rc2 == 0 else "❌ FAIL\n") + out, reply_markup=kb)
pos = await enqueue("restic-check", job) pos = await enqueue("restic-check", job)
await msg.answer(f"🕓 Restic check queued (#{pos})", reply_markup=backup_kb) await msg.answer(f"🕓 Restic check queued (#{pos})", reply_markup=backup_kb)
@@ -395,3 +424,36 @@ async def wr(msg: Message):
async def rh(msg: Message): async def rh(msg: Message):
if is_admin_msg(msg): if is_admin_msg(msg):
await msg.answer(restore_help(), reply_markup=backup_kb) await msg.answer(restore_help(), reply_markup=backup_kb)
@dp.message(F.text == "📜 History")
@dp.message(F.text == "/backup_history")
async def backup_history(msg: Message):
    """Send the tail of the automatic backup log to an admin.

    Non-admin senders are ignored. When the log cannot be read, the warning
    text produced by ``_tail`` is forwarded as a plain message.
    """
    if not is_admin_msg(msg):
        return
    log_path = "/var/log/backup-auto.log"
    content = _tail(log_path, lines=160)
    if content.startswith("⚠️"):
        await msg.answer(content, reply_markup=backup_kb)
        return
    # Telegram rejects messages longer than 4096 characters; keep the newest
    # part of the log and leave headroom for the header and code fence.
    max_body = 3500
    if len(content) > max_body:
        content = "…\n" + content[-max_body:]
    # A literal triple backtick inside the log would close the code fence early
    # and make the Markdown unparsable.
    content = content.replace("```", "'''")
    await msg.answer(
        f"📜 Backup history (tail)\n`{log_path}`\n```\n{content}\n```",
        reply_markup=backup_kb,
        parse_mode="Markdown",
    )
@dp.callback_query(F.data == "backup:retry")
async def backup_retry(cb: CallbackQuery):
    """Handle the inline "Retry backup" button by queuing a new backup.

    Callbacks from non-admins are ignored.
    """
    if not is_admin_cb(cb):
        return
    if cb.message is None:
        # The message the button was attached to may no longer be available,
        # in which case there is nothing to reply to.
        await cb.answer("Message is no longer available", show_alert=True)
        return
    await cb.answer("Queuing backup…")
    await schedule_backup(cb.message)
@dp.callback_query(F.data == "backup:retry_check")
async def backup_retry_check(cb: CallbackQuery):
    """Handle the inline "Retry restic check" button by re-running the check.

    Callbacks from non-admins are ignored.
    """
    if not is_admin_cb(cb):
        return
    if cb.message is None:
        # The message the button was attached to may no longer be available,
        # in which case there is nothing to reply to.
        await cb.answer("Message is no longer available", show_alert=True)
        return
    await cb.answer("Queuing restic check…")
    # NOTE(review): rc() is a message handler; if it re-checks admin rights
    # against the message author, cb.message (sent by the bot) may be
    # rejected and the retry would silently do nothing — confirm.
    await rc(cb.message)

View File

@@ -13,6 +13,7 @@ HELP_PAGES = [
"📊 *Статус* — общая загрузка.\n" "📊 *Статус* — общая загрузка.\n"
"📋 */status_short* — кратко (load/RAM/диски).\n" "📋 */status_short* — кратко (load/RAM/диски).\n"
"🩺 */health_short* — краткий health.\n" "🩺 */health_short* — краткий health.\n"
"🧪 */selftest* — health + restic snapshot probe.\n"
"🔧 Разделы: Docker, Backup, Artifacts, System, OpenWrt.", "🔧 Разделы: Docker, Backup, Artifacts, System, OpenWrt.",
), ),
( (
@@ -25,13 +26,16 @@ HELP_PAGES = [
"Шорткаты: `/alerts_list`, `/alerts_recent`, `/alerts_mute_load` (60м).\n" "Шорткаты: `/alerts_list`, `/alerts_recent`, `/alerts_mute_load` (60м).\n"
"Категории: load, disk, smart, ssl, docker, test.\n" "Категории: load, disk, smart, ssl, docker, test.\n"
"Quiet hours: `alerts.quiet_hours` для не‑критичных.\n" "Quiet hours: `alerts.quiet_hours` для не‑критичных.\n"
"Авто-мьют: `alerts.auto_mute` со слотами времени.\n"
"Только красные load: `alerts.load_only_critical: true`.\n" "Только красные load: `alerts.load_only_critical: true`.\n"
"Валидатор конфига: `/config_check`.", "Валидатор конфига: `/config_check`.",
), ),
( (
"Backup", "Backup",
"💾 **Backup (restic)**\n\n" "💾 **Backup (restic)**\n\n"
"Кнопки: Status, Last snapshot, Repo stats, Run backup, Queue, Restic check, Weekly report.\n" "Кнопки: Status, Last snapshot, Repo stats, Run backup, Queue, Restic check, Weekly report, History.\n"
"History — хвост `/var/log/backup-auto.log`.\n"
"Fail → кнопка Retry (backup/check).\n"
"Run backup/Check учитывают `safety.dry_run`.\n" "Run backup/Check учитывают `safety.dry_run`.\n"
"После бэкапа приходит TL;DR + путь к логу `/var/log/backup-auto.log`.\n" "После бэкапа приходит TL;DR + путь к логу `/var/log/backup-auto.log`.\n"
"Queue → Details показывает отложенные задачи.", "Queue → Details показывает отложенные задачи.",
@@ -45,7 +49,7 @@ HELP_PAGES = [
"Info: Disks/Security/Metrics/Hardware/SMART/OpenWrt.\n" "Info: Disks/Security/Metrics/Hardware/SMART/OpenWrt.\n"
"Ops: Updates/Upgrade/Reboot.\n" "Ops: Updates/Upgrade/Reboot.\n"
"Logs: Audit/Incidents/Security/Integrations/Processes.\n" "Logs: Audit/Incidents/Security/Integrations/Processes.\n"
"OpenWrt: `/openwrt`, `/openwrt_wan`, `/openwrt_clients`.", "OpenWrt: `/openwrt`, `/openwrt_wan`, `/openwrt_clients`, `/openwrt_leases`.",
), ),
( (
"Admin", "Admin",

View File

@@ -1,4 +1,5 @@
import asyncio import asyncio
import json
import socket import socket
import time import time
import psutil import psutil
@@ -10,6 +11,7 @@ from keyboards import menu_kb
from services.system import format_disks from services.system import format_disks
from services.health import health from services.health import health
from state import DOCKER_MAP from state import DOCKER_MAP
from services.runner import run_cmd_full
async def cmd_status(msg: Message): async def cmd_status(msg: Message):
@@ -115,6 +117,46 @@ async def health_short(msg: Message):
await msg.answer(f"🩺 Health (short)\n{brief}", reply_markup=menu_kb) await msg.answer(f"🩺 Health (short)\n{brief}", reply_markup=menu_kb)
# Strong references to in-flight self-test tasks. The event loop only keeps
# weak references to tasks, so an unreferenced task may be garbage-collected
# before it finishes.
_SELFTEST_TASKS: set = set()


@dp.message(F.text.in_({"🧪 Self-test", "/selftest"}))
async def selftest(msg: Message):
    """Run a quick self-test (health summary + restic snapshot probe).

    Non-admin senders are ignored. The checks run in a background task so the
    handler returns immediately; the report is sent as a follow-up message.
    """
    if not is_admin_msg(msg):
        return

    await msg.answer("⏳ Self-test…", reply_markup=menu_kb)

    async def worker():
        lines = ["🧪 Self-test"]

        # Health: condense the report to a one-line summary (lines 2-5).
        try:
            htext = await asyncio.to_thread(health, cfg, DOCKER_MAP)
            h_lines = [ln for ln in htext.splitlines() if ln.strip()]
            if len(h_lines) > 1:
                brief = " | ".join(h_lines[1:5])
            elif h_lines:
                brief = h_lines[0]
            else:
                brief = "n/a"
            lines.append(f"🟢 Health: {brief}")
        except Exception as e:
            lines.append(f"🔴 Health failed: {e}")

        # Restic snapshots probe. A failure to launch the command must still
        # produce a report instead of killing the task without any reply.
        try:
            rc, out = await run_cmd_full(
                ["restic", "snapshots", "--json"],
                use_restic_env=True,
                timeout=40,
            )
        except Exception as e:
            lines.append(f"🔴 Restic snapshots error: {e}")
        else:
            if rc == 0:
                try:
                    snaps = json.loads(out)
                    if isinstance(snaps, list) and snaps:
                        last = max(snaps, key=lambda s: s.get("time", ""))
                        t = last.get("time", "?").replace("Z", "").replace("T", " ")[:16]
                        lines.append(f"🟢 Restic snapshots: {len(snaps)}, last {t}")
                    else:
                        lines.append("🟡 Restic snapshots: empty")
                except (ValueError, TypeError, AttributeError):
                    # Malformed JSON or entries of an unexpected shape.
                    lines.append("🟡 Restic snapshots: invalid JSON")
            else:
                # Cap the error text so the report stays within Telegram's
                # message size limit.
                detail = str(out.strip() or rc)[:1000]
                lines.append(f"🔴 Restic snapshots error: {detail}")

        await msg.answer("\n".join(lines), reply_markup=menu_kb)

    task = asyncio.create_task(worker())
    _SELFTEST_TASKS.add(task)
    task.add_done_callback(_SELFTEST_TASKS.discard)
def _rate_str(value: float) -> str: def _rate_str(value: float) -> str:
if value >= 1024 * 1024: if value >= 1024 * 1024:
return f"{value / (1024 * 1024):.2f} MiB/s" return f"{value / (1024 * 1024):.2f} MiB/s"

View File

@@ -251,6 +251,22 @@ async def openwrt_clients(msg: Message):
asyncio.create_task(worker()) asyncio.create_task(worker())
@dp.message(F.text == "/openwrt_leases")
async def openwrt_leases(msg: Message):
    """Handle ``/openwrt_leases``: report OpenWrt status in "leases" mode.

    Only admins are served. The status is fetched in a background task and
    sent as a follow-up message; errors are reported as a warning text.
    """
    if is_admin_msg(msg):
        await msg.answer("⏳ Checking OpenWrt leases…", reply_markup=system_info_kb)

        async def _send_report():
            try:
                report = await get_openwrt_status(cfg, mode="leases")
            except Exception as err:
                report = f"⚠️ OpenWrt error: {err}"
            await msg.answer(report, reply_markup=system_info_kb)

        asyncio.create_task(_send_report())
@dp.message(F.text == "🧾 Audit") @dp.message(F.text == "🧾 Audit")
async def audit_log(msg: Message): async def audit_log(msg: Message):
if not is_admin_msg(msg): if not is_admin_msg(msg):

View File

@@ -38,7 +38,8 @@ backup_kb = ReplyKeyboardMarkup(
[KeyboardButton(text="📦 Status"), KeyboardButton(text="📦 Last snapshot")], [KeyboardButton(text="📦 Status"), KeyboardButton(text="📦 Last snapshot")],
[KeyboardButton(text="📊 Repo stats"), KeyboardButton(text="🧯 Restore help")], [KeyboardButton(text="📊 Repo stats"), KeyboardButton(text="🧯 Restore help")],
[KeyboardButton(text="▶️ Run backup"), KeyboardButton(text="🧾 Queue")], [KeyboardButton(text="▶️ Run backup"), KeyboardButton(text="🧾 Queue")],
[KeyboardButton(text="🧪 Restic check"), KeyboardButton(text="📬 Weekly report"), KeyboardButton(text="⬅️ Назад")], [KeyboardButton(text="🧪 Restic check"), KeyboardButton(text="📬 Weekly report"), KeyboardButton(text="📜 History")],
[KeyboardButton(text="⬅️ Назад")],
], ],
resize_keyboard=True, resize_keyboard=True,
) )

View File

@@ -1,37 +1,53 @@
import time import time
from typing import Dict from typing import Dict
from services.runtime_state import get_state, set_state
# category -> unix timestamp until muted # category -> unix timestamp until muted
_MUTES: Dict[str, float] = {}
def _mutes() -> Dict[str, float]:
    """Return the mute table stored under the ``"mutes"`` runtime-state key.

    The table maps a category to the unix timestamp until which it is muted.
    An empty dict is returned when the key is missing or does not hold a dict.
    """
    mutes = get_state().get("mutes", {})
    # A hand-edited or corrupted state file may hold null/list/str here;
    # every caller relies on dict methods, so fall back to an empty table.
    return mutes if isinstance(mutes, dict) else {}
def _save(mutes: Dict[str, float]) -> None:
    """Store the mute table under the ``"mutes"`` runtime-state key."""
    set_state(key="mutes", value=mutes)
def _cleanup() -> None:
    """Remove expired mutes, saving the table only when something changed."""
    mutes = _mutes()
    now = time.time()
    expired = [k for k, until in mutes.items() if until <= now]
    if not expired:
        # Nothing to drop: skip the save. This function is called by
        # is_muted() and list_mutes(), so an unconditional save would rewrite
        # the state on every check.
        return
    for k in expired:
        mutes.pop(k, None)
    _save(mutes)
def set_mute(category: str, seconds: int) -> float:
    """Mute ``category`` for ``seconds`` and return the expiry timestamp.

    Negative durations are treated as zero. Expired mutes are purged first.
    """
    _cleanup()
    table = _mutes()
    duration = seconds if seconds > 0 else 0
    expires_at = time.time() + duration
    table[category] = expires_at
    _save(table)
    return expires_at
def clear_mute(category: str) -> None:
    """Remove the mute for ``category`` (if any) and save the table."""
    table = _mutes()
    if category in table:
        del table[category]
    _save(table)
def is_muted(category: str | None) -> bool:
    """Tell whether ``category`` currently has an active manual mute.

    An empty or missing category is never muted. Expired entries are purged
    before the lookup.
    """
    if not category:
        return False
    _cleanup()
    table = _mutes()
    expires_at = table.get(category)
    if expires_at is not None and expires_at <= time.time():
        # The entry expired between the cleanup pass and this check.
        table.pop(category, None)
        _save(table)
        return False
    return expires_at is not None
@@ -39,4 +55,39 @@ def is_muted(category: str | None) -> bool:
def list_mutes() -> dict[str, int]:
    """Return the remaining whole seconds for every active mute, by category."""
    _cleanup()
    now = time.time()
    remaining: dict[str, int] = {}
    for category, expires_at in _mutes().items():
        remaining[category] = int(expires_at - now)
    return remaining
def is_auto_muted(cfg: dict, category: str | None) -> bool:
if not category:
return False
auto_list = cfg.get("alerts", {}).get("auto_mute", [])
if not isinstance(auto_list, list):
return False
now = time.localtime()
now_minutes = now.tm_hour * 60 + now.tm_min
for item in auto_list:
if not isinstance(item, dict):
continue
cat = item.get("category")
if cat != category:
continue
start = item.get("start", "00:00")
end = item.get("end", "00:00")
try:
sh, sm = [int(x) for x in start.split(":")]
eh, em = [int(x) for x in end.split(":")]
except Exception:
continue
start_min = sh * 60 + sm
end_min = eh * 60 + em
if start_min == end_min:
continue
if start_min < end_min:
if start_min <= now_minutes < end_min:
return True
else:
if now_minutes >= start_min or now_minutes < end_min:
return True
return False

View File

@@ -2,7 +2,7 @@ import time
from datetime import datetime from datetime import datetime
from aiogram import Bot from aiogram import Bot
from app import cfg from app import cfg
from services.alert_mute import is_muted from services.alert_mute import is_muted, is_auto_muted
from services.incidents import log_incident from services.incidents import log_incident
@@ -49,6 +49,8 @@ async def notify(
alerts_cfg = cfg.get("alerts", {}) alerts_cfg = cfg.get("alerts", {})
if category and is_muted(category): if category and is_muted(category):
return return
if category and is_auto_muted(cfg, category):
return
if _in_quiet_hours(alerts_cfg): if _in_quiet_hours(alerts_cfg):
allow_critical = bool(alerts_cfg.get("quiet_hours", {}).get("allow_critical", True)) allow_critical = bool(alerts_cfg.get("quiet_hours", {}).get("allow_critical", True))
if not (allow_critical and level == "critical"): if not (allow_critical and level == "critical"):

View File

@@ -447,4 +447,6 @@ async def get_openwrt_status(cfg: dict[str, Any], mode: str = "full") -> str:
return "\n".join(header) return "\n".join(header)
if mode == "clients": if mode == "clients":
return "\n".join(header + wifi_section) return "\n".join(header + wifi_section)
if mode == "leases":
return "\n".join(header + lease_section)
return "\n".join(header + wifi_section + lease_section) return "\n".join(header + wifi_section + lease_section)

View File

@@ -2,12 +2,24 @@ import asyncio
import time import time
from collections import deque from collections import deque
from typing import Awaitable, Callable, Any from typing import Awaitable, Callable, Any
from services import runtime_state
_queue: asyncio.Queue = asyncio.Queue() _queue: asyncio.Queue = asyncio.Queue()
_current_label: str | None = None _current_label: str | None = None
_current_meta: dict[str, Any] | None = None _current_meta: dict[str, Any] | None = None
_pending: deque[tuple[str, float]] = deque() _pending: deque[tuple[str, float]] = deque()
# Running queue statistics. Seeded from the "queue_stats" entry of the runtime
# state; when that entry is missing or empty, zeroed defaults are used instead.
# NOTE(review): this read happens at import time — presumably
# runtime_state.configure() must already have run for a non-default state
# path to be honoured here; confirm the import order.
_stats: dict[str, Any] = runtime_state.get("queue_stats", {}) or {
"processed": 0,
"avg_wait_sec": 0.0,
"avg_runtime_sec": 0.0,
"last_label": "",
"last_finished_at": 0.0,
}
def _save_stats() -> None:
    """Store the in-memory queue statistics under the ``"queue_stats"`` key."""
    runtime_state.set_state(key="queue_stats", value=_stats)
async def enqueue(label: str, job: Callable[[], Awaitable[None]]) -> int: async def enqueue(label: str, job: Callable[[], Awaitable[None]]) -> int:
@@ -34,6 +46,21 @@ async def worker():
try: try:
await job() await job()
finally: finally:
finished_at = time.time()
if _current_meta:
wait_sec = max(0.0, _current_meta["started_at"] - _current_meta["enqueued_at"])
runtime_sec = max(0.0, finished_at - _current_meta["started_at"])
n_prev = int(_stats.get("processed", 0))
_stats["processed"] = n_prev + 1
_stats["avg_wait_sec"] = (
(_stats.get("avg_wait_sec", 0.0) * n_prev) + wait_sec
) / _stats["processed"]
_stats["avg_runtime_sec"] = (
(_stats.get("avg_runtime_sec", 0.0) * n_prev) + runtime_sec
) / _stats["processed"]
_stats["last_label"] = label
_stats["last_finished_at"] = finished_at
_save_stats()
_current_label = None _current_label = None
_current_meta = None _current_meta = None
_queue.task_done() _queue.task_done()
@@ -47,6 +74,12 @@ def format_status() -> str:
if pending: if pending:
preview = ", ".join([p[0] for p in pending[:5]]) preview = ", ".join([p[0] for p in pending[:5]])
lines.append(f"➡️ Next: {preview}") lines.append(f"➡️ Next: {preview}")
if _stats.get("processed"):
lines.append(
f"📈 Done: {_stats.get('processed')} | "
f"avg wait {int(_stats.get('avg_wait_sec', 0))}s | "
f"avg run {int(_stats.get('avg_runtime_sec', 0))}s"
)
return "\n".join(lines) return "\n".join(lines)
@@ -67,4 +100,15 @@ def format_details(limit: int = 10) -> str:
for i, (label, enqueued_at) in enumerate(pending[:limit], start=1): for i, (label, enqueued_at) in enumerate(pending[:limit], start=1):
wait = int(now - enqueued_at) wait = int(now - enqueued_at)
lines.append(f"{i:>3} | {label} | {wait}s") lines.append(f"{i:>3} | {label} | {wait}s")
if _stats.get("processed"):
lines.append("")
lines.append(
"📈 Stats: "
f"{_stats.get('processed')} done, "
f"avg wait {int(_stats.get('avg_wait_sec', 0))}s, "
f"avg run {int(_stats.get('avg_runtime_sec', 0))}s"
)
last_label = _stats.get("last_label")
if last_label:
lines.append(f"Last: {last_label}")
return "\n".join(lines) return "\n".join(lines)

52
services/runtime_state.py Normal file
View File

@@ -0,0 +1,52 @@
import json
import os
from typing import Any, Dict
_PATH = "/var/server-bot/runtime.json"
_STATE: Dict[str, Any] = {}
def configure(path: str | None):
global _PATH
if path:
_PATH = path
def _load_from_disk() -> None:
    """Replace the in-memory state with the contents of the state file.

    The state becomes an empty dict when the file is missing, unreadable,
    not valid JSON, or when its top-level value is not a JSON object.
    """
    global _STATE
    try:
        with open(_PATH, "r", encoding="utf-8") as f:
            loaded = json.load(f)
    except (OSError, ValueError):
        # Missing/unreadable file, or malformed JSON / undecodable bytes.
        loaded = None
    # The accessors index and call .get() on the state, so a JSON root that
    # is a list, string, number or null must not be adopted.
    _STATE = loaded if isinstance(loaded, dict) else {}
def _save() -> None:
    """Write the in-memory state to the state file, best-effort.

    The JSON is written to a sibling temporary file and then moved over the
    target, so an interrupted write cannot leave a truncated state file.
    Failures are logged and never raised.
    """
    # Local import keeps this block self-contained.
    import logging

    directory = os.path.dirname(_PATH)
    tmp_path = f"{_PATH}.tmp"
    try:
        # A bare filename has an empty dirname, and makedirs("") raises.
        if directory:
            os.makedirs(directory, exist_ok=True)
        with open(tmp_path, "w", encoding="utf-8") as f:
            json.dump(_STATE, f)
        os.replace(tmp_path, _PATH)
    except (OSError, TypeError, ValueError) as e:
        # Persisting is best-effort: the caller keeps working from memory.
        logging.getLogger(__name__).warning("Failed to save runtime state to %s: %s", _PATH, e)
        try:
            os.remove(tmp_path)
        except OSError:
            pass
def get_state() -> Dict[str, Any]:
    """Return the in-memory state, loading it from disk first if it is empty."""
    if _STATE:
        return _STATE
    _load_from_disk()
    return _STATE
def set_state(key: str, value: Any) -> None:
    """Store ``value`` under ``key`` in the state and write the state to disk."""
    # get_state() performs the same load-if-empty step and returns the
    # current state object.
    state = get_state()
    state[key] = value
    _save()
def get(key: str, default: Any = None) -> Any:
    """Return the state value for ``key``, or ``default`` when it is absent.

    The state is loaded from disk first if the in-memory copy is empty.
    """
    if not _STATE:
        _load_from_disk()
    try:
        return _STATE[key]
    except KeyError:
        return default