Add runtime state, auto-mute schedules, and backup retries
This commit is contained in:
@@ -63,6 +63,17 @@ def _format_backup_result(rc: int, out: str) -> str:
|
||||
return base
|
||||
|
||||
|
||||
def _tail(path: str, lines: int = 120) -> str:
    """Return the last *lines* lines of the text file at *path*.

    The file is decoded as UTF-8 with undecodable bytes replaced, so a
    partially corrupted log can still be shown.

    Args:
        path: Filesystem path of the log file.
        lines: Maximum number of trailing lines to keep. A non-positive
            value yields no lines.

    Returns:
        The joined trailing lines with surrounding whitespace stripped,
        ``"(empty)"`` when nothing is left, or a message prefixed with
        "⚠️" when the file is missing or cannot be read.
    """
    from collections import deque  # local import keeps the block self-contained

    if not os.path.exists(path):
        return f"⚠️ Log not found: {path}"
    try:
        with open(path, "r", encoding="utf-8", errors="replace") as f:
            # A bounded deque streams the file and keeps only the trailing
            # lines in memory; clamping to 0 avoids the ``[-0:]`` pitfall
            # where a zero count would return the whole file.
            data = deque(f, maxlen=max(lines, 0))
    except (OSError, ValueError) as e:
        return f"⚠️ Failed to read log: {e}"
    return "".join(data).strip() or "(empty)"
|
||||
|
||||
|
||||
def _load_json(raw: str, label: str) -> tuple[bool, object | None, str]:
|
||||
if not raw or not raw.strip():
|
||||
return False, None, f"? {label} returned empty output"
|
||||
@@ -225,6 +236,10 @@ async def cmd_backup_status(msg: Message):
|
||||
|
||||
|
||||
async def cmd_backup_now(msg: Message):
    """Start a backup for *msg* by delegating to ``schedule_backup``.

    Kept as a separate entry point so existing callers of
    ``cmd_backup_now`` keep working; all the work happens in
    ``schedule_backup``.
    """
    # Nothing is returned; the delegate replies to the user itself.
    await schedule_backup(msg)
|
||||
|
||||
|
||||
async def schedule_backup(msg: Message):
|
||||
async def job():
|
||||
if cfg.get("safety", {}).get("dry_run", False):
|
||||
await msg.answer("🧪 Dry-run: backup skipped", reply_markup=backup_kb)
|
||||
@@ -241,7 +256,14 @@ async def cmd_backup_now(msg: Message):
|
||||
use_restic_env=True,
|
||||
timeout=6 * 3600,
|
||||
)
|
||||
await msg.answer(_format_backup_result(rc, out), reply_markup=backup_kb)
|
||||
kb = backup_kb
|
||||
if rc != 0:
|
||||
kb = InlineKeyboardMarkup(
|
||||
inline_keyboard=[
|
||||
[InlineKeyboardButton(text="🔁 Retry backup", callback_data="backup:retry")]
|
||||
]
|
||||
)
|
||||
await msg.answer(_format_backup_result(rc, out), reply_markup=kb)
|
||||
finally:
|
||||
release_lock("backup")
|
||||
|
||||
@@ -352,7 +374,7 @@ async def br(msg: Message):
|
||||
@dp.message(F.text == "/backup_run")
|
||||
async def br_cmd(msg: Message):
|
||||
if is_admin_msg(msg):
|
||||
await cmd_backup_now(msg)
|
||||
await schedule_backup(msg)
|
||||
|
||||
|
||||
@dp.message(F.text == "🧪 Restic check")
|
||||
@@ -367,7 +389,14 @@ async def rc(msg: Message):
|
||||
use_restic_env=True,
|
||||
timeout=6 * 3600,
|
||||
)
|
||||
await msg.answer(("✅ OK\n" if rc2 == 0 else "❌ FAIL\n") + out, reply_markup=backup_kb)
|
||||
kb = backup_kb
|
||||
if rc2 != 0:
|
||||
kb = InlineKeyboardMarkup(
|
||||
inline_keyboard=[
|
||||
[InlineKeyboardButton(text="🔁 Retry restic check", callback_data="backup:retry_check")]
|
||||
]
|
||||
)
|
||||
await msg.answer(("✅ OK\n" if rc2 == 0 else "❌ FAIL\n") + out, reply_markup=kb)
|
||||
|
||||
pos = await enqueue("restic-check", job)
|
||||
await msg.answer(f"🕓 Restic check queued (#{pos})", reply_markup=backup_kb)
|
||||
@@ -395,3 +424,36 @@ async def wr(msg: Message):
|
||||
async def rh(msg: Message):
|
||||
if is_admin_msg(msg):
|
||||
await msg.answer(restore_help(), reply_markup=backup_kb)
|
||||
|
||||
|
||||
@dp.message(F.text == "📜 History")
@dp.message(F.text == "/backup_history")
async def backup_history(msg: Message):
    """Reply with the tail of the automatic backup log.

    Messages that fail ``is_admin_msg`` are ignored. Warning strings
    produced by ``_tail`` (prefixed with "⚠️") are forwarded as plain
    text; otherwise the log tail is sent as a Markdown code block.
    """
    if not is_admin_msg(msg):
        return
    log_path = "/var/log/backup-auto.log"
    content = _tail(log_path, lines=160)
    if content.startswith("⚠️"):
        await msg.answer(content, reply_markup=backup_kb)
        return
    # A literal code fence inside the log would close the Markdown block
    # early and make Telegram reject the message as unparsable.
    content = content.replace("```", "'''")
    # Telegram limits a message to 4096 characters. Keep the newest part
    # of the log and leave headroom for the header and the fences.
    max_chars = 3500
    if len(content) > max_chars:
        content = "…\n" + content[-max_chars:]
    await msg.answer(
        f"📜 Backup history (tail)\n`{log_path}`\n```\n{content}\n```",
        reply_markup=backup_kb,
        parse_mode="Markdown",
    )
|
||||
|
||||
|
||||
@dp.callback_query(F.data == "backup:retry")
async def backup_retry(cb: CallbackQuery):
    """Handle the ``backup:retry`` inline button.

    Callbacks that fail ``is_admin_cb`` are ignored. Otherwise the
    callback is acknowledged and the originating message is handed to
    ``schedule_backup``.
    """
    if not is_admin_cb(cb):
        return
    if cb.message is None:
        # The callback may arrive without its message (e.g. the message
        # is too old); there is nothing to reply to, so tell the user
        # instead of crashing inside schedule_backup.
        await cb.answer("⚠️ Message is no longer available", show_alert=True)
        return
    await cb.answer("Queuing backup…")
    await schedule_backup(cb.message)
|
||||
|
||||
|
||||
@dp.callback_query(F.data == "backup:retry_check")
async def backup_retry_check(cb: CallbackQuery):
    """Handle the ``backup:retry_check`` inline button.

    Callbacks that fail ``is_admin_cb`` are ignored. Otherwise the
    callback is acknowledged and the originating message is passed to
    the ``rc`` handler.
    """
    if not is_admin_cb(cb):
        return
    if cb.message is None:
        # The callback may arrive without its message (e.g. the message
        # is too old); there is nothing to reply to, so tell the user
        # instead of crashing inside the handler.
        await cb.answer("⚠️ Message is no longer available", show_alert=True)
        return
    await cb.answer("Queuing restic check…")
    # NOTE(review): cb.message is the bot's own message. If ``rc`` does its
    # own admin check based on the message sender, this retry may be
    # silently ignored — confirm against ``rc`` and ``is_admin_msg``.
    await rc(cb.message)
|
||||
|
||||
@@ -13,6 +13,7 @@ HELP_PAGES = [
|
||||
"📊 *Статус* — общая загрузка.\n"
|
||||
"📋 */status_short* — кратко (load/RAM/диски).\n"
|
||||
"🩺 */health_short* — краткий health.\n"
|
||||
"🧪 */selftest* — health + restic snapshot probe.\n"
|
||||
"🔧 Разделы: Docker, Backup, Artifacts, System, OpenWrt.",
|
||||
),
|
||||
(
|
||||
@@ -25,13 +26,16 @@ HELP_PAGES = [
|
||||
"Шорткаты: `/alerts_list`, `/alerts_recent`, `/alerts_mute_load` (60м).\n"
|
||||
"Категории: load, disk, smart, ssl, docker, test.\n"
|
||||
"Quiet hours: `alerts.quiet_hours` для не‑критичных.\n"
|
||||
"Авто-мьют: `alerts.auto_mute` со слотами времени.\n"
|
||||
"Только красные load: `alerts.load_only_critical: true`.\n"
|
||||
"Валидатор конфига: `/config_check`.",
|
||||
),
|
||||
(
|
||||
"Backup",
|
||||
"💾 **Backup (restic)**\n\n"
|
||||
"Кнопки: Status, Last snapshot, Repo stats, Run backup, Queue, Restic check, Weekly report.\n"
|
||||
"Кнопки: Status, Last snapshot, Repo stats, Run backup, Queue, Restic check, Weekly report, History.\n"
|
||||
"History — хвост `/var/log/backup-auto.log`.\n"
|
||||
"Fail → кнопка Retry (backup/check).\n"
|
||||
"Run backup/Check учитывают `safety.dry_run`.\n"
|
||||
"После бэкапа приходит TL;DR + путь к логу `/var/log/backup-auto.log`.\n"
|
||||
"Queue → Details показывает отложенные задачи.",
|
||||
@@ -45,7 +49,7 @@ HELP_PAGES = [
|
||||
"Info: Disks/Security/Metrics/Hardware/SMART/OpenWrt.\n"
|
||||
"Ops: Updates/Upgrade/Reboot.\n"
|
||||
"Logs: Audit/Incidents/Security/Integrations/Processes.\n"
|
||||
"OpenWrt: `/openwrt`, `/openwrt_wan`, `/openwrt_clients`.",
|
||||
"OpenWrt: `/openwrt`, `/openwrt_wan`, `/openwrt_clients`, `/openwrt_leases`.",
|
||||
),
|
||||
(
|
||||
"Admin",
|
||||
|
||||
@@ -1,4 +1,5 @@
|
||||
import asyncio
|
||||
import json
|
||||
import socket
|
||||
import time
|
||||
import psutil
|
||||
@@ -10,6 +11,7 @@ from keyboards import menu_kb
|
||||
from services.system import format_disks
|
||||
from services.health import health
|
||||
from state import DOCKER_MAP
|
||||
from services.runner import run_cmd_full
|
||||
|
||||
|
||||
async def cmd_status(msg: Message):
|
||||
@@ -115,6 +117,46 @@ async def health_short(msg: Message):
|
||||
await msg.answer(f"🩺 Health (short)\n{brief}", reply_markup=menu_kb)
|
||||
|
||||
|
||||
def _selftest_health_brief(htext: str) -> str:
    """Condense a multi-line health report into one summary line."""
    h_lines = [ln for ln in htext.splitlines() if ln.strip()]
    if len(h_lines) > 1:
        # Skip the first non-empty line (presumably a title — confirm
        # against ``health``) and keep at most the next four entries.
        return " | ".join(h_lines[1:5])
    return h_lines[0] if h_lines else "n/a"


def _selftest_restic_line(rc: int, out: str) -> str:
    """Turn the result of ``restic snapshots --json`` into a status line."""
    if rc != 0:
        return f"🔴 Restic snapshots error: {out.strip() or rc}"
    try:
        snaps = json.loads(out)
    except ValueError:  # json.JSONDecodeError is a ValueError subclass
        return "🟡 Restic snapshots: invalid JSON"
    if not (isinstance(snaps, list) and snaps):
        return "🟡 Restic snapshots: empty"
    try:
        # max() picks the newest snapshot without sorting or mutating the
        # list; the timestamps are compared as strings.
        last = max(snaps, key=lambda s: s.get("time", ""))
        t = last.get("time", "?").replace("Z", "").replace("T", " ")[:16]
    except (AttributeError, TypeError):
        # Entries that are not objects with a string "time" are reported
        # the same way as unparsable output.
        return "🟡 Restic snapshots: invalid JSON"
    return f"🟢 Restic snapshots: {len(snaps)}, last {t}"


@dp.message(F.text.in_({"🧪 Self-test", "/selftest"}))
async def selftest(msg: Message):
    """Run a quick self-test (health summary + restic snapshot probe).

    Messages that fail ``is_admin_msg`` are ignored. The handler replies
    with a progress message immediately and runs the checks in a
    background task, which sends the final report as a second message.
    """
    if not is_admin_msg(msg):
        return

    await msg.answer("⏳ Self-test…", reply_markup=menu_kb)

    async def worker():
        lines = ["🧪 Self-test"]

        # health: the call is synchronous, so it runs in a worker thread
        try:
            htext = await asyncio.to_thread(health, cfg, DOCKER_MAP)
            lines.append(f"🟢 Health: {_selftest_health_brief(htext)}")
        except Exception as e:
            lines.append(f"🔴 Health failed: {e}")

        # restic snapshots check
        try:
            rc, out = await run_cmd_full(
                ["restic", "snapshots", "--json"],
                use_restic_env=True,
                timeout=40,
            )
        except Exception as e:
            # Without this guard a failure to launch restic would kill the
            # background task and the user would never get a report.
            lines.append(f"🔴 Restic snapshots error: {e}")
        else:
            lines.append(_selftest_restic_line(rc, out))

        await msg.answer("\n".join(lines), reply_markup=menu_kb)

    # NOTE(review): the task reference is not stored; asyncio only keeps a
    # weak reference to tasks, so consider retaining it until completion.
    asyncio.create_task(worker())
|
||||
|
||||
|
||||
def _rate_str(value: float) -> str:
|
||||
if value >= 1024 * 1024:
|
||||
return f"{value / (1024 * 1024):.2f} MiB/s"
|
||||
|
||||
@@ -251,6 +251,22 @@ async def openwrt_clients(msg: Message):
|
||||
asyncio.create_task(worker())
|
||||
|
||||
|
||||
@dp.message(F.text == "/openwrt_leases")
async def openwrt_leases(msg: Message):
    """Report OpenWrt leases in response to ``/openwrt_leases``.

    Messages that fail ``is_admin_msg`` are ignored. A progress message is
    sent right away; the status is fetched in a background task that
    replies with either the result or an error notice.
    """
    if not is_admin_msg(msg):
        return

    await msg.answer("⏳ Checking OpenWrt leases…", reply_markup=system_info_kb)

    async def fetch_and_reply():
        # Any failure while querying the router is reported to the user
        # instead of being raised inside the detached task.
        try:
            reply = await get_openwrt_status(cfg, mode="leases")
        except Exception as exc:
            reply = f"⚠️ OpenWrt error: {exc}"
        await msg.answer(reply, reply_markup=system_info_kb)

    # Run detached so the handler returns without waiting for the router.
    asyncio.create_task(fetch_and_reply())
|
||||
|
||||
|
||||
@dp.message(F.text == "🧾 Audit")
|
||||
async def audit_log(msg: Message):
|
||||
if not is_admin_msg(msg):
|
||||
|
||||
Reference in New Issue
Block a user