Add incidents export, queue alerts, and health summaries

This commit is contained in:
2026-02-09 02:24:08 +03:00
parent 5a4234f59d
commit 2e0bf0c6ea
11 changed files with 292 additions and 23 deletions

View File

@@ -8,9 +8,10 @@ from app import dp, cfg
from auth import is_admin_msg, is_admin_cb
from keyboards import backup_kb
from lock_utils import acquire_lock, release_lock
from services.queue import enqueue, format_status, format_details
from services.queue import enqueue, format_status, format_details, format_history
from services.backup import backup_badge, restore_help
from services.runner import run_cmd, run_cmd_full
from services.incidents import log_incident
def _parse_systemctl_kv(raw: str) -> dict[str, str]:
@@ -551,6 +552,13 @@ async def backup_history(msg: Message):
)
@dp.message(F.text == "/queue_history")
async def queue_history(msg: Message):
if not is_admin_msg(msg):
return
await msg.answer(format_history(), reply_markup=backup_kb)
@dp.callback_query(F.data == "backup:retry")
async def backup_retry(cb: CallbackQuery):
if not is_admin_cb(cb):

View File

@@ -124,6 +124,37 @@ async def docker_health(msg: Message):
log_incident(cfg, f"docker_health alias={alias} by {msg.from_user.id}", category="docker")
@dp.message(F.text == "/docker_health_summary")
async def docker_health_summary(msg: Message):
if not is_admin_msg(msg):
return
if not DOCKER_MAP:
await msg.answer("⚠️ DOCKER_MAP пуст", reply_markup=docker_kb)
return
problems = []
total = len(DOCKER_MAP)
for alias, real in DOCKER_MAP.items():
rc, out = await docker_cmd(["inspect", "-f", "{{json .State}}", real], timeout=10)
if rc != 0:
problems.append(f"{alias}: inspect error")
continue
try:
state = json.loads(out)
except Exception:
problems.append(f"{alias}: bad JSON")
continue
status = state.get("Status", "n/a")
health = (state.get("Health") or {}).get("Status", "n/a")
if status != "running" or health not in ("healthy", "none"):
problems.append(f"{alias}: {status}/{health}")
ok = total - len(problems)
lines = [f"🐳 Docker health: 🟢 {ok}/{total} healthy, 🔴 {len(problems)} issues"]
if problems:
lines.append("Problems:")
lines.extend([f"- {p}" for p in problems])
await msg.answer("\n".join(lines), reply_markup=docker_kb)
@dp.message(F.text == "📈 Stats")
async def dstats(msg: Message):
if not is_admin_msg(msg):

View File

@@ -57,7 +57,10 @@ HELP_PAGES = [
"Config: `/config_check`, файл `config.yaml` (см. config.example.yaml).\n"
"Deploy: `deploy.sh` (ssh 10.10.10.10:1090 → git pull → systemctl restart tg-bot).\n"
"Incidents summary: `/incidents_summary`.\n"
"Incidents export: `/incidents_export [hours] [csv|json]`.\n"
"Alerts log: `/alerts_log [hours]`.\n"
"Disk snapshot: `/disk_snapshot`.\n"
"Queue history: `/queue_history`.\n"
"BotFather list: `/botfather_list`.\n"
"Безопасность: `safety.dry_run: true` блокирует опасные действия.\n"
"OpenWrt: кнопка в System → Info.",
@@ -124,15 +127,20 @@ alerts - Manage alerts
alerts_list - List active mutes
alerts_recent - Show recent incidents (24h)
alerts_mute_load - Mute load alerts for 60m
alerts_log - Show suppressed alerts
backup_run - Run backup (queued)
backup_history - Show backup log tail
queue_history - Show queue recent jobs
docker_status - Docker summary
docker_health - Docker inspect/health by alias
docker_health_summary - Docker health summary (problems only)
openwrt - Full OpenWrt status
openwrt_wan - OpenWrt WAN only
openwrt_clients - OpenWrt wifi clients
openwrt_leases - OpenWrt DHCP leases
openwrt_fast - OpenWrt quick WAN view
incidents_summary - Incidents counters (24h/7d)
incidents_export - Export incidents (hours fmt)
disk_snapshot - Disk usage snapshot
config_check - Validate config
"""

View File

@@ -24,9 +24,12 @@ import state
from state import UPDATES_CACHE, REBOOT_PENDING
from services.metrics import summarize
from services.audit import read_audit_tail
from services.incidents import read_recent, incidents_path
from services.incidents import read_recent, incidents_path, read_raw, infer_category
from services.external_checks import format_report
from services.disk_report import build_disk_report
import io
import json
import csv
@dp.message(F.text == "💽 Disks")
@@ -269,6 +272,22 @@ async def openwrt_leases(msg: Message):
asyncio.create_task(worker())
@dp.message(F.text == "/openwrt_fast")
async def openwrt_fast(msg: Message):
if not is_admin_msg(msg):
return
await msg.answer("⏳ OpenWrt fast…", reply_markup=system_info_kb)
async def worker():
try:
text = await get_openwrt_status(cfg, mode="wan")
except Exception as e:
text = f"⚠️ OpenWrt error: {e}"
await msg.answer(text, reply_markup=system_info_kb)
asyncio.create_task(worker())
@dp.message(F.text == "🧾 Audit")
async def audit_log(msg: Message):
if not is_admin_msg(msg):
@@ -314,26 +333,36 @@ async def incidents(msg: Message):
async def incidents_summary(msg: Message):
if not is_admin_msg(msg):
return
last_24h = read_recent(cfg, hours=24, limit=2000)
last_7d = read_recent(cfg, hours=24 * 7, limit=4000)
last_24h = read_raw(cfg, hours=24, limit=2000)
last_7d = read_raw(cfg, hours=24 * 7, limit=4000)
def count(lines):
import re
total = len(lines)
def summarize(items):
total = len(items)
cats = {}
for line in lines:
m = re.search(r"category=([A-Za-z0-9_-]+)", line)
if m:
cats[m.group(1)] = cats.get(m.group(1), 0) + 1
top = ", ".join(f"{k}:{v}" for k, v in sorted(cats.items(), key=lambda x: x[1], reverse=True)[:5]) or "n/a"
return total, top
suppressed = {}
last_seen = {}
for dt, msg in items:
cat = infer_category(msg) or "n/a"
cats[cat] = cats.get(cat, 0) + 1
last_seen[cat] = dt
if "[suppressed" in msg.lower():
suppressed[cat] = suppressed.get(cat, 0) + 1
def fmt_top(d):
return ", ".join(f"{k}:{v}" for k, v in sorted(d.items(), key=lambda x: x[1], reverse=True)[:5]) or "n/a"
top = fmt_top(cats)
top_supp = fmt_top(suppressed)
last_parts = []
for k, dt in sorted(last_seen.items(), key=lambda x: x[1], reverse=True)[:5]:
last_parts.append(f"{k}:{dt.astimezone().strftime('%Y-%m-%d %H:%M')}")
last_str = ", ".join(last_parts) or "n/a"
return total, top, top_supp, last_str
t24, top24 = count(last_24h)
t7, top7 = count(last_7d)
t24, top24, supp24, last24 = summarize(last_24h)
t7, top7, supp7, last7 = summarize(last_7d)
text = (
"📣 Incidents summary\n\n"
f"24h: {t24} (top: {top24})\n"
f"7d: {t7} (top: {top7})"
f"24h: {t24} (top: {top24}; suppressed: {supp24}; last: {last24})\n"
f"7d: {t7} (top: {top7}; suppressed: {supp7}; last: {last7})"
)
await msg.answer(text, reply_markup=system_logs_audit_kb)
@@ -352,6 +381,68 @@ async def disk_snapshot(msg: Message):
await msg.answer(f"💽 Disk snapshot ({mount})\n\n{report}", reply_markup=system_info_kb)
@dp.message(F.text.startswith("/alerts_log"))
async def alerts_log(msg: Message):
if not is_admin_msg(msg):
return
parts = msg.text.split()
hours = 24
if len(parts) >= 2:
try:
hours = max(1, int(parts[1]))
except ValueError:
hours = 24
rows = read_raw(cfg, hours=hours, limit=2000)
suppressed = [(dt, m) for dt, m in rows if "[suppressed" in m.lower()]
sent = [(dt, m) for dt, m in rows if "[suppressed" not in m.lower()]
lines = [f"📣 Alerts log ({hours}h)"]
lines.append(f"Sent: {len(sent)}, Suppressed: {len(suppressed)}")
if suppressed:
lines.append("\nSuppressed:")
for dt, m in suppressed[-20:]:
lines.append(f"{dt:%m-%d %H:%M} {m}")
await msg.answer("\n".join(lines), reply_markup=system_logs_audit_kb)
@dp.message(F.text.startswith("/incidents_export"))
async def incidents_export(msg: Message):
if not is_admin_msg(msg):
return
parts = msg.text.split()
hours = 24
fmt = "csv"
if len(parts) >= 2:
try:
hours = max(1, int(parts[1]))
except ValueError:
hours = 24
if len(parts) >= 3:
fmt = parts[2].lower()
rows = read_raw(cfg, hours=hours, limit=20000, include_old=False)
data = []
for dt, msg_line in rows:
data.append({
"timestamp": dt.astimezone().isoformat(),
"category": infer_category(msg_line) or "n/a",
"message": msg_line,
})
if fmt == "json":
payload = json.dumps(data, ensure_ascii=False, indent=2)
bio = io.BytesIO(payload.encode("utf-8"))
bio.name = f"incidents_{hours}h.json"
else:
sio = io.StringIO()
writer = csv.DictWriter(sio, fieldnames=["timestamp", "category", "message"])
writer.writeheader()
for row in data:
writer.writerow(row)
bio = io.BytesIO(sio.getvalue().encode("utf-8"))
bio.name = f"incidents_{hours}h.csv"
summary = f"📤 Incidents export ({hours}h): {len(data)} rows, format {fmt}"
await msg.answer(summary)
await msg.answer_document(document=bio)
@dp.message(F.text == "🔒 SSL")
async def ssl_certs(msg: Message):
if not is_admin_msg(msg):