Add incidents export, queue alerts, and health summaries

This commit is contained in:
2026-02-09 02:24:08 +03:00
parent 5a4234f59d
commit 2e0bf0c6ea
11 changed files with 292 additions and 23 deletions

19
main.py
View File

@@ -7,7 +7,7 @@ from keyboards import menu_kb
from services.docker import discover_containers, docker_watchdog
from services.alerts import monitor_resources, monitor_smart
from services.metrics import MetricsStore, start_sampler
from services.queue import worker as queue_worker
from services.queue import worker as queue_worker, configure as queue_configure
from services.notify import notify
from services.audit import AuditMiddleware, audit_start
from services.ssl_alerts import monitor_ssl
@@ -38,6 +38,22 @@ def _handle_async_exception(_loop, context):
text = f"{msg}: {type(exc).__name__}: {exc}"
else:
text = f"{msg}"
now = datetime.now()
if not hasattr(_handle_async_exception, "_recent"):
_handle_async_exception._recent = []
_handle_async_exception._last_alert = None
recent = _handle_async_exception._recent
recent.append(now)
# keep last hour
_handle_async_exception._recent = [t for t in recent if (now - t).total_seconds() < 3600]
if len(_handle_async_exception._recent) >= 3:
last_alert = getattr(_handle_async_exception, "_last_alert", None)
if not last_alert or (now - last_alert).total_seconds() > 3600:
try:
log_incident(cfg, "exception_flood", category="system")
except Exception:
pass
_handle_async_exception._last_alert = now
try:
log_incident(cfg, text, category="system")
except Exception:
@@ -72,6 +88,7 @@ async def main():
asyncio.create_task(monitor_external(cfg))
state.METRICS_STORE = MetricsStore()
asyncio.create_task(start_sampler(state.METRICS_STORE, interval=5))
queue_configure(cfg.get("queue", {}), cfg)
asyncio.create_task(queue_worker())
asyncio.create_task(weekly_reporter(cfg, bot, ADMIN_IDS, state.DOCKER_MAP))
asyncio.create_task(schedule_selftest(cfg, bot, ADMIN_IDS, state.DOCKER_MAP))