Add weekly report, multi-admin, docker health cmd, backup tail, openwrt filters

This commit is contained in:
2026-02-08 23:27:23 +03:00
parent b78dc3cd5c
commit 4d4e3767bc
12 changed files with 264 additions and 31 deletions

View File

@@ -6,6 +6,7 @@ This project uses `config.yaml`. Start from `config.example.yaml`.
- `token` (string, required): Telegram bot token. - `token` (string, required): Telegram bot token.
- `admin_id` (int, required): Telegram user id with admin access. - `admin_id` (int, required): Telegram user id with admin access.
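- `admin_ids` (list<int>): Optional list of admin Telegram user ids. When present and non-empty it is used instead of `admin_id`, and its first entry is the primary admin for alerts.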
## paths ## paths
@@ -70,6 +71,12 @@ This project uses `config.yaml`. Start from `config.example.yaml`.
- `dry_run` (bool): If `true`, dangerous actions (upgrade/reboot/backup) are skipped. - `dry_run` (bool): If `true`, dangerous actions (upgrade/reboot/backup) are skipped.
## reports
- `weekly.enabled` (bool): Enable weekly report.
- `weekly.day` (string): Weekday `Mon`..`Sun`, case-insensitive (default `Sun`; unrecognized values also fall back to `Sun`).
- `weekly.time` (string): Server local time `HH:MM` in 24-hour format (default `08:00`; invalid values also fall back to `08:00`).
## external_checks ## external_checks
- `enabled` (bool): Enable background checks. - `enabled` (bool): Enable background checks.

View File

@@ -6,6 +6,7 @@
- `token` (string, обяз.): токен бота. - `token` (string, обяз.): токен бота.
- `admin_id` (int, обяз.): Telegram user id администратора. - `admin_id` (int, обяз.): Telegram user id администратора.
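- `admin_ids` (list<int>): необязательный список Telegram user id администраторов. Если он задан и не пуст, используется вместо `admin_id`; первый элемент — основной администратор для уведомлений.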
## paths ## paths
@@ -70,6 +71,12 @@
- `dry_run` (bool): если `true`, опасные действия (upgrade/reboot/backup) не выполняются. - `dry_run` (bool): если `true`, опасные действия (upgrade/reboot/backup) не выполняются.
## reports
- `weekly.enabled` (bool): включить еженедельный отчёт.
- `weekly.day` (string): день недели (`Mon`..`Sun`, регистр не важен), по умолчанию `Sun`; нераспознанное значение также заменяется на `Sun`.
- `weekly.time` (string): локальное время сервера `HH:MM` (24-часовой формат), по умолчанию `08:00`; некорректное значение также заменяется на `08:00`.
## external_checks ## external_checks
- `enabled` (bool): включить фоновые проверки. - `enabled` (bool): включить фоновые проверки.

8
app.py
View File

@@ -4,7 +4,13 @@ from config import load_cfg, load_env
cfg = load_cfg() cfg = load_cfg()
TOKEN = cfg["telegram"]["token"] TOKEN = cfg["telegram"]["token"]
ADMIN_ID = cfg["telegram"]["admin_id"] admin_ids_cfg = cfg["telegram"].get("admin_ids")
if isinstance(admin_ids_cfg, list) and admin_ids_cfg:
ADMIN_IDS = [int(x) for x in admin_ids_cfg]
ADMIN_ID = ADMIN_IDS[0]
else:
ADMIN_ID = int(cfg["telegram"]["admin_id"])
ADMIN_IDS = [ADMIN_ID]
ARTIFACT_STATE = cfg["paths"]["artifact_state"] ARTIFACT_STATE = cfg["paths"]["artifact_state"]
RESTIC_ENV = load_env(cfg["paths"].get("restic_env", "/etc/restic/restic.env")) RESTIC_ENV = load_env(cfg["paths"].get("restic_env", "/etc/restic/restic.env"))

View File

@@ -1,10 +1,10 @@
from aiogram.types import Message, CallbackQuery from aiogram.types import Message, CallbackQuery
from app import ADMIN_ID from app import ADMIN_IDS
def is_admin_msg(msg: Message) -> bool:
    """Return True when the message sender's id is listed in ``ADMIN_IDS``.

    The result is coerced to a real ``bool`` so that a missing (falsy)
    ``from_user`` yields ``False`` rather than leaking the falsy value itself.
    """
    return bool(msg.from_user and msg.from_user.id in ADMIN_IDS)
def is_admin_cb(cb: CallbackQuery) -> bool:
    """Return True when the callback sender's id is listed in ``ADMIN_IDS``.

    The result is coerced to a real ``bool`` so that a missing (falsy)
    ``from_user`` yields ``False`` rather than leaking the falsy value itself.
    """
    return bool(cb.from_user and cb.from_user.id in ADMIN_IDS)

View File

@@ -1,6 +1,9 @@
telegram: telegram:
token: "YOUR_TELEGRAM_BOT_TOKEN" token: "YOUR_TELEGRAM_BOT_TOKEN"
admin_id: 123456789 admin_id: 123456789
# Optional list of admins (first is primary for alerts)
admin_ids:
- 123456789
paths: paths:
# JSON state file for artifacts # JSON state file for artifacts
@@ -63,6 +66,12 @@ safety:
# If true, dangerous actions will be skipped # If true, dangerous actions will be skipped
dry_run: false dry_run: false
reports:
weekly:
enabled: false
day: "Sun" # Mon/Tue/Wed/Thu/Fri/Sat/Sun
time: "08:00" # HH:MM server local time
external_checks: external_checks:
enabled: true enabled: true
state_path: "/var/server-bot/external_checks.json" state_path: "/var/server-bot/external_checks.json"

View File

@@ -5,7 +5,7 @@ from aiogram.types import Message, CallbackQuery, InlineKeyboardMarkup, InlineKe
from app import dp, bot, cfg, ADMIN_ID from app import dp, bot, cfg, ADMIN_ID
from auth import is_admin_msg from auth import is_admin_msg
from services.alert_mute import set_mute, clear_mute, list_mutes from services.alert_mute import set_mute, clear_mute, list_mutes
from services.incidents import read_recent from services.incidents import read_recent, log_incident
from services.notify import notify from services.notify import notify
@@ -32,6 +32,7 @@ async def _handle_alerts(msg: Message, action: str, args: list[str]):
key = f"test:{level}:{int(time.time())}" key = f"test:{level}:{int(time.time())}"
await notify(bot, msg.chat.id, f"[TEST] {level.upper()} alert", level=level, key=key, category="test") await notify(bot, msg.chat.id, f"[TEST] {level.upper()} alert", level=level, key=key, category="test")
await msg.answer(f"Sent test alert: {level}") await msg.answer(f"Sent test alert: {level}")
log_incident(cfg, f"alert_test level={level} by {msg.from_user.id}")
return return
if action == "mute": if action == "mute":
@@ -48,6 +49,7 @@ async def _handle_alerts(msg: Message, action: str, args: list[str]):
until = set_mute(category, minutes * 60) until = set_mute(category, minutes * 60)
dt = datetime.fromtimestamp(until, tz=timezone.utc).astimezone() dt = datetime.fromtimestamp(until, tz=timezone.utc).astimezone()
await msg.answer(f"🔕 Muted {category} for {minutes}m (until {dt:%Y-%m-%d %H:%M:%S})") await msg.answer(f"🔕 Muted {category} for {minutes}m (until {dt:%Y-%m-%d %H:%M:%S})")
log_incident(cfg, f"alert_mute category={category} minutes={minutes} by {msg.from_user.id}")
return return
if action == "unmute": if action == "unmute":
@@ -57,6 +59,7 @@ async def _handle_alerts(msg: Message, action: str, args: list[str]):
category = args[0].lower() category = args[0].lower()
clear_mute(category) clear_mute(category)
await msg.answer(f"🔔 Unmuted {category}") await msg.answer(f"🔔 Unmuted {category}")
log_incident(cfg, f"alert_unmute category={category} by {msg.from_user.id}")
return return
if action in ("list", "mutes"): if action in ("list", "mutes"):

View File

@@ -38,13 +38,29 @@ def _sudo_cmd(cmd: list[str]) -> list[str]:
def _format_backup_result(rc: int, out: str) -> str:
    """Build the chat message that summarises a finished backup run.

    Args:
        rc: Exit code of the backup command; ``0`` is treated as success.
        out: Captured output of the command. Only the first 20 lines are
            included; the number of dropped lines is noted.

    Returns:
        A header with the exit code and the log path, followed by the trimmed
        output and, for failed runs only, the last 40 lines of the log file
        when it can be read.
    """
    from collections import deque

    log_path = "/var/log/backup-auto.log"
    header = "✅ Backup finished" if rc == 0 else "❌ Backup failed"

    lines = out.strip().splitlines()
    body = "\n".join(lines[:20])
    if len(lines) > 20:
        body += f"\n… trimmed {len(lines) - 20} lines"

    tail = ""
    if rc != 0:
        # Best effort: a missing or unreadable log must never break the report.
        # Opening directly (instead of an exists() pre-check) avoids a race, and
        # the bounded deque streams the file rather than loading all of it.
        try:
            with open(log_path, "r", encoding="utf-8", errors="replace") as f:
                tail = "".join(deque(f, maxlen=40)).strip()
        except OSError:
            tail = ""

    message = f"{header} (rc={rc})\nlog: {log_path}"
    if body:
        message += "\n\n" + body
    if tail:
        message += "\n\nLog tail:\n" + tail
    return message
def _load_json(raw: str, label: str) -> tuple[bool, object | None, str]: def _load_json(raw: str, label: str) -> tuple[bool, object | None, str]:
@@ -231,6 +247,11 @@ async def cmd_backup_now(msg: Message):
pos = await enqueue("backup", job) pos = await enqueue("backup", job)
await msg.answer(f"🕓 Backup queued (#{pos})", reply_markup=backup_kb) await msg.answer(f"🕓 Backup queued (#{pos})", reply_markup=backup_kb)
try:
from services.incidents import log_incident
log_incident(cfg, f"backup_queued by {msg.from_user.id}")
except Exception:
pass
async def cmd_last_snapshot(msg: Message): async def cmd_last_snapshot(msg: Message):

View File

@@ -4,8 +4,10 @@ from app import dp
from auth import is_admin_msg from auth import is_admin_msg
from keyboards import docker_kb, docker_inline_kb from keyboards import docker_kb, docker_inline_kb
from services.docker import container_uptime, docker_cmd from services.docker import container_uptime, docker_cmd
from services.incidents import log_incident
from state import DOCKER_MAP, LOG_FILTER_PENDING from state import DOCKER_MAP, LOG_FILTER_PENDING
import time import time
import json
async def cmd_docker_status(msg: Message): async def cmd_docker_status(msg: Message):
@@ -42,6 +44,7 @@ async def cmd_docker_status(msg: Message):
lines.append(f"{icon} {alias}: {status} ({up})") lines.append(f"{icon} {alias}: {status} ({up})")
await msg.answer("\n".join(lines), reply_markup=docker_kb) await msg.answer("\n".join(lines), reply_markup=docker_kb)
log_incident(cfg, f"docker_status by {msg.from_user.id}")
except Exception as e: except Exception as e:
# ⬅️ КРИТИЧЕСКИ ВАЖНО # ⬅️ КРИТИЧЕСКИ ВАЖНО
@@ -83,6 +86,45 @@ async def ds_cmd(msg: Message):
await cmd_docker_status(msg) await cmd_docker_status(msg)
@dp.message(F.text.startswith("/docker_health"))
async def docker_health(msg: Message):
    """Reply with the Docker healthcheck state of one mapped container.

    Usage: ``/docker_health <alias>`` where ``<alias>`` is a key of
    ``DOCKER_MAP``. Non-admin senders are ignored. The reply lists the health
    status, the failing streak and up to the five most recent probe results.
    """
    if not is_admin_msg(msg):
        return

    parts = msg.text.split()
    if len(parts) < 2:
        await msg.answer("Usage: /docker_health <alias>")
        return

    alias = parts[1]
    real = DOCKER_MAP.get(alias)
    if not real:
        await msg.answer(f"⚠️ Unknown container: {alias}", reply_markup=docker_kb)
        return

    rc, out = await docker_cmd(["inspect", "-f", "{{json .State.Health}}", real], timeout=10)
    if rc != 0 or not out.strip():
        await msg.answer(f"⚠️ Failed to get health for {alias}", reply_markup=docker_kb)
        return

    try:
        data = json.loads(out)
    except json.JSONDecodeError:
        await msg.answer(f"⚠️ Invalid health JSON for {alias}", reply_markup=docker_kb)
        return

    # A container without a healthcheck has no Health object, so the template
    # renders JSON `null`; calling .get() on the decoded None would crash.
    if not isinstance(data, dict):
        await msg.answer(f"ℹ️ No healthcheck configured for {alias}", reply_markup=docker_kb)
        return

    status = data.get("Status", "n/a")
    fail = data.get("FailingStreak", "n/a")
    logs = data.get("Log") or []

    lines = [f"🐳 {alias} health", f"Status: {status}", f"Failing streak: {fail}"]
    if logs:
        lines.append("Recent logs:")
        for entry in logs[-5:]:
            if not isinstance(entry, dict):
                continue
            ts = entry.get("Start") or entry.get("End") or ""
            exitc = entry.get("ExitCode", "")
            # Tolerate a null/non-string Output instead of failing on .strip().
            out_line = str(entry.get("Output") or "").strip()
            lines.append(f"- {ts} rc={exitc} {out_line}")

    await msg.answer("\n".join(lines), reply_markup=docker_kb)

    # NOTE(review): the visible module import is `from app import dp`; import
    # cfg locally so the audit entry cannot fail with NameError.
    from app import cfg

    log_incident(cfg, f"docker_health alias={alias} by {msg.from_user.id}")
@dp.message(F.text == "📈 Stats") @dp.message(F.text == "📈 Stats")
async def dstats(msg: Message): async def dstats(msg: Message):
if not is_admin_msg(msg): if not is_admin_msg(msg):

View File

@@ -219,6 +219,38 @@ async def openwrt_cmd(msg: Message):
await openwrt_status(msg) await openwrt_status(msg)
@dp.message(F.text == "/openwrt_wan")
async def openwrt_wan(msg: Message):
    """Handle ``/openwrt_wan``: reply with the OpenWrt status in ``wan`` mode.

    Non-admin senders are ignored. A placeholder reply is sent immediately;
    the status itself is fetched in a background task, and any error raised by
    the fetch is sent back as the reply text.
    """
    if is_admin_msg(msg):
        await msg.answer("⏳ Checking OpenWrt WAN…", reply_markup=system_info_kb)

        async def _send_wan_status():
            try:
                reply = await get_openwrt_status(cfg, mode="wan")
            except Exception as exc:
                reply = f"⚠️ OpenWrt error: {exc}"
            await msg.answer(reply, reply_markup=system_info_kb)

        # Fire and forget so the handler returns without waiting for the router.
        asyncio.create_task(_send_wan_status())
@dp.message(F.text == "/openwrt_clients")
async def openwrt_clients(msg: Message):
    """Handle ``/openwrt_clients``: reply with the OpenWrt status in ``clients`` mode.

    Non-admin senders are ignored. A placeholder reply is sent immediately;
    the status itself is fetched in a background task, and any error raised by
    the fetch is sent back as the reply text.
    """
    if is_admin_msg(msg):
        await msg.answer("⏳ Checking OpenWrt clients…", reply_markup=system_info_kb)

        async def _send_clients_status():
            try:
                reply = await get_openwrt_status(cfg, mode="clients")
            except Exception as exc:
                reply = f"⚠️ OpenWrt error: {exc}"
            await msg.answer(reply, reply_markup=system_info_kb)

        # Fire and forget so the handler returns without waiting for the router.
        asyncio.create_task(_send_clients_status())
@dp.message(F.text == "🧾 Audit") @dp.message(F.text == "🧾 Audit")
async def audit_log(msg: Message): async def audit_log(msg: Message):
if not is_admin_msg(msg): if not is_admin_msg(msg):

View File

@@ -2,7 +2,7 @@ import asyncio
import logging import logging
import socket import socket
from datetime import datetime from datetime import datetime
from app import bot, dp, cfg, ADMIN_ID from app import bot, dp, cfg, ADMIN_ID, ADMIN_IDS
from keyboards import menu_kb from keyboards import menu_kb
from services.docker import discover_containers, docker_watchdog from services.docker import discover_containers, docker_watchdog
from services.alerts import monitor_resources, monitor_smart from services.alerts import monitor_resources, monitor_smart
@@ -25,6 +25,7 @@ import handlers.help
import handlers.callbacks import handlers.callbacks
import handlers.arcane import handlers.arcane
import handlers.processes import handlers.processes
from services.weekly_report import weekly_reporter
import handlers.alerts_admin import handlers.alerts_admin
import handlers.config_check import handlers.config_check
@@ -71,6 +72,7 @@ async def main():
state.METRICS_STORE = MetricsStore() state.METRICS_STORE = MetricsStore()
asyncio.create_task(start_sampler(state.METRICS_STORE, interval=5)) asyncio.create_task(start_sampler(state.METRICS_STORE, interval=5))
asyncio.create_task(queue_worker()) asyncio.create_task(queue_worker())
asyncio.create_task(weekly_reporter(cfg, bot, ADMIN_IDS, state.DOCKER_MAP))
loop = asyncio.get_running_loop() loop = asyncio.get_running_loop()
loop.set_exception_handler(_handle_async_exception) loop.set_exception_handler(_handle_async_exception)
await notify_start() await notify_start()

View File

@@ -308,7 +308,7 @@ def _parse_leases_fallback(raw: str) -> list[str]:
return out return out
async def get_openwrt_status(cfg: dict[str, Any]) -> str: async def get_openwrt_status(cfg: dict[str, Any], mode: str = "full") -> str:
ow_cfg = cfg.get("openwrt", {}) ow_cfg = cfg.get("openwrt", {})
host = ow_cfg.get("host") host = ow_cfg.get("host")
user = ow_cfg.get("user", "root") user = ow_cfg.get("user", "root")
@@ -353,19 +353,11 @@ async def get_openwrt_status(cfg: dict[str, Any]) -> str:
if len(parts) < 4: if len(parts) < 4:
return "⚠️ OpenWrt response incomplete" return "⚠️ OpenWrt response incomplete"
sys_info = None
wan_status = None
wireless = None
leases = None
leases_fallback = ""
sys_info = _safe_json_load(parts[0]) sys_info = _safe_json_load(parts[0])
if sys_info is None:
sys_info = None
wan_status = _safe_json_load(parts[1]) or {} wan_status = _safe_json_load(parts[1]) or {}
wireless = _safe_json_load(parts[2]) or {} wireless = _safe_json_load(parts[2]) or {}
leases = _safe_json_load(parts[3]) leases = _safe_json_load(parts[3])
if leases is None: leases_fallback = "" if leases is not None else parts[3]
leases_fallback = parts[3]
if isinstance(sys_info, dict): if isinstance(sys_info, dict):
uptime_raw = sys_info.get("uptime") uptime_raw = sys_info.get("uptime")
@@ -419,35 +411,40 @@ async def get_openwrt_status(cfg: dict[str, Any]) -> str:
else: else:
leases_list = _parse_leases_fallback(leases_fallback) leases_list = _parse_leases_fallback(leases_fallback)
lines = [ header = [
"📡 OpenWrt", "📡 OpenWrt",
f"🕒 Uptime: {uptime}", f"🕒 Uptime: {uptime}",
f"⚙️ Load: {load}", f"⚙️ Load: {load}",
f"🌐 WAN: {wan_ip} ({wan_state})", f"🌐 WAN: {wan_ip} ({wan_state})",
"", "",
] ]
wifi_section: list[str] = []
if wifi_net_counts: if wifi_net_counts:
lines.append("📶 Wi-Fi networks:") wifi_section.append("📶 Wi-Fi networks:")
for label, count in sorted(wifi_net_counts.items()): for label, count in sorted(wifi_net_counts.items()):
lines.append(f" - {label}: {count}") wifi_section.append(f" - {label}: {count}")
lines.append("") wifi_section.append("")
lines.append(f"📶 Wi-Fi clients: {len(wifi_clients)}") wifi_section.append(f"📶 Wi-Fi clients: {len(wifi_clients)}")
if wifi_clients: if wifi_clients:
for line in wifi_clients[:20]: for line in wifi_clients[:20]:
lines.append(f" - {line}") wifi_section.append(f" - {line}")
if len(wifi_clients) > 20: if len(wifi_clients) > 20:
lines.append(f" … and {len(wifi_clients) - 20} more") wifi_section.append(f" … and {len(wifi_clients) - 20} more")
else: else:
lines.append(" (none)") wifi_section.append(" (none)")
lines += ["", f"🧾 DHCP leases: {len(leases_list)}"] lease_section: list[str] = ["", f"🧾 DHCP leases: {len(leases_list)}"]
if leases_list: if leases_list:
for line in leases_list[:20]: for line in leases_list[:20]:
lines.append(f" - {line}") lease_section.append(f" - {line}")
if len(leases_list) > 20: if len(leases_list) > 20:
lines.append(f" … and {len(leases_list) - 20} more") lease_section.append(f" … and {len(leases_list) - 20} more")
else: else:
lines.append(" (none)") lease_section.append(" (none)")
return "\n".join(lines) if mode == "wan":
return "\n".join(header)
if mode == "clients":
return "\n".join(header + wifi_section)
return "\n".join(header + wifi_section + lease_section)

107
services/weekly_report.py Normal file
View File

@@ -0,0 +1,107 @@
import asyncio
import socket
from datetime import datetime, timedelta
import psutil
from services.system import worst_disk_usage
from services.alert_mute import list_mutes
from services.incidents import read_recent
from services.docker import docker_cmd
def _parse_hhmm(value: str) -> tuple[int, int]:
    """Parse an ``HH:MM`` string into ``(hour, minute)``.

    Any input that cannot be parsed, or whose hour/minute is out of the
    0-23 / 0-59 range, yields the default ``(8, 0)``.
    """
    fallback = (8, 0)
    try:
        hh, _, mm = value.partition(":")
        hour, minute = int(hh), int(mm)
    except Exception:
        # Deliberately broad: non-string or malformed values just use the default.
        return fallback
    if 0 <= hour < 24 and 0 <= minute < 60:
        return hour, minute
    return fallback
def _next_run(day: str, time_str: str) -> datetime:
    """Return the next local datetime strictly after now that matches the schedule.

    Args:
        day: Weekday name; only its first three letters are used,
            case-insensitively. Empty or unknown values mean Sunday.
        time_str: ``HH:MM`` time of day; empty values mean ``08:00``.

    Returns:
        A naive ``datetime`` on the requested weekday at the requested time.
    """
    weekday_index = {"mon": 0, "tue": 1, "wed": 2, "thu": 3, "fri": 4, "sat": 5, "sun": 6}
    wanted = weekday_index.get((day or "Sun").lower()[:3], 6)
    hour, minute = _parse_hhmm(time_str or "08:00")

    now = datetime.now()
    slot = now.replace(hour=hour, minute=minute, second=0, microsecond=0)
    # Jump straight to the wanted weekday (0 days ahead when it is today) ...
    slot += timedelta(days=(wanted - now.weekday()) % 7)
    # ... and move a full week on when today's slot has already passed.
    if slot <= now:
        slot += timedelta(days=7)
    return slot
async def _docker_running_counts(docker_map: dict) -> tuple[int, int]:
    """Count how many containers of ``docker_map`` are in the ``running`` state.

    Each value of ``docker_map`` is inspected in turn via ``docker_cmd``.

    Returns:
        ``(running, total)`` where ``total`` is the number of map entries.
    """
    known = len(docker_map)
    up = 0
    for container in docker_map.values():
        rc, raw = await docker_cmd(
            ["inspect", "-f", "{{.State.Status}}", container], timeout=10
        )
        # A failed inspect counts as "not running".
        up += int(rc == 0 and raw.strip() == "running")
    return up, known
def _format_uptime(seconds: int) -> str:
    """Render a duration in seconds as ``<days>d HH:MM`` (seconds are dropped)."""
    whole_minutes = seconds // 60
    days, minutes_of_day = divmod(whole_minutes, 24 * 60)
    hours, minutes = divmod(minutes_of_day, 60)
    return f"{days}d {hours:02d}:{minutes:02d}"
async def build_weekly_report(cfg, docker_map: dict) -> str:
    """Assemble the multi-line weekly status report.

    The report covers the hostname, uptime, load averages, RAM usage, the
    worst disk usage reported by ``worst_disk_usage``, running/total Docker
    containers, incident counts for the last 24 hours and 7 days, and any
    active alert mutes.

    Args:
        cfg: Application config, passed through to ``read_recent``.
        docker_map: Container map, passed to ``_docker_running_counts``.

    Returns:
        The report as a single newline-joined string.
    """
    hostname = socket.gethostname()
    up_seconds = int(datetime.now().timestamp() - psutil.boot_time())
    avg1, avg5, avg15 = psutil.getloadavg()
    memory = psutil.virtual_memory()
    worst_pct, worst_mount = worst_disk_usage()
    alive, known = await _docker_running_counts(docker_map)
    active_mutes = list_mutes()
    # NOTE(review): counts are the lengths of what read_recent returns, so they
    # are presumably capped by the `limit` arguments — confirm in read_recent.
    last_day = len(read_recent(cfg, 24, limit=1000))
    last_week = len(read_recent(cfg, 24 * 7, limit=2000))

    if worst_pct is None:
        disk_line = "💾 Disk: n/a"
    else:
        disk_line = f"💾 Disk: {worst_pct}% ({worst_mount})"

    report = [
        f"🧾 Weekly report — {hostname}",
        f"⏱ Uptime: {_format_uptime(up_seconds)}",
        f"⚙️ Load: {avg1:.2f} {avg5:.2f} {avg15:.2f}",
        f"🧠 RAM: {memory.percent}%",
        disk_line,
        f"🐳 Docker: {alive}/{known} running",
        f"📓 Incidents: 24h={last_day}, 7d={last_week}",
    ]

    if active_mutes:
        report.append("🔕 Active mutes:")
        # Remaining seconds are clamped at zero and shown in whole minutes.
        report.extend(
            f"- {cat}: {max(0, secs) // 60}m left" for cat, secs in active_mutes.items()
        )
    else:
        report.append("🔔 Mutes: none")
    return "\n".join(report)
async def weekly_reporter(cfg, bot, admin_ids: list[int], docker_map: dict):
    """Background task: send the weekly report to every admin on schedule.

    Returns immediately when ``reports.weekly.enabled`` is not truthy.
    Otherwise loops forever: sleeps until the next slot computed by
    ``_next_run``, builds the report and sends it to each id in ``admin_ids``.

    Args:
        cfg: Application config mapping (reads ``reports.weekly``).
        bot: Object exposing ``send_message(chat_id, text)``.
        admin_ids: Chat ids that receive the report.
        docker_map: Container map forwarded to ``build_weekly_report``.
    """
    import logging

    log = logging.getLogger(__name__)

    # A `reports:` or `weekly:` key left empty in the config loads as None, so
    # normalise both levels to dicts instead of crashing on None.get().
    reports_cfg = (cfg.get("reports") or {}).get("weekly") or {}
    if not reports_cfg.get("enabled", False):
        return
    day = reports_cfg.get("day", "Sun")
    time_str = reports_cfg.get("time", "08:00")

    while True:
        target = _next_run(day, time_str)
        wait_sec = (target - datetime.now()).total_seconds()
        if wait_sec > 0:
            await asyncio.sleep(wait_sec)

        try:
            text = await build_weekly_report(cfg, docker_map)
        except Exception:
            # Keep the scheduler alive, but leave a trace of the failure.
            log.exception("weekly report: failed to build report")
        else:
            for admin_id in admin_ids:
                # Isolate each delivery so one unreachable admin does not stop
                # the report from reaching the others.
                try:
                    await bot.send_message(admin_id, text)
                except Exception:
                    log.exception("weekly report: failed to send to %s", admin_id)

        await asyncio.sleep(60)  # small delay to avoid tight loop if time skew