diff --git a/CONFIG.en.md b/CONFIG.en.md index df9d7a1..28995e1 100644 --- a/CONFIG.en.md +++ b/CONFIG.en.md @@ -43,6 +43,19 @@ This project uses `config.yaml`. Start from `config.example.yaml`. - `rotate_when` (string): Rotation schedule for `TimedRotatingFileHandler`. Example `W0` for weekly on Monday. - `backup_count` (int): How many rotated files to keep. +## external_checks + +- `enabled` (bool): Enable background checks. +- `state_path` (string): State file for uptime, default `/var/server-bot/external_checks.json`. +- `timeout_sec` (int): Check timeout in seconds. +- `interval_sec` (int): Background check interval in seconds. +- `services` (list): List of checks. + - `name` (string): Service name. + - `type` (string): `http`, `tcp`, `ping`. + - `url` (string): URL for `http`. + - `host` (string): Host for `tcp`/`ping`. + - `port` (int): Port for `tcp`. + ## arcane - `base_url` (string): Arcane API base url. diff --git a/CONFIG.md b/CONFIG.md index 3d10fb8..a6aa387 100644 --- a/CONFIG.md +++ b/CONFIG.md @@ -43,6 +43,19 @@ - `rotate_when` (string): режим ротации (`TimedRotatingFileHandler`), например `W0`. - `backup_count` (int): сколько файлов хранить. +## external_checks + +- `enabled` (bool): включить фоновые проверки. +- `state_path` (string): файл состояния для аптайма, по умолчанию `/var/server-bot/external_checks.json`. +- `timeout_sec` (int): таймаут проверки в секундах. +- `interval_sec` (int): интервал фоновых проверок в секундах. +- `services` (list): список проверок. + - `name` (string): название сервиса. + - `type` (string): `http`, `tcp`, `ping`. + - `url` (string): URL для `http`. + - `host` (string): хост для `tcp`/`ping`. + - `port` (int): порт для `tcp`. + ## arcane - `base_url` (string): base url API Arcane. 
diff --git a/config.example.yaml b/config.example.yaml index b3fe558..90e21ad 100644 --- a/config.example.yaml +++ b/config.example.yaml @@ -35,6 +35,20 @@ incidents: rotate_when: "W0" backup_count: 8 +external_checks: + enabled: true + state_path: "/var/server-bot/external_checks.json" + timeout_sec: 5 + interval_sec: 300 + services: + - name: "example-site" + type: "http" + url: "https://example.com" + - name: "example-ssh" + type: "tcp" + host: "example.com" + port: 22 + arcane: base_url: "http://localhost:3552" api_key: "arc_..." diff --git a/handlers/system.py b/handlers/system.py index 3cafef4..341794f 100644 --- a/handlers/system.py +++ b/handlers/system.py @@ -16,6 +16,7 @@ from state import UPDATES_CACHE, REBOOT_PENDING from services.metrics import summarize from services.audit import read_audit_tail from services.incidents import read_recent, incidents_path +from services.external_checks import format_report @dp.message(F.text == "💽 Disks") @@ -197,6 +198,13 @@ async def audit_log(msg: Message): await msg.answer(text, reply_markup=system_logs_kb, parse_mode="Markdown") +@dp.message(F.text == "🌍 External") +async def external_checks(msg: Message): + if not is_admin_msg(msg): + return + await msg.answer(format_report(cfg), reply_markup=system_logs_kb) + + @dp.message(F.text == "📣 Incidents") async def incidents(msg: Message): if not is_admin_msg(msg): diff --git a/keyboards.py b/keyboards.py index 54c3b8d..26228a8 100644 --- a/keyboards.py +++ b/keyboards.py @@ -84,6 +84,7 @@ system_logs_kb = ReplyKeyboardMarkup( [KeyboardButton(text="🧾 Audit"), KeyboardButton(text="📣 Incidents")], [KeyboardButton(text="🧰 Processes"), KeyboardButton(text="🔒 SSL")], [KeyboardButton(text="🌐 URLs"), KeyboardButton(text="🔑 SSH log")], + [KeyboardButton(text="🌍 External")], [KeyboardButton(text="⬅️ System")], ], resize_keyboard=True, diff --git a/main.py b/main.py index 4224144..5d2a23d 100644 --- a/main.py +++ b/main.py @@ -10,6 +10,7 @@ from services.queue import worker as 
"""External availability checks (HTTP / TCP / ping) with persisted uptime counters.

Configuration lives under the ``external_checks`` key of the bot config:
``enabled``, ``state_path``, ``timeout_sec``, ``interval_sec`` and a
``services`` list whose entries carry ``name``, ``type`` and, depending on
the type, ``url`` or ``host``/``port``.

Public entry points:

* ``run_checks(cfg)``       -- probe every service once and update counters.
* ``format_report(cfg)``    -- run the checks and render a text report.
* ``monitor_external(cfg)`` -- endless background loop around ``run_checks``.
"""
import asyncio
import json
import logging
import os
import shutil
import socket
import subprocess
import threading
import time
from datetime import datetime, timezone
from typing import Any
from urllib.error import HTTPError, URLError
from urllib.request import Request, urlopen

logger = logging.getLogger(__name__)

_DEFAULT_STATE_PATH = "/var/server-bot/external_checks.json"

# Serialises the load -> update -> save cycle of the state file.  The
# background monitor runs ``run_checks`` in a worker thread while
# ``format_report`` may run it from another thread at the same time.
_STATE_LOCK = threading.Lock()


def _to_int(value: Any, default: int) -> int:
    """Return ``int(value)``, or *default* when the value is not convertible."""
    try:
        return int(value)
    except (TypeError, ValueError):
        return default


def _checks_cfg(cfg: dict[str, Any]) -> dict[str, Any]:
    """Return the ``external_checks`` section, tolerating a missing or empty key."""
    section = cfg.get("external_checks")
    return section if isinstance(section, dict) else {}


def _empty_state() -> dict[str, Any]:
    """Return a fresh state structure with zeroed counters."""
    return {"services": {}, "total_checks": 0, "ok_checks": 0}


def _state_path(cfg: dict[str, Any]) -> str:
    """Return the configured state file path, or the built-in default."""
    return _checks_cfg(cfg).get("state_path") or _DEFAULT_STATE_PATH


def _load_state(cfg: dict[str, Any]) -> dict[str, Any]:
    """Load the persisted counters.

    A missing, unreadable or malformed file yields a fresh empty state, so a
    damaged state file can never stop the checks from running.
    """
    try:
        with open(_state_path(cfg), "r", encoding="utf-8") as f:
            state = json.load(f)
    except (OSError, ValueError):
        # ValueError covers JSONDecodeError and UnicodeDecodeError.
        return _empty_state()
    if not isinstance(state, dict):
        # Valid JSON of the wrong shape (e.g. a list) is treated as corrupt.
        return _empty_state()
    if not isinstance(state.get("services"), dict):
        state["services"] = {}
    return state


def _save_state(cfg: dict[str, Any], state: dict[str, Any]) -> None:
    """Persist *state* as JSON.

    The data is written to a sibling ``.tmp`` file and moved into place with
    ``os.replace`` so that an interrupted write cannot leave a truncated
    state file behind.

    Raises:
        OSError: if the directory or file cannot be created or written.
    """
    path = _state_path(cfg)
    directory = os.path.dirname(path)
    if directory:
        # A bare file name has no directory part; os.makedirs("") would raise.
        os.makedirs(directory, exist_ok=True)
    tmp_path = f"{path}.tmp"
    try:
        with open(tmp_path, "w", encoding="utf-8") as f:
            json.dump(state, f, ensure_ascii=False, indent=2)
        os.replace(tmp_path, path)
    except Exception:
        try:
            os.remove(tmp_path)
        except OSError:
            pass
        raise


def _check_http(url: str, timeout: int) -> tuple[bool, str]:
    """Fetch *url* and report success for any status below 400.

    Returns ``(ok, detail)``; never raises -- every failure is folded into
    the detail string.
    """
    try:
        # Request() itself raises ValueError for a malformed URL, so it has
        # to live inside the try block as well.
        req = Request(url, headers={"User-Agent": "tg-admin-bot"})
        with urlopen(req, timeout=timeout) as resp:
            status = int(resp.status)
            return status < 400, f"HTTP {status}"
    except HTTPError as e:
        return False, f"HTTP {int(e.code)}"
    except URLError as e:
        return False, str(e.reason)
    except Exception as e:
        return False, str(e)


def _check_tcp(host: str, port: int, timeout: int) -> tuple[bool, str]:
    """Try to open a TCP connection to ``host:port``; return ``(ok, detail)``."""
    try:
        with socket.create_connection((host, port), timeout=timeout):
            return True, "TCP ok"
    except Exception as e:
        return False, str(e)


def _check_ping(host: str, timeout: int) -> tuple[bool, str]:
    """Check that *host* is reachable, not merely resolvable.

    Steps: resolve the name; send one ICMP echo through the system ``ping``
    binary when one is installed; fall back to a TCP connect on port 80 when
    ``ping`` is missing, cannot be executed, or reports failure (ICMP is often
    filtered or not permitted for unprivileged processes).

    Returns ``(ok, detail)``; never raises.
    """
    host = str(host)
    if host.startswith("-"):
        # Would be parsed as an option by the ping binary.
        return False, "invalid host"
    try:
        socket.getaddrinfo(host, None)
    except (OSError, UnicodeError) as e:
        return False, f"DNS failed: {e}"

    ping_bin = shutil.which("ping")
    if ping_bin:
        # NOTE(review): "-W <seconds>" matches Linux iputils/busybox ping;
        # other platforms interpret the flag differently -- confirm target OS.
        cmd = [ping_bin, "-c", "1", "-W", str(timeout), host]
        try:
            proc = subprocess.run(
                cmd,
                stdout=subprocess.DEVNULL,
                stderr=subprocess.DEVNULL,
                timeout=timeout + 1,
                check=False,
            )
        except (OSError, subprocess.SubprocessError):
            pass
        else:
            if proc.returncode == 0:
                return True, "ping ok"
    return _check_tcp(host, 80, timeout)


def _probe(entry: dict[str, Any], timeout: int) -> tuple[bool, str]:
    """Run the check described by one ``services`` entry.

    Entries with an unknown type or missing/invalid fields are reported as
    ``(False, "n/a")`` instead of raising.
    """
    check_type = entry.get("type", "http")
    if check_type == "http":
        url = entry.get("url")
        if url:
            return _check_http(url, timeout)
    elif check_type == "tcp":
        host = entry.get("host")
        port = _to_int(entry.get("port"), 0)
        if host and port:
            return _check_tcp(host, port, timeout)
    elif check_type == "ping":
        host = entry.get("host")
        if host:
            return _check_ping(host, timeout)
    return False, "n/a"


def run_checks(cfg: dict[str, Any]) -> dict[str, Any]:
    """Probe every configured service once and update the uptime counters.

    Network probes run first; the state file is then loaded, updated and
    saved under a lock so concurrent runs do not overwrite each other's
    counters.  A failure to save the state is logged and does not discard
    the results.

    Returns:
        ``{"results": [{"name", "ok", "detail"}, ...], "state": <counters>}``
    """
    checks_cfg = _checks_cfg(cfg)
    services = checks_cfg.get("services") or []
    # A zero/negative timeout would put sockets into non-blocking mode.
    timeout = max(1, _to_int(checks_cfg.get("timeout_sec"), 5))

    results = []
    for entry in services:
        if not isinstance(entry, dict):
            entry = {}
        name = str(entry.get("name") or "unknown")
        ok, detail = _probe(entry, timeout)
        results.append({"name": name, "ok": ok, "detail": detail})

    with _STATE_LOCK:
        state = _load_state(cfg)
        if not results:
            # Nothing was checked, so there is nothing to persist.
            return {"results": results, "state": state}

        services_state = state["services"]
        for item in results:
            hit = 1 if item["ok"] else 0
            service_state = services_state.get(item["name"])
            if not isinstance(service_state, dict):
                service_state = services_state[item["name"]] = {}
            service_state["total"] = _to_int(service_state.get("total"), 0) + 1
            service_state["ok"] = _to_int(service_state.get("ok"), 0) + hit
            state["total_checks"] = _to_int(state.get("total_checks"), 0) + 1
            state["ok_checks"] = _to_int(state.get("ok_checks"), 0) + hit

        try:
            _save_state(cfg, state)
        except OSError:
            logger.warning("external checks: cannot save state", exc_info=True)

    return {"results": results, "state": state}


def format_report(cfg: dict[str, Any]) -> str:
    """Run the checks and render a plain-text report.

    This calls ``run_checks`` synchronously, so it blocks for the duration of
    the network probes; an async caller can use ``asyncio.to_thread`` to keep
    its event loop responsive.
    """
    services = _checks_cfg(cfg).get("services") or []
    if not services:
        return "🌍 External checks\n\nℹ️ No services configured"

    data = run_checks(cfg)
    results = data["results"]
    state = data["state"]

    # "or 1" avoids a division by zero when no counters exist yet.
    total = state.get("total_checks", 0) or 1
    ok_total = state.get("ok_checks", 0)
    uptime = 100.0 * ok_total / total

    lines = ["🌍 External checks", ""]
    for item in results:
        icon = "🟢" if item["ok"] else "🔴"
        lines.append(f"{icon} {item['name']}: {item['detail']}")

    lines.append("")
    lines.append(f"📈 Uptime (global): {uptime:.2f}%")

    lines.append(f"🕒 {datetime.now(timezone.utc):%Y-%m-%d %H:%M UTC}")
    return "\n".join(lines)


async def monitor_external(cfg: dict[str, Any]):
    """Run ``run_checks`` forever, every ``interval_sec`` seconds.

    ``main.py`` schedules this coroutine with ``asyncio.create_task`` when
    ``external_checks.enabled`` is true (the default).  Returns immediately
    when the section is disabled.

    The blocking probes run in a worker thread so the event loop stays
    responsive, and an unexpected error in one round is logged instead of
    terminating the task.
    """
    checks_cfg = _checks_cfg(cfg)
    if not checks_cfg.get("enabled", True):
        return
    # Guard against a zero/negative interval turning the loop into a spin.
    interval = max(1, _to_int(checks_cfg.get("interval_sec"), 300))

    while True:
        try:
            await asyncio.to_thread(run_checks, cfg)
        except Exception:
            logger.exception("external checks: run failed")
        await asyncio.sleep(interval)