Add external checks with uptime

This commit is contained in:
2026-02-08 02:16:42 +03:00
parent aab54d4108
commit 3df9db3bf7
7 changed files with 195 additions and 0 deletions

View File

@@ -43,6 +43,19 @@ This project uses `config.yaml`. Start from `config.example.yaml`.
- `rotate_when` (string): Rotation schedule for `TimedRotatingFileHandler`. Example `W0` for weekly on Monday. - `rotate_when` (string): Rotation schedule for `TimedRotatingFileHandler`. Example `W0` for weekly on Monday.
- `backup_count` (int): How many rotated files to keep. - `backup_count` (int): How many rotated files to keep.
## external_checks
- `enabled` (bool): Enable background checks.
- `state_path` (string): State file for uptime, default `/var/server-bot/external_checks.json`.
- `timeout_sec` (int): Check timeout in seconds.
- `interval_sec` (int): Background check interval in seconds (default `300`).
- `services` (list): List of checks.
- `name` (string): Service name.
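- `type` (string): `http`, `tcp` or `ping` (default `http`). `ping` sends no ICMP: it succeeds if `host` resolves via DNS, otherwise it falls back to a TCP connect to port 80.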
- `url` (string): URL for `http`.
- `host` (string): Host for `tcp`/`ping`.
- `port` (int): Port for `tcp`.
## arcane ## arcane
- `base_url` (string): Arcane API base url. - `base_url` (string): Arcane API base url.

View File

@@ -43,6 +43,19 @@
- `rotate_when` (string): режим ротации (`TimedRotatingFileHandler`), например `W0`. - `rotate_when` (string): режим ротации (`TimedRotatingFileHandler`), например `W0`.
- `backup_count` (int): сколько файлов хранить. - `backup_count` (int): сколько файлов хранить.
## external_checks
- `enabled` (bool): включить фоновые проверки.
- `state_path` (string): файл состояния для аптайма, по умолчанию `/var/server-bot/external_checks.json`.
- `timeout_sec` (int): таймаут проверки в секундах.
- `interval_sec` (int): интервал фоновых проверок в секундах (по умолчанию `300`).
- `services` (list): список проверок.
- `name` (string): название сервиса.
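- `type` (string): `http`, `tcp` или `ping` (по умолчанию `http`). `ping` не отправляет ICMP: проверка успешна, если `host` разрешается через DNS, иначе выполняется TCP-подключение к порту 80.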
- `url` (string): URL для `http`.
- `host` (string): хост для `tcp`/`ping`.
- `port` (int): порт для `tcp`.
## arcane ## arcane
- `base_url` (string): base url API Arcane. - `base_url` (string): base url API Arcane.

View File

@@ -35,6 +35,20 @@ incidents:
rotate_when: "W0" rotate_when: "W0"
backup_count: 8 backup_count: 8
external_checks:
enabled: true
state_path: "/var/server-bot/external_checks.json"
timeout_sec: 5
interval_sec: 300
services:
- name: "example-site"
type: "http"
url: "https://example.com"
- name: "example-ssh"
type: "tcp"
host: "example.com"
port: 22
arcane: arcane:
base_url: "http://localhost:3552" base_url: "http://localhost:3552"
api_key: "arc_..." api_key: "arc_..."

View File

@@ -16,6 +16,7 @@ from state import UPDATES_CACHE, REBOOT_PENDING
from services.metrics import summarize from services.metrics import summarize
from services.audit import read_audit_tail from services.audit import read_audit_tail
from services.incidents import read_recent, incidents_path from services.incidents import read_recent, incidents_path
from services.external_checks import format_report
@dp.message(F.text == "💽 Disks") @dp.message(F.text == "💽 Disks")
@@ -197,6 +198,13 @@ async def audit_log(msg: Message):
await msg.answer(text, reply_markup=system_logs_kb, parse_mode="Markdown") await msg.answer(text, reply_markup=system_logs_kb, parse_mode="Markdown")
@dp.message(F.text == "🌍 External")
async def external_checks(msg: Message):
    """Reply to the "🌍 External" button with a freshly generated report.

    Messages for which ``is_admin_msg`` returns false are ignored without
    any reply.
    """
    if not is_admin_msg(msg):
        return
    # Local import so this handler does not depend on the module's import list.
    import asyncio

    # format_report runs every configured check synchronously (blocking
    # HTTP/TCP/DNS calls). Running it in a worker thread keeps the event loop
    # free to serve other updates while the checks are in flight.
    report = await asyncio.to_thread(format_report, cfg)
    await msg.answer(report, reply_markup=system_logs_kb)
@dp.message(F.text == "📣 Incidents") @dp.message(F.text == "📣 Incidents")
async def incidents(msg: Message): async def incidents(msg: Message):
if not is_admin_msg(msg): if not is_admin_msg(msg):

View File

@@ -84,6 +84,7 @@ system_logs_kb = ReplyKeyboardMarkup(
[KeyboardButton(text="🧾 Audit"), KeyboardButton(text="📣 Incidents")], [KeyboardButton(text="🧾 Audit"), KeyboardButton(text="📣 Incidents")],
[KeyboardButton(text="🧰 Processes"), KeyboardButton(text="🔒 SSL")], [KeyboardButton(text="🧰 Processes"), KeyboardButton(text="🔒 SSL")],
[KeyboardButton(text="🌐 URLs"), KeyboardButton(text="🔑 SSH log")], [KeyboardButton(text="🌐 URLs"), KeyboardButton(text="🔑 SSH log")],
[KeyboardButton(text="🌍 External")],
[KeyboardButton(text="⬅️ System")], [KeyboardButton(text="⬅️ System")],
], ],
resize_keyboard=True, resize_keyboard=True,

View File

@@ -10,6 +10,7 @@ from services.queue import worker as queue_worker
from services.notify import notify from services.notify import notify
from services.audit import AuditMiddleware, audit_start from services.audit import AuditMiddleware, audit_start
from services.ssl_alerts import monitor_ssl from services.ssl_alerts import monitor_ssl
from services.external_checks import monitor_external
import state import state
import handlers.menu import handlers.menu
import handlers.status import handlers.status
@@ -45,6 +46,8 @@ async def main():
asyncio.create_task(monitor_smart(cfg, notify, bot, ADMIN_ID)) asyncio.create_task(monitor_smart(cfg, notify, bot, ADMIN_ID))
if cfg.get("npmplus", {}).get("alerts", {}).get("enabled", True): if cfg.get("npmplus", {}).get("alerts", {}).get("enabled", True):
asyncio.create_task(monitor_ssl(cfg, notify, bot, ADMIN_ID)) asyncio.create_task(monitor_ssl(cfg, notify, bot, ADMIN_ID))
if cfg.get("external_checks", {}).get("enabled", True):
asyncio.create_task(monitor_external(cfg))
state.METRICS_STORE = MetricsStore() state.METRICS_STORE = MetricsStore()
asyncio.create_task(start_sampler(state.METRICS_STORE, interval=5)) asyncio.create_task(start_sampler(state.METRICS_STORE, interval=5))
asyncio.create_task(queue_worker()) asyncio.create_task(queue_worker())

143
services/external_checks.py Normal file
View File

@@ -0,0 +1,143 @@
import asyncio
import json
import os
import socket
import time
from datetime import datetime, timezone
from typing import Any
from urllib.error import HTTPError, URLError
from urllib.request import Request, urlopen
def _state_path(cfg: dict[str, Any]) -> str:
    """Return the path of the JSON file that stores the uptime counters.

    The value is read from ``external_checks.state_path`` in *cfg*; when the
    section or the key is absent, a fixed default location is returned.
    """
    section = cfg.get("external_checks", {})
    return section.get("state_path", "/var/server-bot/external_checks.json")
def _load_state(cfg: dict[str, Any]) -> dict[str, Any]:
    """Load the persisted uptime counters, or a fresh state if unusable.

    Returns:
        The JSON object decoded from the configured state file. A fresh,
        zeroed state is returned instead when the file is missing, cannot be
        read, is not valid JSON, or its top-level value is not a JSON object.
        A non-object ``"services"`` value is replaced with an empty mapping.
    """
    fresh: dict[str, Any] = {"services": {}, "total_checks": 0, "ok_checks": 0}
    path = _state_path(cfg)
    try:
        with open(path, "r", encoding="utf-8") as f:
            loaded = json.load(f)
    except (OSError, ValueError, RecursionError):
        # OSError: missing or unreadable file. ValueError: malformed JSON or
        # invalid UTF-8. RecursionError: pathologically nested JSON.
        return fresh
    # Valid JSON that is not an object (e.g. `[]` or `null`) would break the
    # callers, which treat the state as a dict.
    if not isinstance(loaded, dict):
        return fresh
    if not isinstance(loaded.get("services"), dict):
        loaded["services"] = {}
    return loaded
def _save_state(cfg: dict[str, Any], state: dict[str, Any]) -> None:
    """Persist *state* as indented JSON at the configured state path.

    The parent directory is created when the path has one. The data is first
    written to a temporary file in the same directory and then moved over the
    target with ``os.replace``, so an interrupted write does not leave a
    truncated state file behind.

    Raises:
        OSError: If the directory or file cannot be created or written.
        TypeError: If *state* contains values that JSON cannot encode.
    """
    # Local import so this function does not depend on the module's import list.
    import tempfile

    path = _state_path(cfg)
    directory = os.path.dirname(path)
    # A bare file name has an empty dirname, and os.makedirs("") raises.
    if directory:
        os.makedirs(directory, exist_ok=True)

    # NOTE: mkstemp creates the file readable/writable by the owner only, so
    # the resulting state file carries that mode.
    fd, tmp_path = tempfile.mkstemp(
        prefix=os.path.basename(path) + ".",
        suffix=".tmp",
        dir=directory or ".",
    )
    try:
        with os.fdopen(fd, "w", encoding="utf-8") as f:
            json.dump(state, f, ensure_ascii=False, indent=2)
        os.replace(tmp_path, path)
    except BaseException:
        # Do not leave the temporary file behind; the original error is
        # re-raised unchanged.
        try:
            os.unlink(tmp_path)
        except OSError:
            pass
        raise
def _check_http(url: str, timeout: int) -> tuple[bool, str]:
    """Request *url* and report whether it answered with a non-error status.

    Args:
        url: Absolute URL to request.
        timeout: Timeout in seconds passed to ``urlopen``.

    Returns:
        ``(ok, detail)``. ``ok`` is true when a response with a status below
        400 was received. ``detail`` is ``"HTTP <status>"`` when a status is
        known, otherwise the text of the error. Errors raised while building
        or sending the request are reported as a failed check, not raised.
    """
    try:
        # The Request is built inside the ``try`` because it raises ValueError
        # for a URL without a scheme (e.g. "example.com"); that must count as
        # a failed check instead of aborting the caller.
        req = Request(url, headers={"User-Agent": "tg-admin-bot"})
        with urlopen(req, timeout=timeout) as resp:
            status = int(resp.status)
    except HTTPError as e:
        return False, f"HTTP {int(e.code)}"
    except URLError as e:
        return False, str(e.reason)
    except Exception as e:
        return False, str(e)
    return status < 400, f"HTTP {status}"
def _check_tcp(host: str, port: int, timeout: int) -> tuple[bool, str]:
    """Try to open a TCP connection to ``host:port`` within *timeout* seconds.

    Returns ``(True, "TCP ok")`` when the connection is established (it is
    closed again right away), otherwise ``(False, <error text>)``.
    """
    try:
        conn = socket.create_connection((host, port), timeout=timeout)
        conn.close()
    except Exception as exc:
        return False, str(exc)
    return True, "TCP ok"
def _check_ping(host: str, timeout: int) -> tuple[bool, str]:
    """Lightweight reachability check used for the ``ping`` service type.

    No ICMP packet is sent. The check passes with ``"DNS ok"`` as soon as
    *host* can be resolved; only when resolution fails does it fall back to a
    TCP connection attempt on port 80 and return that result.
    """
    try:
        socket.gethostbyname(host)
    except Exception:
        # Resolution failed: fall back to the TCP probe on port 80.
        return _check_tcp(host, 80, timeout)
    return True, "DNS ok"
def _probe_service(entry: dict[str, Any], timeout: int) -> tuple[bool, str]:
    """Run the single check described by *entry* and return ``(ok, detail)``.

    Entries with an unknown ``type``, or without the fields their type needs
    (``url`` for http, ``host`` and a numeric non-zero ``port`` for tcp,
    ``host`` for ping), are reported as ``(False, "n/a")``.
    """
    check_type = entry.get("type", "http")
    if check_type == "http":
        url = entry.get("url")
        if url:
            return _check_http(url, timeout)
    elif check_type == "tcp":
        host = entry.get("host")
        try:
            port = int(entry.get("port", 0))
        except (TypeError, ValueError):
            # A null or non-numeric port is a misconfigured entry, not a
            # reason to abort the whole run.
            port = 0
        if host and port:
            return _check_tcp(host, port, timeout)
    elif check_type == "ping":
        host = entry.get("host")
        if host:
            return _check_ping(host, timeout)
    return False, "n/a"


def run_checks(cfg: dict[str, Any]) -> dict[str, Any]:
    """Run every configured external check once and update the uptime state.

    For each entry in ``external_checks.services`` the matching check is run,
    the per-service and global ``ok``/``total`` counters in the persisted
    state are incremented, and the state is saved afterwards.

    Returns:
        ``{"results": [...], "state": {...}}`` where each result is a dict
        with ``name``, ``ok`` and ``detail``, and ``state`` is the updated
        counter state that was written to disk.
    """
    checks_cfg = cfg.get("external_checks", {})
    # `services:` left empty in YAML is loaded as None, hence `or []`.
    services = checks_cfg.get("services") or []
    timeout = int(checks_cfg.get("timeout_sec", 5))

    state = _load_state(cfg)
    services_state = state.setdefault("services", {})

    results = []
    for entry in services:
        if not isinstance(entry, dict):
            # A non-mapping entry is handled like an empty one: it shows up
            # as a failed "unknown" check instead of raising.
            entry = {}
        name = entry.get("name") or "unknown"
        ok, detail = _probe_service(entry, timeout)

        service_state = services_state.setdefault(name, {"ok": 0, "total": 0})
        service_state["total"] += 1
        state["total_checks"] = state.get("total_checks", 0) + 1
        if ok:
            service_state["ok"] += 1
            state["ok_checks"] = state.get("ok_checks", 0) + 1

        results.append({"name": name, "ok": ok, "detail": detail})

    _save_state(cfg, state)
    return {"results": results, "state": state}
def format_report(cfg: dict[str, Any]) -> str:
    """Run the external checks and render the outcome as a text report.

    When no services are configured a short notice is returned and no checks
    are run. Otherwise the report lists one line per service with a status
    icon, followed by the global uptime percentage computed from the
    persisted counters and the current UTC timestamp.
    """
    configured = cfg.get("external_checks", {}).get("services", [])
    if not configured:
        return "🌍 External checks\n\n No services configured"

    outcome = run_checks(cfg)
    counters = outcome["state"]
    # Guard against division by zero when no check has been counted yet.
    denominator = counters.get("total_checks", 0) or 1
    uptime = 100.0 * counters.get("ok_checks", 0) / denominator

    report = ["🌍 External checks", ""]
    report.extend(
        f"{'🟢' if item['ok'] else '🔴'} {item['name']}: {item['detail']}"
        for item in outcome["results"]
    )
    report.append("")
    report.append(f"📈 Uptime (global): {uptime:.2f}%")
    report.append(f"🕒 {datetime.now(timezone.utc):%Y-%m-%d %H:%M UTC}")
    return "\n".join(report)
async def monitor_external(cfg: dict[str, Any]):
    """Run the external checks in the background, once per interval, forever.

    Returns immediately when ``external_checks.enabled`` is false. Otherwise
    it loops until the task is cancelled, sleeping ``interval_sec`` seconds
    (default 300, at least 1) between runs.
    """
    # Local import so this function does not depend on the module's import list.
    import logging

    checks_cfg = cfg.get("external_checks", {})
    if not checks_cfg.get("enabled", True):
        return
    # Clamp so that a zero or negative interval cannot turn into a busy loop.
    interval = max(1, int(checks_cfg.get("interval_sec", 300)))
    log = logging.getLogger(__name__)
    while True:
        try:
            # run_checks does blocking network and file I/O; running it in a
            # worker thread keeps the event loop responsive meanwhile.
            await asyncio.to_thread(run_checks, cfg)
        except Exception:
            # One failed run (e.g. an unwritable state file) must not end the
            # monitor for good. Cancellation is not an Exception subclass and
            # still propagates.
            log.exception("external checks run failed")
        await asyncio.sleep(interval)