Compare commits
5 Commits
7c56430f32
...
main
| Author | SHA1 | Date | |
|---|---|---|---|
| b84107463c | |||
| ee361abb99 | |||
| 2ad423fb6a | |||
| efa5dd9644 | |||
| 678332e6d0 |
@@ -33,7 +33,7 @@ This project uses `config.yaml`. Start from `config.example.yaml`.
|
|||||||
- `end` (string): End time `HH:MM` (e.g. `08:00`).
|
- `end` (string): End time `HH:MM` (e.g. `08:00`).
|
||||||
- `allow_critical` (bool): Allow critical alerts during quiet hours.
|
- `allow_critical` (bool): Allow critical alerts during quiet hours.
|
||||||
- `auto_mute` (list): Per-category auto mutes by time window.
|
- `auto_mute` (list): Per-category auto mutes by time window.
|
||||||
- `category` (string): load/disk/smart/ssl/docker/test.
|
- `category` (string): load/disk/smart/raid/ssl/docker/test.
|
||||||
- `start` (string): Start `HH:MM`.
|
- `start` (string): Start `HH:MM`.
|
||||||
- `end` (string): End `HH:MM` (can wrap over midnight).
|
- `end` (string): End `HH:MM` (can wrap over midnight).
|
||||||
- `auto_mute_on_high_load_sec` (int): auto-mute `load` category for N seconds on critical load (0 disables).
|
- `auto_mute_on_high_load_sec` (int): auto-mute `load` category for N seconds on critical load (0 disables).
|
||||||
@@ -42,6 +42,9 @@ This project uses `config.yaml`. Start from `config.example.yaml`.
|
|||||||
- `smart_interval_sec` (int): SMART poll interval.
|
- `smart_interval_sec` (int): SMART poll interval.
|
||||||
- `smart_cooldown_sec` (int): SMART alert cooldown.
|
- `smart_cooldown_sec` (int): SMART alert cooldown.
|
||||||
- `smart_temp_warn` (int): SMART temperature warning (C).
|
- `smart_temp_warn` (int): SMART temperature warning (C).
|
||||||
|
- `raid_enabled` (bool): Enable md RAID polling via `/proc/mdstat` (enabled when omitted).
|
||||||
|
- `raid_interval_sec` (int): RAID poll interval in seconds (default 300).
|
||||||
|
- `raid_cooldown_sec` (int): Minimum time in seconds between repeated alerts for the same array (default 1800).
|
||||||
|
|
||||||
## disk_report
|
## disk_report
|
||||||
|
|
||||||
|
|||||||
@@ -33,7 +33,7 @@
|
|||||||
- `end` (string): конец, формат `HH:MM` (например `08:00`).
|
- `end` (string): конец, формат `HH:MM` (например `08:00`).
|
||||||
- `allow_critical` (bool): слать критичные алерты в тишину.
|
- `allow_critical` (bool): слать критичные алерты в тишину.
|
||||||
- `auto_mute` (list): авто‑мьюты по категориям и времени.
|
- `auto_mute` (list): авто‑мьюты по категориям и времени.
|
||||||
- `category` (string): load/disk/smart/ssl/docker/test.
|
- `category` (string): load/disk/smart/raid/ssl/docker/test.
|
||||||
- `start` (string): начало `HH:MM`.
|
- `start` (string): начало `HH:MM`.
|
||||||
- `end` (string): конец `HH:MM` (интервал может пересекать ночь).
|
- `end` (string): конец `HH:MM` (интервал может пересекать ночь).
|
||||||
- `auto_mute_on_high_load_sec` (int): при critical load автоматически мьютить категорию `load` на N секунд (0 — выкл).
|
- `auto_mute_on_high_load_sec` (int): при critical load автоматически мьютить категорию `load` на N секунд (0 — выкл).
|
||||||
@@ -42,6 +42,9 @@
|
|||||||
- `smart_interval_sec` (int): интервал SMART.
|
- `smart_interval_sec` (int): интервал SMART.
|
||||||
- `smart_cooldown_sec` (int): кулдаун SMART.
|
- `smart_cooldown_sec` (int): кулдаун SMART.
|
||||||
- `smart_temp_warn` (int): порог температуры (C).
|
- `smart_temp_warn` (int): порог температуры (C).
|
||||||
|
- `raid_enabled` (bool): RAID проверки (`/proc/mdstat`).
|
||||||
|
- `raid_interval_sec` (int): интервал RAID.
|
||||||
|
- `raid_cooldown_sec` (int): кулдаун RAID алертов.
|
||||||
|
|
||||||
## disk_report
|
## disk_report
|
||||||
|
|
||||||
|
|||||||
@@ -43,6 +43,9 @@ alerts:
|
|||||||
smart_interval_sec: 3600
|
smart_interval_sec: 3600
|
||||||
smart_cooldown_sec: 21600
|
smart_cooldown_sec: 21600
|
||||||
smart_temp_warn: 50
|
smart_temp_warn: 50
|
||||||
|
raid_enabled: true
|
||||||
|
raid_interval_sec: 300
|
||||||
|
raid_cooldown_sec: 1800
|
||||||
|
|
||||||
disk_report:
|
disk_report:
|
||||||
threshold: 90
|
threshold: 90
|
||||||
|
|||||||
@@ -16,7 +16,7 @@ HELP_TEXT = (
|
|||||||
"/alerts unmute <category> - unmute category\n"
|
"/alerts unmute <category> - unmute category\n"
|
||||||
"/alerts list - show active mutes\n"
|
"/alerts list - show active mutes\n"
|
||||||
"/alerts recent [hours] - show incidents log (default 24h)\n"
|
"/alerts recent [hours] - show incidents log (default 24h)\n"
|
||||||
"Categories: load, disk, smart, ssl, docker, test\n"
|
"Categories: load, disk, smart, raid, ssl, docker, test\n"
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
|
|||||||
@@ -67,7 +67,7 @@ async def snapshot_details(cb: CallbackQuery):
|
|||||||
snap_id = cb.data.split(":", 1)[1]
|
snap_id = cb.data.split(":", 1)[1]
|
||||||
await cb.answer("Loading snapshot…")
|
await cb.answer("Loading snapshot…")
|
||||||
|
|
||||||
# получаем статистику snapshot
|
# получаем статистику snapshot
|
||||||
rc, raw = await run_cmd(
|
rc, raw = await run_cmd(
|
||||||
["restic", "stats", snap_id, "--json"],
|
["restic", "stats", snap_id, "--json"],
|
||||||
use_restic_env=True,
|
use_restic_env=True,
|
||||||
|
|||||||
@@ -24,7 +24,7 @@ HELP_PAGES = [
|
|||||||
"• `/alerts mute <cat> <minutes>` / `/alerts unmute <cat>` / `/alerts list`\n"
|
"• `/alerts mute <cat> <minutes>` / `/alerts unmute <cat>` / `/alerts list`\n"
|
||||||
"• `/alerts recent [hours]`\n"
|
"• `/alerts recent [hours]`\n"
|
||||||
"Шорткаты: `/alerts_list`, `/alerts_recent`, `/alerts_mute_load` (60м).\n"
|
"Шорткаты: `/alerts_list`, `/alerts_recent`, `/alerts_mute_load` (60м).\n"
|
||||||
"Категории: load, disk, smart, ssl, docker, test.\n"
|
"Категории: load, disk, smart, raid, ssl, docker, test.\n"
|
||||||
"Quiet hours: `alerts.quiet_hours` для не‑критичных.\n"
|
"Quiet hours: `alerts.quiet_hours` для не‑критичных.\n"
|
||||||
"Авто-мьют: `alerts.auto_mute` со слотами времени.\n"
|
"Авто-мьют: `alerts.auto_mute` со слотами времени.\n"
|
||||||
"Только красные load: `alerts.load_only_critical: true`.\n"
|
"Только красные load: `alerts.load_only_critical: true`.\n"
|
||||||
|
|||||||
4
main.py
4
main.py
@@ -5,7 +5,7 @@ from datetime import datetime
|
|||||||
from app import bot, dp, cfg, ADMIN_ID, ADMIN_IDS
|
from app import bot, dp, cfg, ADMIN_ID, ADMIN_IDS
|
||||||
from keyboards import menu_kb
|
from keyboards import menu_kb
|
||||||
from services.docker import discover_containers, docker_watchdog
|
from services.docker import discover_containers, docker_watchdog
|
||||||
from services.alerts import monitor_resources, monitor_smart
|
from services.alerts import monitor_resources, monitor_smart, monitor_raid
|
||||||
from services.metrics import MetricsStore, start_sampler
|
from services.metrics import MetricsStore, start_sampler
|
||||||
from services.queue import worker as queue_worker, configure as queue_configure
|
from services.queue import worker as queue_worker, configure as queue_configure
|
||||||
from services.notify import notify
|
from services.notify import notify
|
||||||
@@ -82,6 +82,8 @@ async def main():
|
|||||||
asyncio.create_task(monitor_resources(cfg, notify, bot, ADMIN_ID))
|
asyncio.create_task(monitor_resources(cfg, notify, bot, ADMIN_ID))
|
||||||
if cfg.get("alerts", {}).get("smart_enabled", True):
|
if cfg.get("alerts", {}).get("smart_enabled", True):
|
||||||
asyncio.create_task(monitor_smart(cfg, notify, bot, ADMIN_ID))
|
asyncio.create_task(monitor_smart(cfg, notify, bot, ADMIN_ID))
|
||||||
|
if cfg.get("alerts", {}).get("raid_enabled", True):
|
||||||
|
asyncio.create_task(monitor_raid(cfg, notify, bot, ADMIN_ID))
|
||||||
if cfg.get("npmplus", {}).get("alerts", {}).get("enabled", True):
|
if cfg.get("npmplus", {}).get("alerts", {}).get("enabled", True):
|
||||||
asyncio.create_task(monitor_ssl(cfg, notify, bot, ADMIN_ID))
|
asyncio.create_task(monitor_ssl(cfg, notify, bot, ADMIN_ID))
|
||||||
if cfg.get("external_checks", {}).get("enabled", True):
|
if cfg.get("external_checks", {}).get("enabled", True):
|
||||||
|
|||||||
@@ -1,7 +1,7 @@
|
|||||||
import asyncio
|
import asyncio
|
||||||
import time
|
import time
|
||||||
import psutil
|
import psutil
|
||||||
from system_checks import list_disks, smart_health, disk_temperature
|
from system_checks import list_disks, smart_health, disk_temperature, list_md_arrays, md_array_status
|
||||||
from services.system import worst_disk_usage
|
from services.system import worst_disk_usage
|
||||||
from services.disk_report import build_disk_report
|
from services.disk_report import build_disk_report
|
||||||
|
|
||||||
@@ -130,3 +130,54 @@ async def monitor_smart(cfg, notify, bot, chat_id):
|
|||||||
continue
|
continue
|
||||||
|
|
||||||
await asyncio.sleep(interval)
|
await asyncio.sleep(interval)
|
||||||
|
|
||||||
|
|
||||||
|
async def monitor_raid(cfg, notify, bot, chat_id):
    """Poll md RAID arrays forever and send alerts on state changes.

    Reads ``raid_interval_sec`` (default 300), ``raid_cooldown_sec``
    (default 1800) and ``notify_recovery`` (default True) from
    ``cfg["alerts"]``.

    Args:
        cfg: Mapping with an optional ``"alerts"`` section.
        notify: Awaitable callable invoked as
            ``notify(bot, chat_id, text, level=..., key=..., category=...)``.
        bot: Passed through to ``notify`` unchanged.
        chat_id: Passed through to ``notify`` unchanged.

    This coroutine never returns; it sleeps ``interval`` seconds between
    polling rounds.
    """
    alerts_cfg = cfg.get("alerts", {})
    interval = int(alerts_cfg.get("raid_interval_sec", 300))
    cooldown = int(alerts_cfg.get("raid_cooldown_sec", 1800))
    notify_recovery = bool(alerts_cfg.get("notify_recovery", True))

    # Timestamp of the last problem alert sent per device.
    last_sent: dict[str, float] = {}
    # Last alerted problem per device ("inactive" / "degraded").
    # A device that is absent from this dict is considered healthy.
    bad_state: dict[str, str] = {}

    while True:
        now = time.time()
        for dev in list_md_arrays():
            status = md_array_status(dev)
            lower = status.lower()

            # "inactive" must be tested before "active": the former
            # contains the latter as a substring.
            if "inactive" in lower:
                level, problem = "critical", "inactive"
            elif "degraded" in lower:
                level, problem = "warn", "degraded"
            elif "active" in lower:
                level, problem = None, None
            else:
                # Status could not be determined (e.g. "n/a" or "not found").
                # Keep the previous state instead of reporting a recovery
                # that was never actually observed.
                continue

            if problem:
                # Alert immediately when the problem is new or its kind
                # changed (e.g. degraded -> inactive); otherwise repeat only
                # once the cooldown has elapsed.
                changed = bad_state.get(dev) != problem
                cooled_down = now - last_sent.get(dev, 0.0) >= cooldown
                if changed or cooled_down:
                    icon = "🔴" if level == "critical" else "🟡"
                    await notify(
                        bot,
                        chat_id,
                        f"{icon} RAID {dev}: {status}",
                        level=level,
                        key=f"raid_{problem}:{dev}",
                        category="raid",
                    )
                    last_sent[dev] = now
                bad_state[dev] = problem
            else:
                if dev in bad_state and notify_recovery:
                    await notify(
                        bot,
                        chat_id,
                        f"🟢 RAID {dev}: {status}",
                        level="info",
                        key=f"raid_ok:{dev}",
                        category="raid",
                    )
                bad_state.pop(dev, None)

        await asyncio.sleep(interval)
|
||||||
|
|||||||
@@ -37,7 +37,7 @@ def _npm_api_base(cfg) -> str | None:
|
|||||||
|
|
||||||
|
|
||||||
def health(cfg, container_map: dict | None = None) -> str:
|
def health(cfg, container_map: dict | None = None) -> str:
|
||||||
lines = ["рџ©є Health check\n"]
|
lines = ["🩺 Health check\n"]
|
||||||
thresholds = cfg.get("thresholds", {})
|
thresholds = cfg.get("thresholds", {})
|
||||||
disk_warn = int(thresholds.get("disk_warn", 80))
|
disk_warn = int(thresholds.get("disk_warn", 80))
|
||||||
load_warn = float(thresholds.get("load_warn", 2.0))
|
load_warn = float(thresholds.get("load_warn", 2.0))
|
||||||
@@ -45,9 +45,9 @@ def health(cfg, container_map: dict | None = None) -> str:
|
|||||||
env = os.environ.copy()
|
env = os.environ.copy()
|
||||||
env.update(RESTIC_ENV)
|
env.update(RESTIC_ENV)
|
||||||
subprocess.check_output(["restic", "snapshots"], timeout=10, env=env)
|
subprocess.check_output(["restic", "snapshots"], timeout=10, env=env)
|
||||||
lines.append("рџџў Backup repo reachable")
|
lines.append("🟢 Backup repo reachable")
|
||||||
except Exception:
|
except Exception:
|
||||||
lines.append("🔴 Backup repo unreachable")
|
lines.append("🔴 Backup repo unreachable")
|
||||||
|
|
||||||
containers = container_map if container_map is not None else _containers_from_cfg(cfg)
|
containers = container_map if container_map is not None else _containers_from_cfg(cfg)
|
||||||
for alias, real in containers.items():
|
for alias, real in containers.items():
|
||||||
@@ -55,20 +55,20 @@ def health(cfg, container_map: dict | None = None) -> str:
|
|||||||
f"docker inspect -f '{{{{.State.Status}}}}' {real}"
|
f"docker inspect -f '{{{{.State.Status}}}}' {real}"
|
||||||
)
|
)
|
||||||
if out.strip() != "running":
|
if out.strip() != "running":
|
||||||
lines.append(f"🔴 {alias} down")
|
lines.append(f"🔴 {alias} down")
|
||||||
else:
|
else:
|
||||||
lines.append(f"рџџў {alias} OK")
|
lines.append(f"🟢 {alias} OK")
|
||||||
|
|
||||||
npm_cfg = cfg.get("npmplus", {})
|
npm_cfg = cfg.get("npmplus", {})
|
||||||
npm_base = _npm_api_base(cfg)
|
npm_base = _npm_api_base(cfg)
|
||||||
if npm_base:
|
if npm_base:
|
||||||
npm_status = _request_status(npm_base, npm_cfg.get("verify_tls", True))
|
npm_status = _request_status(npm_base, npm_cfg.get("verify_tls", True))
|
||||||
if npm_status == 200:
|
if npm_status == 200:
|
||||||
lines.append("рџџў NPMplus API OK")
|
lines.append("🟢 NPMplus API OK")
|
||||||
elif npm_status is None:
|
elif npm_status is None:
|
||||||
lines.append("🔴 NPMplus API unreachable")
|
lines.append("🔴 NPMplus API unreachable")
|
||||||
else:
|
else:
|
||||||
lines.append(f"рџџЎ NPMplus API HTTP {npm_status}")
|
lines.append(f"🟡 NPMplus API HTTP {npm_status}")
|
||||||
|
|
||||||
g_cfg = cfg.get("gitea", {})
|
g_cfg = cfg.get("gitea", {})
|
||||||
g_base = (g_cfg.get("base_url") or "").rstrip("/")
|
g_base = (g_cfg.get("base_url") or "").rstrip("/")
|
||||||
@@ -84,22 +84,22 @@ def health(cfg, container_map: dict | None = None) -> str:
|
|||||||
g_status = status
|
g_status = status
|
||||||
break
|
break
|
||||||
if g_status == 200:
|
if g_status == 200:
|
||||||
lines.append("рџџў Gitea API OK")
|
lines.append("🟢 Gitea API OK")
|
||||||
elif g_status is None:
|
elif g_status is None:
|
||||||
lines.append("🔴 Gitea API unreachable")
|
lines.append("🔴 Gitea API unreachable")
|
||||||
else:
|
else:
|
||||||
lines.append(f"рџџЎ Gitea API HTTP {g_status}")
|
lines.append(f"🟡 Gitea API HTTP {g_status}")
|
||||||
|
|
||||||
usage, mount = worst_disk_usage()
|
usage, mount = worst_disk_usage()
|
||||||
if usage is None:
|
if usage is None:
|
||||||
lines.append("вљ пёЏ Disk n/a")
|
lines.append("⚠️ Disk n/a")
|
||||||
elif usage > disk_warn:
|
elif usage > disk_warn:
|
||||||
lines.append(f"рџџЎ Disk {usage}% ({mount})")
|
lines.append(f"🟡 Disk {usage}% ({mount})")
|
||||||
else:
|
else:
|
||||||
lines.append(f"рџџў Disk {usage}% ({mount})")
|
lines.append(f"🟢 Disk {usage}% ({mount})")
|
||||||
|
|
||||||
load = psutil.getloadavg()[0]
|
load = psutil.getloadavg()[0]
|
||||||
lines.append(f"{'рџџў' if load < load_warn else 'рџџЎ'} Load {load}")
|
lines.append(f"{'🟢' if load < load_warn else '🟡'} Load {load}")
|
||||||
|
|
||||||
return "\n".join(lines)
|
return "\n".join(lines)
|
||||||
|
|
||||||
|
|||||||
@@ -1,5 +1,6 @@
|
|||||||
import subprocess
|
import subprocess
|
||||||
import os
|
import os
|
||||||
|
import re
|
||||||
|
|
||||||
|
|
||||||
def _cmd(cmd: str) -> str:
|
def _cmd(cmd: str) -> str:
|
||||||
@@ -82,6 +83,62 @@ def list_disks() -> list[str]:
|
|||||||
return disks
|
return disks
|
||||||
|
|
||||||
|
|
||||||
|
def list_md_arrays() -> list[str]:
    """Return the sorted ``/dev/mdN`` paths of md arrays found on this host.

    ``/proc/mdstat`` is consulted first; only when it yields no ``mdN :``
    header lines does the function fall back to listing ``/dev/md*`` and
    keeping entries that look exactly like ``/dev/md<digits>``.
    """
    header_re = re.compile(r"^\s*(md\d+)\s*:")

    # Primary source: array header lines in /proc/mdstat.
    header_hits = (header_re.match(row) for row in _cmd("cat /proc/mdstat").splitlines())
    found: set[str] = {f"/dev/{hit.group(1)}" for hit in header_hits if hit}

    if not found:
        # Fallback for environments where mdstat parsing is unavailable.
        candidates = (row.strip() for row in _cmd("ls -1 /dev/md* 2>/dev/null").splitlines())
        found = {c for c in candidates if c and re.match(r"^/dev/md\d+$", c)}

    return sorted(found)
|
||||||
|
|
||||||
|
|
||||||
|
def md_array_status(dev: str) -> str:
    """Return a one-line health label for an md array based on /proc/mdstat.

    Args:
        dev: Device path or bare name (``/dev/md0`` or ``md0``). Only the
            last path component is used to find the array's header line.

    Returns:
        - ``"⚠️ n/a"`` when the mdstat output is empty or contains
          ``"ERROR:"`` (presumably the failure marker of ``_cmd`` — confirm).
        - ``"⚠️ not found in /proc/mdstat"`` when no ``<name> :`` header exists.
        - ``"🔴 inactive"`` when the header line mentions ``inactive``.
        - ``"🟡 degraded"`` when any member map such as ``[U_]`` or
          ``[UU_U]`` contains a missing member (``_``).
        - ``"🟢 active"`` otherwise.
    """
    out = _cmd("cat /proc/mdstat")
    if not out or "ERROR:" in out:
        return "⚠️ n/a"

    name = dev.rsplit("/", 1)[-1]
    lines = out.splitlines()
    idx = next(
        (i for i, line in enumerate(lines) if line.strip().startswith(f"{name} :")),
        None,
    )
    if idx is None:
        return "⚠️ not found in /proc/mdstat"

    header = lines[idx].strip()
    if "inactive" in header:
        return "🔴 inactive"

    # The array's stanza runs from its header to the next blank line.
    block = [header]
    for line in lines[idx + 1:]:
        if not line.strip():
            break
        block.append(line.strip())

    # Member map looks like [UU] when healthy; each "_" is a missing member.
    # Matching the whole bracket catches a gap at any position ([UU_U],
    # [UUU_], [__]), not only in the first two slots. Brackets with other
    # content, such as the [2/1] counter or a progress bar, do not match.
    for members in re.findall(r"\[([U_]+)\]", " ".join(block)):
        if "_" in members:
            return "🟡 degraded"
    return "🟢 active"
|
||||||
|
|
||||||
|
|
||||||
def smart_health(dev: str) -> str:
|
def smart_health(dev: str) -> str:
|
||||||
out = _cmd(f"smartctl -H {dev}")
|
out = _cmd(f"smartctl -H {dev}")
|
||||||
|
|
||||||
@@ -138,8 +195,9 @@ def smart_last_test(dev: str) -> str:
|
|||||||
|
|
||||||
def disks() -> str:
|
def disks() -> str:
|
||||||
disks = list_disks()
|
disks = list_disks()
|
||||||
|
md_arrays = list_md_arrays()
|
||||||
|
|
||||||
if not disks:
|
if not disks and not md_arrays:
|
||||||
return "💽 Disks\n\n❌ No disks found"
|
return "💽 Disks\n\n❌ No disks found"
|
||||||
|
|
||||||
lines = ["💽 Disks (SMART)\n"]
|
lines = ["💽 Disks (SMART)\n"]
|
||||||
@@ -158,6 +216,12 @@ def disks() -> str:
|
|||||||
|
|
||||||
lines.append(f"{icon} {d} — {health}, 🌡 {temp}")
|
lines.append(f"{icon} {d} — {health}, 🌡 {temp}")
|
||||||
|
|
||||||
|
if md_arrays:
|
||||||
|
lines.append("")
|
||||||
|
lines.append("🧱 RAID (md)")
|
||||||
|
for md in md_arrays:
|
||||||
|
lines.append(f"{md} — {md_array_status(md)}")
|
||||||
|
|
||||||
return "\n".join(lines)
|
return "\n".join(lines)
|
||||||
|
|
||||||
|
|
||||||
|
|||||||
20
tests/test_config_check.py
Normal file
20
tests/test_config_check.py
Normal file
@@ -0,0 +1,20 @@
|
|||||||
|
import unittest
|
||||||
|
|
||||||
|
from services.config_check import validate_cfg
|
||||||
|
|
||||||
|
|
||||||
|
class ConfigCheckTests(unittest.TestCase):
    """Checks for ``validate_cfg``."""

    def test_admin_ids_without_admin_id_is_valid(self):
        # Only `admin_ids` is supplied; no `admin_id` key is present.
        telegram_section = {"token": "x", "admin_ids": [1, 2]}

        errors, warnings = validate_cfg({"telegram": telegram_section})

        self.assertEqual(errors, [])
        self.assertIsInstance(warnings, list)
|
||||||
|
|
||||||
|
|
||||||
|
if __name__ == "__main__":
|
||||||
|
unittest.main()
|
||||||
21
tests/test_disk_report.py
Normal file
21
tests/test_disk_report.py
Normal file
@@ -0,0 +1,21 @@
|
|||||||
|
import unittest
|
||||||
|
import types
|
||||||
|
import sys
|
||||||
|
|
||||||
|
# Avoid runtime import of real app/aiogram in services.runner.
|
||||||
|
sys.modules.setdefault("app", types.SimpleNamespace(RESTIC_ENV={}))
|
||||||
|
|
||||||
|
from services.disk_report import _top_dirs_cmd
|
||||||
|
|
||||||
|
|
||||||
|
class DiskReportTests(unittest.TestCase):
    """Checks for the ``du`` command built by ``_top_dirs_cmd``."""

    def test_top_dirs_cmd_uses_exec_args_without_shell(self):
        target = "/tmp/path with spaces"

        argv = _top_dirs_cmd(target, 5)

        # The command is an argv list that starts with du and its flags.
        self.assertEqual(argv[:4], ["du", "-x", "-h", "-d"])
        # No shell wrapper tokens may appear anywhere in the argv.
        for shell_token in ("bash", "-lc"):
            self.assertNotIn(shell_token, argv)
        # The path is passed as one final argument, spaces intact.
        self.assertEqual(argv[-1], target)
|
||||||
|
|
||||||
|
|
||||||
|
if __name__ == "__main__":
|
||||||
|
unittest.main()
|
||||||
59
tests/test_queue.py
Normal file
59
tests/test_queue.py
Normal file
@@ -0,0 +1,59 @@
|
|||||||
|
import asyncio
|
||||||
|
import tempfile
|
||||||
|
import unittest
|
||||||
|
|
||||||
|
from services import runtime_state
|
||||||
|
from services import queue as queue_service
|
||||||
|
|
||||||
|
|
||||||
|
class QueueTests(unittest.IsolatedAsyncioTestCase):
    """Async tests for the job queue worker in ``services.queue``."""

    async def asyncSetUp(self):
        # Point runtime state at a throwaway file for each test.
        self.tmp = tempfile.TemporaryDirectory()
        runtime_state.configure(f"{self.tmp.name}/runtime.json")

        # Reset the queue module's globals to a known baseline.
        queue_service._pending.clear()  # type: ignore[attr-defined]
        queue_service._history.clear()  # type: ignore[attr-defined]
        queue_service._stats = {  # type: ignore[attr-defined]
            "processed": 0,
            "avg_wait_sec": 0.0,
            "avg_runtime_sec": 0.0,
            "last_label": "",
            "last_finished_at": 0.0,
        }
        queue_service._cfg = {"incidents": {"enabled": True}}  # type: ignore[attr-defined]

    async def asyncTearDown(self):
        self.tmp.cleanup()

    async def test_worker_logs_failed_job_to_incidents(self):
        logged = []

        def fake_log_incident(cfg, text, category=None):
            logged.append((text, category))

        orig = queue_service.log_incident
        queue_service.log_incident = fake_log_incident
        # Registered as a cleanup so the original is restored even if
        # stopping the worker below raises something unexpected.
        self.addCleanup(setattr, queue_service, "log_incident", orig)

        async def boom():
            raise RuntimeError("boom")

        worker_task = asyncio.create_task(queue_service.worker())
        try:
            await queue_service.enqueue("broken-job", boom)
            await asyncio.wait_for(queue_service._queue.join(), timeout=2.0)  # type: ignore[attr-defined]
        finally:
            worker_task.cancel()
            # `contextlib` is imported at module level further down the file.
            with contextlib.suppress(asyncio.CancelledError):
                await worker_task

        self.assertEqual(queue_service._stats.get("processed"), 1)  # type: ignore[attr-defined]
        self.assertTrue(any("queue_job_failed label=broken-job" in t for t, _c in logged))
        self.assertTrue(any(c == "queue" for _t, c in logged))
|
||||||
|
|
||||||
|
|
||||||
|
import contextlib
|
||||||
|
|
||||||
|
|
||||||
|
if __name__ == "__main__":
|
||||||
|
unittest.main()
|
||||||
28
tests/test_runtime_state.py
Normal file
28
tests/test_runtime_state.py
Normal file
@@ -0,0 +1,28 @@
|
|||||||
|
import json
|
||||||
|
import tempfile
|
||||||
|
import unittest
|
||||||
|
from pathlib import Path
|
||||||
|
|
||||||
|
from services import runtime_state
|
||||||
|
|
||||||
|
|
||||||
|
class RuntimeStateTests(unittest.TestCase):
    """Checks that ``runtime_state`` values are written to and reloaded from disk."""

    def test_set_and_get_persist_between_loads(self):
        expected = {"bar": 1}

        with tempfile.TemporaryDirectory() as tmp:
            state_file = Path(tmp) / "runtime.json"
            runtime_state.configure(str(state_file))

            runtime_state.set_state("foo", dict(expected))
            self.assertEqual(runtime_state.get("foo"), expected)

            # Drop the in-memory copy so the next read has to load the file.
            runtime_state._STATE = {}  # type: ignore[attr-defined]
            runtime_state._LOADED = False  # type: ignore[attr-defined]
            self.assertEqual(runtime_state.get("foo"), expected)

            # The file itself must contain the stored value as JSON.
            on_disk = json.loads(state_file.read_text(encoding="utf-8"))
            self.assertEqual(on_disk.get("foo"), expected)
|
||||||
|
|
||||||
|
|
||||||
|
if __name__ == "__main__":
|
||||||
|
unittest.main()
|
||||||
Reference in New Issue
Block a user