Compare commits
2 Commits
23fa70f15c
...
745a5171a1
| Author | SHA1 | Date | |
|---|---|---|---|
| 745a5171a1 | |||
| e3a1321d3f |
@@ -17,6 +17,10 @@ alerts:
|
||||
interval_sec: 60
|
||||
cooldown_sec: 900
|
||||
notify_recovery: true
|
||||
smart_enabled: true
|
||||
smart_interval_sec: 3600
|
||||
smart_cooldown_sec: 21600
|
||||
smart_temp_warn: 50
|
||||
|
||||
docker:
|
||||
# If true, discover containers by name/label
|
||||
|
||||
@@ -150,3 +150,57 @@ async def logs_options(cb: CallbackQuery):
|
||||
return
|
||||
|
||||
await cb.answer("Bad request")
|
||||
|
||||
|
||||
@dp.callback_query(F.data.startswith("wdrestart:"))
async def watchdog_restart_request(cb: CallbackQuery):
    """Ask for confirmation before restarting the container named in the callback.

    The callback data is expected to look like ``wdrestart:<alias>``.

    * Callbacks from anyone other than ``ADMIN_ID`` are dropped without a reply.
    * An alias that is not a key of ``DOCKER_MAP`` is answered with
      "Container not found".
    * Otherwise a new message with "Confirm restart" / "Cancel" buttons is
      posted and the callback is acknowledged with an empty answer.
    """
    if cb.from_user.id != ADMIN_ID:
        return

    # maxsplit=1 keeps any further ":" characters as part of the alias.
    _prefix, alias = cb.data.split(":", 1)
    if alias not in DOCKER_MAP:
        await cb.answer("Container not found")
        return

    confirm_button = InlineKeyboardButton(
        text="✅ Confirm restart",
        callback_data=f"wdconfirm:{alias}",
    )
    cancel_button = InlineKeyboardButton(
        text="✖ Cancel",
        callback_data="wdcancel",
    )
    # Single row holding both buttons side by side.
    keyboard = InlineKeyboardMarkup(
        inline_keyboard=[[confirm_button, cancel_button]],
    )

    prompt = f"⚠️ Confirm restart `{alias}`?"
    await cb.message.answer(prompt, reply_markup=keyboard, parse_mode="Markdown")
    await cb.answer()
|
||||
|
||||
|
||||
@dp.callback_query(F.data == "wdcancel")
async def watchdog_restart_cancel(cb: CallbackQuery):
    """Acknowledge a press of the Cancel button with a "Cancelled" notice.

    Only the callback query is answered; the confirmation message and its
    buttons are left as they are.
    """
    await cb.answer(text="Cancelled")
|
||||
|
||||
|
||||
@dp.callback_query(F.data.startswith("wdconfirm:"))
async def watchdog_restart_confirm(cb: CallbackQuery):
    """Restart the container named in the callback and report the outcome.

    The callback data is expected to look like ``wdconfirm:<alias>``.

    * Callbacks from anyone other than ``ADMIN_ID`` are dropped without a reply.
    * An alias with no (truthy) entry in ``DOCKER_MAP`` is answered with
      "Container not found".
    * Otherwise the callback is answered with "Restarting…",
      ``docker_cmd(["restart", <real name>])`` is awaited, and a message
      with the command output is posted. The headline of that message now
      depends on the return code, so a failed restart is no longer
      announced as a success.
    """
    if cb.from_user.id != ADMIN_ID:
        return

    # maxsplit=1 keeps any further ":" characters as part of the alias.
    _, alias = cb.data.split(":", 1)
    real = DOCKER_MAP.get(alias)
    if not real:
        await cb.answer("Container not found")
        return

    # Answer before the restart so the client gets its acknowledgement
    # without waiting for the docker command to finish.
    await cb.answer("Restarting…")
    rc, out = await docker_cmd(["restart", real])

    # NOTE(review): assumes docker_cmd returns (exit code, output) and that
    # 0 means success, as for a process exit status; confirm against
    # docker_cmd's definition.
    if rc == 0:
        headline = f"🔄 **{alias} restarted**"
    else:
        headline = f"❌ **{alias} restart failed** (rc={rc})"

    # NOTE(review): with parse_mode="Markdown" an alias containing "_" or
    # "*" outside a code span may be rejected by Telegram; verify with real
    # container aliases.
    await cb.message.answer(
        f"{headline}\n```{out}```",
        parse_mode="Markdown",
    )
|
||||
|
||||
4
main.py
4
main.py
@@ -4,7 +4,7 @@ from datetime import datetime
|
||||
from app import bot, dp, cfg, ADMIN_ID
|
||||
from keyboards import menu_kb
|
||||
from services.docker import discover_containers, docker_watchdog
|
||||
from services.alerts import monitor_resources
|
||||
from services.alerts import monitor_resources, monitor_smart
|
||||
from services.notify import notify
|
||||
import state
|
||||
import handlers.menu
|
||||
@@ -32,6 +32,8 @@ async def main():
|
||||
asyncio.create_task(docker_watchdog(state.DOCKER_MAP, notify, bot, ADMIN_ID))
|
||||
if cfg.get("alerts", {}).get("enabled", True):
|
||||
asyncio.create_task(monitor_resources(cfg, notify, bot, ADMIN_ID))
|
||||
if cfg.get("alerts", {}).get("smart_enabled", True):
|
||||
asyncio.create_task(monitor_smart(cfg, notify, bot, ADMIN_ID))
|
||||
await notify_start()
|
||||
await dp.start_polling(bot)
|
||||
|
||||
|
||||
@@ -1,6 +1,7 @@
|
||||
import asyncio
|
||||
import time
|
||||
import psutil
|
||||
from system_checks import list_disks, smart_health, disk_temperature
|
||||
from services.system import worst_disk_usage
|
||||
|
||||
|
||||
@@ -52,3 +53,39 @@ async def monitor_resources(cfg, notify, bot, chat_id):
|
||||
state["load_high"] = False
|
||||
|
||||
await asyncio.sleep(interval)
|
||||
|
||||
|
||||
async def monitor_smart(cfg, notify, bot, chat_id):
    """Poll disk SMART health and temperature forever and send alerts.

    Settings are read once from ``cfg["alerts"]``:

    * ``smart_interval_sec`` (default 3600): pause between polling rounds.
    * ``smart_cooldown_sec`` (default 6 * 3600): minimum time between two
      alerts of the same kind for the same device.
    * ``smart_temp_warn`` (default 50): temperature at or above which a
      "SMART HOT" alert is sent.

    Args:
        cfg: Mapping with an optional ``"alerts"`` sub-mapping.
        notify: Awaitable callable invoked as ``notify(bot, chat_id, text)``.
        bot: Passed through to ``notify`` unchanged.
        chat_id: Passed through to ``notify`` unchanged.

    The coroutine never returns; it loops until cancelled.

    The cooldown is tracked per ``(device, alert kind)``. Keying it on the
    health and temperature text as well would let every 1-degree change in
    the reading start a fresh cooldown, re-sending the alert on each round
    and growing the bookkeeping dict without bound.
    """
    alerts_cfg = cfg.get("alerts", {})
    interval = int(alerts_cfg.get("smart_interval_sec", 3600))
    cooldown = int(alerts_cfg.get("smart_cooldown_sec", 6 * 3600))
    temp_warn = int(alerts_cfg.get("smart_temp_warn", 50))

    # (device, alert kind) -> time.time() of the last alert of that kind.
    last_sent = {}

    while True:
        for dev in list_disks():
            health = smart_health(dev)
            temp = disk_temperature(dev)

            if "FAILED" in health:
                kind = "fail"
                text = f"🔴 SMART FAIL {dev}: {health}, 🌡 {temp}"
            else:
                # The temperature arrives as text such as "45°C" or "n/a";
                # anything that does not parse as an integer is treated as
                # "no reading".
                t = None
                if temp != "n/a":
                    try:
                        t = int(temp.replace("°C", ""))
                    except ValueError:
                        t = None
                if t is None or t < temp_warn:
                    continue
                kind = "hot"
                text = f"🟡 SMART HOT {dev}: {health}, 🌡 {temp}"

            key = (dev, kind)
            now = time.time()
            if last_sent.get(key, 0) + cooldown > now:
                # Same kind of alert for this device was sent recently.
                continue

            await notify(bot, chat_id, text)
            last_sent[key] = now

        await asyncio.sleep(interval)
|
||||
|
||||
@@ -1,6 +1,7 @@
|
||||
import asyncio
|
||||
from datetime import datetime, timezone
|
||||
from typing import Dict
|
||||
from aiogram.types import InlineKeyboardMarkup, InlineKeyboardButton
|
||||
from services.runner import run_cmd
|
||||
|
||||
|
||||
@@ -109,6 +110,21 @@ async def docker_watchdog(container_map, notify, bot, chat_id):
|
||||
state = "error"
|
||||
state = state.strip()
|
||||
if last.get(alias) != state:
|
||||
if state != "running":
|
||||
kb = InlineKeyboardMarkup(
|
||||
inline_keyboard=[[
|
||||
InlineKeyboardButton(
|
||||
text="🔄 Restart",
|
||||
callback_data=f"wdrestart:{alias}"
|
||||
)
|
||||
]]
|
||||
)
|
||||
await bot.send_message(
|
||||
chat_id,
|
||||
f"🐳 {alias}: {state}",
|
||||
reply_markup=kb,
|
||||
)
|
||||
else:
|
||||
await notify(bot, chat_id, f"🐳 {alias}: {state}")
|
||||
last[alias] = state
|
||||
await asyncio.sleep(120)
|
||||
|
||||
Reference in New Issue
Block a user