# File: BotVPS/watchdog.py (Python, 97 lines, 3.3 KiB)

import asyncio
import json
import os
import subprocess
import time

import httpx
import psutil
from dotenv import load_dotenv
# Let python-dotenv populate the environment before the values are read below.
load_dotenv()

# Telegram credentials; each is None when its environment variable is unset.
TOKEN = os.environ.get("TELEGRAM_BOT_TOKEN")
CHAT_ID = os.environ.get("TELEGRAM_CHAT_ID")

# Watchdog settings
CHECK_INTERVAL = 10  # seconds slept between two monitoring passes
CPU_THRESHOLD = 90.0  # CPU usage (percent) above which a sample counts as high
CPU_STREAK_LIMIT = 6  # consecutive high samples before alerting: 6 * 10s = 60s
class Watchdog:
    """Poll CPU load, PM2 process state and disk usage; report problems to Telegram.

    Every alert goes through a cooldown that is tracked separately for each
    kind of alert, so a CPU alert never suppresses a disk or PM2 alert.
    """

    # Percentage of '/' in use above which a disk alert is raised.
    DISK_THRESHOLD = 95
    # Seconds to wait for `pm2 jlist` before abandoning the call.
    PM2_TIMEOUT = 15

    def __init__(self):
        # Number of consecutive samples above CPU_THRESHOLD.
        self.cpu_streak = 0
        # time.time() of the most recent alert of any kind (kept for
        # backward compatibility with code that reads this attribute).
        self.last_alert_time = 0
        # Minimum number of seconds between two alerts of the same kind.
        self.alert_cooldown = 300
        # Maps an alert kind (e.g. "cpu", "disk") to the time.time() at which
        # that kind was last sent.
        self._last_alert_by_kind = {}

    def _cooldown_elapsed(self, kind):
        """Return True when an alert of ``kind`` may be sent again."""
        last_sent = self._last_alert_by_kind.get(kind, 0)
        return time.time() - last_sent > self.alert_cooldown

    def _mark_alerted(self, kind):
        """Record that an alert of ``kind`` has just been sent."""
        now = time.time()
        self._last_alert_by_kind[kind] = now
        self.last_alert_time = now

    async def _alert(self, kind, message):
        """Send ``message`` unless an alert of ``kind`` is still cooling down."""
        if not self._cooldown_elapsed(kind):
            return
        await self.send_telegram_message(message)
        self._mark_alerted(kind)

    async def send_telegram_message(self, message: str):
        """Post ``message`` to the configured Telegram chat.

        Failures are printed and never raised, so a Telegram outage cannot
        stop the monitoring loop. Nothing is sent when TOKEN or CHAT_ID is
        missing.
        """
        if not TOKEN or not CHAT_ID:
            print("[WATCHDOG] Erro: TOKEN ou CHAT_ID não configurados.")
            return
        url = f"https://api.telegram.org/bot{TOKEN}/sendMessage"
        payload = {
            "chat_id": CHAT_ID,
            "text": f"🚨 **[WATCHDOG VPS]**\n\n{message}",
            "parse_mode": "Markdown",
        }
        try:
            async with httpx.AsyncClient() as client:
                response = await client.post(url, json=payload)
                if response.status_code == 400:
                    # A 400 here is commonly a Markdown parse failure (for
                    # example an app name with an unescaped '_'). Retry as
                    # plain text so the alert is delivered anyway.
                    payload.pop("parse_mode", None)
                    response = await client.post(url, json=payload)
                # The status is checked by hand rather than through
                # raise_for_status(): that exception's message embeds the
                # request URL, which would print the bot token.
                if response.status_code >= 400:
                    print(
                        "[WATCHDOG] Erro ao enviar Telegram: "
                        f"HTTP {response.status_code} - {response.text}"
                    )
        except Exception as e:
            print(f"[WATCHDOG] Erro ao enviar Telegram: {e}")

    @staticmethod
    def _parse_pm2_issues(stdout):
        """Turn the JSON text printed by ``pm2 jlist`` into a list of issues.

        Returns one human-readable line per process whose
        ``pm2_env.status`` is not ``"online"``. Raises if ``stdout`` is not
        valid JSON or an entry lacks the expected keys.
        """
        return [
            f"🔴 App '{proc['name']}' está {proc['pm2_env']['status']}!"
            for proc in json.loads(stdout)
            if proc['pm2_env']['status'] != 'online'
        ]

    def get_pm2_status(self):
        """Return a list of issue strings for PM2 apps that are not online.

        Returns an empty list when every app is online and also when PM2
        cannot be queried (missing binary, timeout, non-zero exit, bad
        output); those failures are printed instead of raised.
        """
        try:
            result = subprocess.run(
                ["pm2", "jlist"],
                capture_output=True,
                text=True,
                # Without a timeout a hung pm2 would stall the whole loop.
                timeout=self.PM2_TIMEOUT,
            )
            if result.returncode != 0:
                print(
                    f"[WATCHDOG] Erro PM2: código de saída {result.returncode}: "
                    f"{result.stderr.strip()}"
                )
                return []
            return self._parse_pm2_issues(result.stdout)
        except Exception as e:
            print(f"[WATCHDOG] Erro PM2: {e}")
        return []

    async def _check_once(self):
        """Run one monitoring pass over CPU, PM2 and disk."""
        # 1. CPU monitoring: alert only after CPU_STREAK_LIMIT consecutive
        #    samples above CPU_THRESHOLD.
        cpu_usage = psutil.cpu_percent(interval=1)
        if cpu_usage > CPU_THRESHOLD:
            self.cpu_streak += 1
        else:
            self.cpu_streak = 0
        if self.cpu_streak >= CPU_STREAK_LIMIT:
            await self._alert(
                "cpu",
                f"CPU em nível crítico: {cpu_usage}% por mais de 1 minuto!",
            )

        # 2. PM2 monitoring: the cooldown is keyed on the report text, so an
        #    unchanged set of problems is repeated at most once per cooldown
        #    while a changed set is reported immediately.
        pm2_issues = self.get_pm2_status()
        if pm2_issues:
            report = "\n".join(pm2_issues)
            await self._alert("pm2:" + report, report)

        # 3. Disk space monitoring
        disk = psutil.disk_usage('/')
        if disk.percent > self.DISK_THRESHOLD:
            await self._alert(
                "disk",
                f"Espaço em disco crítico: {disk.percent}% ocupado!",
            )

    async def run(self):
        """Monitor forever, sleeping CHECK_INTERVAL seconds between passes.

        Any exception raised by a pass is printed and the loop continues.
        """
        print("[WATCHDOG] Iniciado. Vigilância ativa...")
        while True:
            try:
                await self._check_once()
            except Exception as e:
                print(f"[WATCHDOG] Erro no loop: {e}")
            await asyncio.sleep(CHECK_INTERVAL)
# Script entry point: build a watchdog and drive its loop until the process ends.
if __name__ == "__main__":
    import asyncio

    watchdog = Watchdog()
    asyncio.run(watchdog.run())