"""VPS watchdog: monitors CPU, PM2 processes and disk usage, alerting via Telegram."""
import asyncio
import json
import os
import subprocess
import time

import httpx
import psutil
from dotenv import load_dotenv

load_dotenv()

TOKEN = os.getenv("TELEGRAM_BOT_TOKEN")
CHAT_ID = os.getenv("TELEGRAM_CHAT_ID")

# Watchdog settings
CPU_THRESHOLD = 90.0  # percent
CPU_STREAK_LIMIT = 6  # consecutive hot samples required before alerting (~1 minute)
CHECK_INTERVAL = 10  # seconds slept between monitoring cycles
DISK_THRESHOLD = 95.0  # percent of the root filesystem in use
PM2_TIMEOUT = 15  # seconds to wait for `pm2 jlist` before giving up


class Watchdog:
    """Periodically checks host health and reports problems to a Telegram chat."""

    def __init__(self):
        # Number of consecutive samples with CPU usage above CPU_THRESHOLD.
        self.cpu_streak = 0
        # Timestamp of the most recent throttled alert of any kind
        # (kept for backward compatibility with external readers).
        self.last_alert_time = 0
        # Minimum seconds between two alerts of the same kind (5 minutes).
        self.alert_cooldown = 300
        # Per-kind timestamps so that, e.g., a CPU alert does not silence
        # a disk alert raised shortly afterwards.
        self._last_alert_by_kind = {}
        # Text of the last PM2 report sent; None while every app is online.
        self._last_pm2_report = None

    def _cooldown_elapsed(self, kind):
        """Return True when an alert of *kind* may be sent again."""
        last_sent = self._last_alert_by_kind.get(kind, 0)
        return time.time() - last_sent > self.alert_cooldown

    def _mark_alert(self, kind):
        """Record that an alert of *kind* has just been sent."""
        now = time.time()
        self._last_alert_by_kind[kind] = now
        self.last_alert_time = now

    async def send_telegram_message(self, message: str):
        """Send *message* to the configured Telegram chat.

        Failures (missing configuration, network errors, non-2xx API
        responses) are printed and swallowed so that the monitoring loop
        keeps running.
        """
        if not TOKEN or not CHAT_ID:
            print("[WATCHDOG] Erro: TOKEN ou CHAT_ID não configurados.")
            return

        url = f"https://api.telegram.org/bot{TOKEN}/sendMessage"
        payload = {
            "chat_id": CHAT_ID,
            "text": f"🚨 **[WATCHDOG VPS]**\n\n{message}",
            "parse_mode": "Markdown",
        }
        try:
            async with httpx.AsyncClient() as client:
                response = await client.post(url, json=payload)
                # Surface API rejections (bad token, Markdown parse errors,
                # rate limits) instead of silently dropping the alert.
                response.raise_for_status()
        except Exception as e:
            print(f"[WATCHDOG] Erro ao enviar Telegram: {e}")

    def get_pm2_status(self):
        """Return one message per PM2 app whose status is not 'online'.

        Returns an empty list when every app is online or when PM2 could
        not be queried (the error is printed).
        """
        try:
            # The timeout keeps a hung pm2 daemon from stalling the watchdog.
            result = subprocess.run(
                ["pm2", "jlist"],
                capture_output=True,
                text=True,
                timeout=PM2_TIMEOUT,
            )
            if result.returncode != 0:
                print(
                    f"[WATCHDOG] Erro PM2: pm2 jlist retornou código {result.returncode}"
                )
                return []
            return [
                f"🔴 App '{proc['name']}' está {proc['pm2_env']['status']}!"
                for proc in json.loads(result.stdout)
                if proc['pm2_env']['status'] != 'online'
            ]
        except Exception as e:
            print(f"[WATCHDOG] Erro PM2: {e}")
        return []

    async def _check_cpu(self):
        """Alert when CPU stays above CPU_THRESHOLD for CPU_STREAK_LIMIT samples."""
        # NOTE: interval=1 blocks for one second while psutil samples usage.
        cpu_usage = psutil.cpu_percent(interval=1)
        if cpu_usage > CPU_THRESHOLD:
            self.cpu_streak += 1
        else:
            self.cpu_streak = 0

        if self.cpu_streak >= CPU_STREAK_LIMIT and self._cooldown_elapsed("cpu"):
            await self.send_telegram_message(
                f"CPU em nível crítico: {cpu_usage}% por mais de 1 minuto!"
            )
            self._mark_alert("cpu")

    async def _check_pm2(self):
        """Alert about PM2 apps that are not online, without repeating every cycle."""
        pm2_issues = self.get_pm2_status()
        if not pm2_issues:
            # Forget the last report so a new failure alerts immediately.
            self._last_pm2_report = None
            return

        report = "\n".join(pm2_issues)
        # Send right away when the set of problems changed; otherwise
        # repeat the same report only once per cooldown period.
        if report != self._last_pm2_report or self._cooldown_elapsed("pm2"):
            await self.send_telegram_message(report)
            self._last_pm2_report = report
            self._mark_alert("pm2")

    async def _check_disk(self):
        """Alert when the root filesystem usage exceeds DISK_THRESHOLD."""
        disk = psutil.disk_usage('/')
        if disk.percent > DISK_THRESHOLD and self._cooldown_elapsed("disk"):
            await self.send_telegram_message(
                f"Espaço em disco crítico: {disk.percent}% ocupado!"
            )
            self._mark_alert("disk")

    async def run(self):
        """Run the monitoring loop forever, one cycle every CHECK_INTERVAL seconds."""
        print("[WATCHDOG] Iniciado. Vigilância ativa...")
        while True:
            try:
                await self._check_cpu()   # 1. CPU monitoring
                await self._check_pm2()   # 2. PM2 monitoring
                await self._check_disk()  # 3. Disk space monitoring
            except Exception as e:
                print(f"[WATCHDOG] Erro no loop: {e}")
            # Sleep outside the try block so task cancellation is never
            # swallowed by the broad exception handler above.
            await asyncio.sleep(CHECK_INTERVAL)


if __name__ == "__main__":
    dog = Watchdog()
    asyncio.run(dog.run())