import os import httpx import json import asyncio import time from typing import Optional, Dict, List from collections import deque from config import get_config, save_config # Monitor de requisições (Rate Limiting Monitoring) REQUEST_HISTORY = deque() ALERT_THRESHOLD = 20 ALERT_WINDOW = 60 LAST_ALERT_TIME = 0 ALERT_COOLDOWN = 300 # Evita flood de alertas (5 min) async def send_telegram_alert(message: str): """Envia um alerta diretamente para o Telegram do administrador.""" token = os.getenv("TELEGRAM_BOT_TOKEN") chat_id = os.getenv("TELEGRAM_CHAT_ID") if not token or not chat_id: return url = f"https://api.telegram.org/bot{token}/sendMessage" payload = {"chat_id": chat_id, "text": f"⚠️ **ALERTA DE MONITORAMENTO**\n\n{message}", "parse_mode": "Markdown"} try: async with httpx.AsyncClient() as client: await client.post(url, json=payload, timeout=10) except Exception as e: print(f"Erro ao enviar alerta Telegram: {e}") def track_request(): """Registra uma requisição e verifica se o limite foi atingido.""" global LAST_ALERT_TIME now = time.time() REQUEST_HISTORY.append(now) # Remove registros mais antigos que a janela de 60s while REQUEST_HISTORY and REQUEST_HISTORY[0] < now - ALERT_WINDOW: REQUEST_HISTORY.popleft() # Verifica threshold if len(REQUEST_HISTORY) > ALERT_THRESHOLD: # Verifica cooldown para não floodar o Telegram if now - LAST_ALERT_TIME > ALERT_COOLDOWN: LAST_ALERT_TIME = now msg = ( f"Detectado alto volume de requisições LLM!\n" f"• Total: {len(REQUEST_HISTORY)} requisições nos últimos {ALERT_WINDOW}s\n" f"• Limite: {ALERT_THRESHOLD} requisições\n" f"• Horário: {time.strftime('%H:%M:%S', time.localtime(now))}" ) # Como track_request é sync e chamada de locais variados, # vamos disparar o alerta via background task ou garantir que seja async onde importa. return msg return None # ============================================================ # CONFIGURAÇÃO DE PROVIDERS # ============================================================ LLM_PROVIDERS = { "gemini": { "name": "Google Gemini", "type": "api", "models": ["gemini-1.5-flash", "gemini-1.5-pro", "gemini-2.0-flash-exp"], "default": "gemini-1.5-flash", # Estabilizado para 1.5-flash "endpoint": "https://generativelanguage.googleapis.com/v1beta/models" }, "openai": { "name": "OpenAI GPT", "type": "api", "models": ["gpt-4o", "gpt-4-turbo", "gpt-3.5-turbo"], "default": "gpt-4o", "endpoint": "https://api.openai.com/v1" }, "anthropic": { "name": "Anthropic Claude", "type": "api", "models": ["claude-3-5-sonnet-20241022", "claude-3-5-haiku-20241022", "claude-3-opus-20240229"], "default": "claude-3-5-sonnet-20241022", "endpoint": "https://api.anthropic.com/v1" }, "openrouter": { "name": "OpenRouter", "type": "api", "models": ["qwen/qwen-2.5-72b-instruct", "inclusionai/ling-2.6-flash:free", "google/gemini-2.0-flash-001"], "default": "qwen/qwen-2.5-72b-instruct", "endpoint": "https://openrouter.ai/api/v1" }, "minimax": { "name": "MiniMax (Hermes)", "type": "api", "models": ["abab7-preview", "abab6.5s-chat", "minimax-text-01"], "default": "abab7-preview", "endpoint": "https://api.minimax.io/v1/text/chatcompletion_v2" }, "ollama": { "name": "Ollama (Local)", "type": "local", "endpoint": os.getenv("OLLAMA_HOST", "http://ollama:11434"), "models": None, "default": "llama3.2:1b" } } # ============================================================ # CONFIG MANAGER (OBSOLETE LOCALLY, USING config.py) # ============================================================ def get_orchestrator_config() -> dict: """Retorna config do orchestrator.""" cfg = get_config() return cfg.get("orchestrator", { "planner": {"provider": "openrouter", "model": "qwen/qwen-2.5-72b-instruct"}, "executor": {"provider": "ollama", "model": "llama3.2:1b"} }) def set_planner(provider: str = None, model: str = None) -> dict: """Define o provider e modelo do planner.""" cfg = get_config() if "orchestrator" not in cfg: cfg["orchestrator"] = {} if provider: cfg["orchestrator"]["planner"] = { "provider": provider, "model": model or LLM_PROVIDERS[provider]["default"] } save_config(cfg) return cfg["orchestrator"].get("planner", {"provider": "openrouter", "model": "qwen/qwen-2.5-72b-instruct"}) def set_executor(provider: str = None, model: str = None) -> dict: """Define o provider e modelo do executor.""" cfg = get_config() if "orchestrator" not in cfg: cfg["orchestrator"] = {} if provider: cfg["orchestrator"]["executor"] = { "provider": provider, "model": model or LLM_PROVIDERS[provider]["default"] } save_config(cfg) return cfg["orchestrator"].get("executor", {"provider": "ollama", "model": "llama3.2:1b"}) def set_api_key(provider: str, key: str): """Armazena API key de um provider.""" cfg = get_config() if "api_keys" not in cfg: cfg["api_keys"] = {} cfg["api_keys"][provider] = key save_config(cfg) def get_api_key(provider: str) -> str: """Busca API key de um provider (config, env var ou fallback).""" cfg = get_config() # 1. Tenta API Keys modernas no config api_keys = cfg.get("api_keys", {}) if api_keys.get(provider): return api_keys[provider] # 2. Tenta chaves legadas no raiz do config (ex: gemini_api_key) legacy_key = f"{provider}_api_key" if cfg.get(legacy_key): return cfg[legacy_key] # 3. Fallback para environment variable env_vars = { "openai": "OPENAI_API_KEY", "anthropic": "ANTHROPIC_API_KEY", "gemini": "GEMINI_API_KEY", "openrouter": "OPENROUTER_API_KEY", "minimax": "MINIMAX_API_KEY" } # 3.1 Busca específica do provider if provider in env_vars and os.getenv(env_vars[provider]): return os.getenv(env_vars[provider]) # 3.2 Busca DINÂMICA (Chaves do Vault do Orquestrador) # Procura qualquer variável que comece com o nome do provedor (ex: openrouter_qwen) for key, value in os.environ.items(): if key.lower().startswith(f"{provider}_"): return value # 4. Fallback ÚLTIMO RECURSO (Segurança Antigravity) if provider == "gemini": return "AIzaSyAxuqqqtxWSnns_XbXV7DVePSdM4DXPaVo" return "" # ============================================================ # OLLAMA DISCOVERY # ============================================================ async def list_ollama_models() -> List[str]: """Busca modelos disponíveis no Ollama em modo async.""" try: endpoint = LLM_PROVIDERS["ollama"]["endpoint"] async with httpx.AsyncClient() as client: response = await client.get(f"{endpoint}/api/tags", timeout=5) if response.status_code == 200: models = [m["name"] for m in response.json().get("models", [])] LLM_PROVIDERS["ollama"]["models"] = models return models except Exception as e: print(f"Erro ao buscar modelos Ollama: {e}") return [] async def get_available_models(provider: str = None) -> List[Dict]: """Retorna modelos disponíveis para um provider ou todos (async).""" if provider: p = LLM_PROVIDERS.get(provider) if not p: return [] if p["type"] == "local" and provider == "ollama": models = await list_ollama_models() return [{"provider": provider, "models": models}] else: return [{"provider": provider, "models": p.get("models", [p["default"]])}] # Todos os providers result = [] for prov_id, prov in LLM_PROVIDERS.items(): if prov_id == "ollama": models = await list_ollama_models() result.append({"provider": prov_id, "name": prov["name"], "models": models}) else: result.append({"provider": prov_id, "name": prov["name"], "models": prov.get("models", [prov["default"]])}) return result # ============================================================ # ASYNC LLM CALL FUNCTIONS # ============================================================ async def call_llm(provider: str, model: str, prompt: str, system_prompt: str = None, **kwargs) -> dict: """Suporte universal async para chamadas de LLM com monitoramento de tráfego.""" # Monitoramento de Rate Limit alert_msg = track_request() if alert_msg: asyncio.create_task(send_telegram_alert(alert_msg)) if provider == "gemini": res = await _call_gemini_async(model, prompt, system_prompt) elif provider == "openai": res = await _call_openai_async(model, prompt, system_prompt) elif provider == "anthropic": res = await _call_anthropic_async(model, prompt, system_prompt) elif provider == "ollama": res = await _call_ollama_async(model, prompt, system_prompt) elif provider == "openrouter": res = await _call_openrouter_async(model, prompt, system_prompt) elif provider == "minimax": res = await _call_minimax_async(model, prompt, system_prompt) else: return {"content": f"Erro: Provider '{provider}' não suportado.", "usage": {}} # Garante que o retorno seja um dicionário (compatibilidade com shims antigos se houver) if isinstance(res, str): return {"content": res, "usage": {}, "model": model} return res async def _call_openrouter_async(model: str, prompt: str, system_prompt: str = None) -> dict: """Chama API do OpenRouter (OpenAI Compatible) via httpx (async).""" api_key = get_api_key("openrouter") url = "https://openrouter.ai/api/v1/chat/completions" messages = [] if system_prompt: messages.append({"role": "system", "content": system_prompt}) messages.append({"role": "user", "content": prompt}) payload = { "model": model, "messages": messages, "temperature": 0.7 } headers = { "Authorization": f"Bearer {api_key}", "HTTP-Referer": "https://botvps.cloud", # Requisito OpenRouter "X-Title": "BotVPS Factory", "Content-Type": "application/json" } try: async with httpx.AsyncClient() as client: res = await client.post(url, json=payload, headers=headers, timeout=120) if res.status_code == 200: data = res.json() if "choices" in data and len(data["choices"]) > 0: return { "content": data["choices"][0]["message"]["content"], "usage": data.get("usage", {}), "model": data.get("model", model) } return {"content": f"Erro OpenRouter (Resposta sem 'choices'): {json.dumps(data)}", "usage": {}} # Se não for 200, tenta extrair erro detalhado try: error_data = res.json() return {"content": f"Erro OpenRouter {res.status_code}: {json.dumps(error_data)}", "usage": {}} except: return {"content": f"Erro OpenRouter: {res.status_code} - {res.text}", "usage": {}} except Exception as e: return {"content": f"Erro OpenRouter: {str(e)}", "usage": {}} async def _call_gemini_async(model: str, prompt: str, system_prompt: str = None) -> str: """Chama API do Google Gemini via httpx (async).""" api_key = get_api_key("gemini") url = f"https://generativelanguage.googleapis.com/v1beta/models/{model}:generateContent?key={api_key}" contents = [{"parts": [{"text": prompt}]}] if system_prompt: contents.insert(0, {"role": "model", "parts": [{"text": system_prompt}]}) payload = { "contents": contents, "safetySettings": [ {"category": "HARM_CATEGORY_HARASSMENT", "threshold": "BLOCK_NONE"}, {"category": "HARM_CATEGORY_HATE_SPEECH", "threshold": "BLOCK_NONE"}, {"category": "HARM_CATEGORY_SEXUALLY_EXPLICIT", "threshold": "BLOCK_NONE"}, {"category": "HARM_CATEGORY_DANGEROUS_CONTENT", "threshold": "BLOCK_NONE"} ], "generationConfig": { "temperature": 0.7, "maxOutputTokens": 4096 } } try: async with httpx.AsyncClient() as client: res = await client.post(url, json=payload, timeout=60) if res.status_code == 200: data = res.json() try: candidate = data["candidates"][0] return candidate["content"]["parts"][0]["text"] except KeyError: # Model might have blocked it due to safety or empty response finish_reason = data.get("candidates", [{}])[0].get("finishReason", "Unknown") return f"Erro Gemini (Parsing): Resposta sem formato esperado. Motivo/FinishReason: {finish_reason}. Raw: {json.dumps(data)}" return f"Erro Gemini: {res.status_code} - {res.text}" except Exception as e: return f"Erro Gemini: {str(e)}" async def _call_minimax_async(model: str, prompt: str, system_prompt: str = None) -> dict: """Chama API do MiniMax (V2) via httpx (async).""" api_key = get_api_key("minimax") url = "https://api.minimax.io/v1/text/chatcompletion_v2" messages = [] if system_prompt: messages.append({"role": "system", "content": system_prompt}) messages.append({"role": "user", "content": prompt}) payload = { "model": model, "messages": messages, "tools": [], "tool_choice": "none", "stream": False } headers = { "Authorization": f"Bearer {api_key}", "Content-Type": "application/json" } try: async with httpx.AsyncClient() as client: res = await client.post(url, json=payload, headers=headers, timeout=120) if res.status_code == 200: data = res.json() # MiniMax V2 structure if "choices" in data and len(data["choices"]) > 0: return { "content": data["choices"][0]["message"]["content"], "usage": data.get("usage", {}), "model": data.get("base_resp", {}).get("model") or model } return {"content": f"Erro MiniMax (Resposta inesperada): {json.dumps(data)}", "usage": {}} return {"content": f"Erro MiniMax {res.status_code}: {res.text}", "usage": {}} except Exception as e: return {"content": f"Erro MiniMax: {str(e)}", "usage": {}} async def _call_openai_async(model: str, prompt: str, system_prompt: str = None) -> str: """Chama API da OpenAI via httpx (async).""" api_key = get_api_key("openai") url = f"https://api.openai.com/v1/chat/completions" messages = [] if system_prompt: messages.append({"role": "system", "content": system_prompt}) messages.append({"role": "user", "content": prompt}) payload = { "model": model, "messages": messages, "temperature": 0.7, "max_completion_tokens": 4096 } headers = {"Authorization": f"Bearer {api_key}", "Content-Type": "application/json"} try: async with httpx.AsyncClient() as client: res = await client.post(url, json=payload, headers=headers, timeout=60) if res.status_code == 200: return res.json()["choices"][0]["message"]["content"] return f"Erro OpenAI: {res.status_code} - {res.text}" except Exception as e: return f"Erro OpenAI: {str(e)}" async def _call_anthropic_async(model: str, prompt: str, system_prompt: str = None) -> str: """Chama API da Anthropic via httpx (async).""" api_key = get_api_key("anthropic") url = "https://api.anthropic.com/v1/messages" headers = { "x-api-key": api_key, "anthropic-version": "2023-06-01", "content-type": "application/json" } payload = { "model": model, "max_tokens": 4096, "messages": [{"role": "user", "content": prompt}], "temperature": 0.7 } if system_prompt: payload["system"] = system_prompt try: async with httpx.AsyncClient() as client: res = await client.post(url, json=payload, headers=headers, timeout=60) if res.status_code == 200: return res.json()["content"][0]["text"] return f"Erro Anthropic: {res.status_code} - {res.text}" except Exception as e: return f"Erro Anthropic: {str(e)}" async def _call_ollama_async(model: str, prompt: str, system_prompt: str = None) -> str: """Chama Ollama local via httpx (async).""" endpoint = LLM_PROVIDERS["ollama"]["endpoint"] payload = { "model": model, "prompt": prompt, "stream": False, "options": { "num_ctx": 4096, "temperature": 0.7 } } if system_prompt: payload["system"] = system_prompt try: async with httpx.AsyncClient() as client: res = await client.post(f"{endpoint}/api/generate", json=payload, timeout=180) if res.status_code == 200: return res.json().get("response", "") return f"Erro Ollama: {res.status_code} - {res.text}" except Exception as e: return f"Erro Ollama: {str(e)}" def check_ollama_connection() -> dict: """Versão síncrona mantida para compatibilidade rápida de status.""" import requests endpoint = LLM_PROVIDERS["ollama"]["endpoint"] try: res = requests.get(f"{endpoint}/api/tags", timeout=10) if res.status_code == 200: models = [m.get("name") for m in res.json().get("models", [])] return {"status": "ok", "models": models, "endpoint": endpoint} return {"status": "error", "code": res.status_code, "endpoint": endpoint} except Exception as e: return {"status": "error", "message": str(e), "endpoint": endpoint} # ============================================================ # PLANNER & EXECUTOR WRAPPERS (PROMETE SER ASYNC) # ============================================================ def get_planner_llm() -> tuple: cfg = get_orchestrator_config() planner = cfg.get("planner", {"provider": "openrouter", "model": "qwen/qwen-2.5-72b-instruct"}) return planner["provider"], planner["model"] def get_executor_llm() -> tuple: cfg = get_orchestrator_config() executor = cfg.get("executor", {"provider": "ollama", "model": "llama3.2:1b"}) return executor["provider"], executor["model"] async def call_planner_async(prompt: str, system_prompt: str = None) -> str: provider, model = get_planner_llm() try: response_dict = await call_llm(provider, model, prompt, system_prompt) content = response_dict["content"] # Se a resposta indicar um erro de API, disparamos o fallback if content.startswith("Erro OpenRouter"): raise Exception(content) return content except Exception as e: # Lógica de FALLBACK: Se o Qwen falhar, tenta o Ling-2.6-flash if provider == "openrouter" and model == "qwen/qwen-2.5-72b-instruct": backup_model = "inclusionai/ling-2.6-flash:free" print(f"⚠️ [FALLBACK] Falha no Qwen ({str(e)}). Tentando {backup_model}...") res = await call_llm("openrouter", backup_model, prompt, system_prompt) return res["content"] return f"Erro Crítico no Planner: {str(e)}" async def call_executor_async(prompt: str, system_prompt: str = None) -> str: provider, model = get_executor_llm() res = await call_llm(provider, model, prompt, system_prompt) return res["content"] # --- BACKWARD COMPATIBILITY SHIMS (SYNC WRAPPERS) --- def call_planner(prompt: str, system_prompt: str = None) -> str: return asyncio.run(call_planner_async(prompt, system_prompt)) def call_executor(prompt: str, system_prompt: str = None) -> str: return asyncio.run(call_executor_async(prompt, system_prompt))