# BotVPS/llm_providers.py
import os
import httpx
import json
import asyncio
import time
from typing import Optional, Dict, List
from collections import deque
from config import get_config, save_config
# Request monitor (rate-limiting monitoring)
REQUEST_HISTORY = deque()
ALERT_THRESHOLD = 20
ALERT_WINDOW = 60
LAST_ALERT_TIME = 0
ALERT_COOLDOWN = 300  # Prevents alert flooding (5 min)

async def send_telegram_alert(message: str):
    """Sends an alert directly to the administrator's Telegram."""
    token = os.getenv("TELEGRAM_BOT_TOKEN")
    chat_id = os.getenv("TELEGRAM_CHAT_ID")

    if not token or not chat_id:
        return

    url = f"https://api.telegram.org/bot{token}/sendMessage"
    payload = {"chat_id": chat_id, "text": f"⚠️ **ALERTA DE MONITORAMENTO**\n\n{message}", "parse_mode": "Markdown"}

    try:
        async with httpx.AsyncClient() as client:
            await client.post(url, json=payload, timeout=10)
    except Exception as e:
        print(f"Erro ao enviar alerta Telegram: {e}")

def track_request():
    """Records a request and checks whether the alert threshold was reached."""
    global LAST_ALERT_TIME
    now = time.time()
    REQUEST_HISTORY.append(now)

    # Drop entries older than the 60s window
    while REQUEST_HISTORY and REQUEST_HISTORY[0] < now - ALERT_WINDOW:
        REQUEST_HISTORY.popleft()

    # Check the threshold
    if len(REQUEST_HISTORY) > ALERT_THRESHOLD:
        # Respect the cooldown so we do not flood Telegram
        if now - LAST_ALERT_TIME > ALERT_COOLDOWN:
            LAST_ALERT_TIME = now
            msg = (
                f"Detectado alto volume de requisições LLM!\n"
                f"• Total: {len(REQUEST_HISTORY)} requisições nos últimos {ALERT_WINDOW}s\n"
                f"• Limite: {ALERT_THRESHOLD} requisições\n"
                f"• Horário: {time.strftime('%H:%M:%S', time.localtime(now))}"
            )
            # track_request is sync and called from varied places, so the caller
            # is responsible for dispatching the alert as a background task
            # (or awaiting it) wherever an event loop is available.
            return msg
    return None
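
# Illustrative sketch (hypothetical helper, not referenced elsewhere in this
# module): how a synchronous call site with no running event loop could wire
# track_request() to the async alert sender. Async callers should schedule it
# with asyncio.create_task instead, as call_llm does below.
def _alert_if_rate_spike_sync():
    alert = track_request()
    if alert:
        asyncio.run(send_telegram_alert(alert))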

# ============================================================
# PROVIDER CONFIGURATION
# ============================================================
LLM_PROVIDERS = {
    "gemini": {
        "name": "Google Gemini",
        "type": "api",
        "models": ["gemini-1.5-flash", "gemini-1.5-pro", "gemini-2.0-flash-exp"],
        "default": "gemini-1.5-flash",  # Pinned to 1.5-flash for stability
        "endpoint": "https://generativelanguage.googleapis.com/v1beta/models"
    },
    "openai": {
        "name": "OpenAI GPT",
        "type": "api",
        "models": ["gpt-4o", "gpt-4-turbo", "gpt-3.5-turbo"],
        "default": "gpt-4o",
        "endpoint": "https://api.openai.com/v1"
    },
    "anthropic": {
        "name": "Anthropic Claude",
        "type": "api",
        "models": ["claude-3-5-sonnet-20241022", "claude-3-5-haiku-20241022", "claude-3-opus-20240229"],
        "default": "claude-3-5-sonnet-20241022",
        "endpoint": "https://api.anthropic.com/v1"
    },
    "openrouter": {
        "name": "OpenRouter",
        "type": "api",
        "models": ["qwen/qwen-2.5-72b-instruct", "inclusionai/ling-2.6-flash:free", "google/gemini-2.0-flash-001"],
        "default": "qwen/qwen-2.5-72b-instruct",
        "endpoint": "https://openrouter.ai/api/v1"
    },
    "minimax": {
        "name": "MiniMax (Hermes)",
        "type": "api",
        "models": ["abab7-preview", "abab6.5s-chat", "minimax-text-01"],
        "default": "abab7-preview",
        "endpoint": "https://api.minimax.io/v1/text/chatcompletion_v2"
    },
    "ollama": {
        "name": "Ollama (Local)",
        "type": "local",
        "endpoint": os.getenv("OLLAMA_HOST", "http://ollama:11434"),
        "models": None,  # Discovered at runtime via list_ollama_models()
        "default": "llama3.2:1b"
    }
}

# ============================================================
# CONFIG MANAGER (LOCAL STORAGE OBSOLETE; DELEGATES TO config.py)
# ============================================================

def get_orchestrator_config() -> dict:
    """Returns the orchestrator config."""
    cfg = get_config()
    return cfg.get("orchestrator", {
        "planner": {"provider": "openrouter", "model": "qwen/qwen-2.5-72b-instruct"},
        "executor": {"provider": "ollama", "model": "llama3.2:1b"}
    })

def set_planner(provider: str = None, model: str = None) -> dict:
    """Sets the planner provider and model."""
    cfg = get_config()
    if "orchestrator" not in cfg:
        cfg["orchestrator"] = {}
    if provider:
        cfg["orchestrator"]["planner"] = {
            "provider": provider,
            "model": model or LLM_PROVIDERS[provider]["default"]
        }
    save_config(cfg)
    return cfg["orchestrator"].get("planner", {"provider": "openrouter", "model": "qwen/qwen-2.5-72b-instruct"})

def set_executor(provider: str = None, model: str = None) -> dict:
    """Sets the executor provider and model."""
    cfg = get_config()
    if "orchestrator" not in cfg:
        cfg["orchestrator"] = {}
    if provider:
        cfg["orchestrator"]["executor"] = {
            "provider": provider,
            "model": model or LLM_PROVIDERS[provider]["default"]
        }
    save_config(cfg)
    return cfg["orchestrator"].get("executor", {"provider": "ollama", "model": "llama3.2:1b"})

def set_api_key(provider: str, key: str):
    """Stores a provider's API key."""
    cfg = get_config()
    if "api_keys" not in cfg:
        cfg["api_keys"] = {}
    cfg["api_keys"][provider] = key
    save_config(cfg)

def get_api_key(provider: str) -> str:
    """Looks up a provider's API key (config, env var, or fallback)."""
    cfg = get_config()

    # 1. Modern API keys stored in config
    api_keys = cfg.get("api_keys", {})
    if api_keys.get(provider):
        return api_keys[provider]

    # 2. Legacy keys at the config root (e.g. gemini_api_key)
    legacy_key = f"{provider}_api_key"
    if cfg.get(legacy_key):
        return cfg[legacy_key]

    # 3. Fall back to environment variables
    env_vars = {
        "openai": "OPENAI_API_KEY",
        "anthropic": "ANTHROPIC_API_KEY",
        "gemini": "GEMINI_API_KEY",
        "openrouter": "OPENROUTER_API_KEY",
        "minimax": "MINIMAX_API_KEY"
    }

    # 3.1 Provider-specific variable
    if provider in env_vars and os.getenv(env_vars[provider]):
        return os.getenv(env_vars[provider])

    # 3.2 DYNAMIC lookup (orchestrator vault keys): any variable whose name
    # starts with the provider name (e.g. openrouter_qwen)
    for key, value in os.environ.items():
        if key.lower().startswith(f"{provider}_"):
            return value

    # 4. LAST-RESORT fallback (Antigravity safety net)
    if provider == "gemini":
        return "AIzaSyAxuqqqtxWSnns_XbXV7DVePSdM4DXPaVo"

    return ""
# ============================================================
# OLLAMA DISCOVERY
# ============================================================

async def list_ollama_models() -> List[str]:
    """Fetches the models available on Ollama (async)."""
    try:
        endpoint = LLM_PROVIDERS["ollama"]["endpoint"]
        async with httpx.AsyncClient() as client:
            response = await client.get(f"{endpoint}/api/tags", timeout=5)
            if response.status_code == 200:
                models = [m["name"] for m in response.json().get("models", [])]
                LLM_PROVIDERS["ollama"]["models"] = models
                return models
    except Exception as e:
        print(f"Erro ao buscar modelos Ollama: {e}")
    return []

async def get_available_models(provider: str = None) -> List[Dict]:
    """Returns the models available for one provider, or for all providers (async)."""
    if provider:
        p = LLM_PROVIDERS.get(provider)
        if not p:
            return []
        if p["type"] == "local" and provider == "ollama":
            models = await list_ollama_models()
            return [{"provider": provider, "models": models}]
        else:
            return [{"provider": provider, "models": p.get("models", [p["default"]])}]

    # All providers
    result = []
    for prov_id, prov in LLM_PROVIDERS.items():
        if prov_id == "ollama":
            models = await list_ollama_models()
            result.append({"provider": prov_id, "name": prov["name"], "models": models})
        else:
            result.append({"provider": prov_id, "name": prov["name"], "models": prov.get("models", [prov["default"]])})
    return result
# ============================================================
# ASYNC LLM CALL FUNCTIONS
# ============================================================

async def call_llm(provider: str, model: str, prompt: str, system_prompt: str = None, **kwargs) -> dict:
    """Universal async entry point for LLM calls, with traffic monitoring."""
    # Rate-limit monitoring
    alert_msg = track_request()
    if alert_msg:
        asyncio.create_task(send_telegram_alert(alert_msg))

    if provider == "gemini":
        res = await _call_gemini_async(model, prompt, system_prompt)
    elif provider == "openai":
        res = await _call_openai_async(model, prompt, system_prompt)
    elif provider == "anthropic":
        res = await _call_anthropic_async(model, prompt, system_prompt)
    elif provider == "ollama":
        res = await _call_ollama_async(model, prompt, system_prompt)
    elif provider == "openrouter":
        res = await _call_openrouter_async(model, prompt, system_prompt)
    elif provider == "minimax":
        res = await _call_minimax_async(model, prompt, system_prompt)
    else:
        return {"content": f"Erro: Provider '{provider}' não suportado.", "usage": {}}

    # Ensure the return value is a dict (compatibility with legacy str-returning shims)
    if isinstance(res, str):
        return {"content": res, "usage": {}, "model": model}
    return res

async def _call_openrouter_async(model: str, prompt: str, system_prompt: str = None) -> dict:
    """Calls the OpenRouter API (OpenAI-compatible) via httpx (async)."""
    api_key = get_api_key("openrouter")
    url = "https://openrouter.ai/api/v1/chat/completions"

    messages = []
    if system_prompt:
        messages.append({"role": "system", "content": system_prompt})
    messages.append({"role": "user", "content": prompt})

    payload = {
        "model": model,
        "messages": messages,
        "temperature": 0.7
    }
    headers = {
        "Authorization": f"Bearer {api_key}",
        "HTTP-Referer": "https://botvps.cloud",  # OpenRouter attribution header
        "X-Title": "BotVPS Factory",
        "Content-Type": "application/json"
    }

    try:
        async with httpx.AsyncClient() as client:
            res = await client.post(url, json=payload, headers=headers, timeout=120)
            if res.status_code == 200:
                data = res.json()
                if "choices" in data and len(data["choices"]) > 0:
                    return {
                        "content": data["choices"][0]["message"]["content"],
                        "usage": data.get("usage", {}),
                        "model": data.get("model", model)
                    }
                return {"content": f"Erro OpenRouter (Resposta sem 'choices'): {json.dumps(data)}", "usage": {}}

            # Non-200: try to extract a detailed error body
            try:
                error_data = res.json()
                return {"content": f"Erro OpenRouter {res.status_code}: {json.dumps(error_data)}", "usage": {}}
            except Exception:
                return {"content": f"Erro OpenRouter: {res.status_code} - {res.text}", "usage": {}}
    except Exception as e:
        return {"content": f"Erro OpenRouter: {str(e)}", "usage": {}}

async def _call_gemini_async(model: str, prompt: str, system_prompt: str = None) -> str:
    """Calls the Google Gemini API via httpx (async)."""
    api_key = get_api_key("gemini")
    url = f"https://generativelanguage.googleapis.com/v1beta/models/{model}:generateContent?key={api_key}"

    contents = [{"role": "user", "parts": [{"text": prompt}]}]

    payload = {
        "contents": contents,
        "safetySettings": [
            {"category": "HARM_CATEGORY_HARASSMENT", "threshold": "BLOCK_NONE"},
            {"category": "HARM_CATEGORY_HATE_SPEECH", "threshold": "BLOCK_NONE"},
            {"category": "HARM_CATEGORY_SEXUALLY_EXPLICIT", "threshold": "BLOCK_NONE"},
            {"category": "HARM_CATEGORY_DANGEROUS_CONTENT", "threshold": "BLOCK_NONE"}
        ],
        "generationConfig": {
            "temperature": 0.7,
            "maxOutputTokens": 4096
        }
    }
    if system_prompt:
        # Pass the system prompt via the API's dedicated system-instruction field
        payload["systemInstruction"] = {"parts": [{"text": system_prompt}]}

    try:
        async with httpx.AsyncClient() as client:
            res = await client.post(url, json=payload, timeout=60)
            if res.status_code == 200:
                data = res.json()
                try:
                    candidate = data["candidates"][0]
                    return candidate["content"]["parts"][0]["text"]
                except (KeyError, IndexError):
                    # The model may have blocked the prompt (safety) or returned an empty response
                    finish_reason = data.get("candidates", [{}])[0].get("finishReason", "Unknown")
                    return f"Erro Gemini (Parsing): Resposta sem formato esperado. Motivo/FinishReason: {finish_reason}. Raw: {json.dumps(data)}"
            return f"Erro Gemini: {res.status_code} - {res.text}"
    except Exception as e:
        return f"Erro Gemini: {str(e)}"

async def _call_minimax_async(model: str, prompt: str, system_prompt: str = None) -> dict:
    """Calls the MiniMax API (V2) via httpx (async)."""
    api_key = get_api_key("minimax")
    url = "https://api.minimax.io/v1/text/chatcompletion_v2"

    messages = []
    if system_prompt:
        messages.append({"role": "system", "content": system_prompt})
    messages.append({"role": "user", "content": prompt})

    payload = {
        "model": model,
        "messages": messages,
        "tools": [],
        "tool_choice": "none",
        "stream": False
    }
    headers = {
        "Authorization": f"Bearer {api_key}",
        "Content-Type": "application/json"
    }

    try:
        async with httpx.AsyncClient() as client:
            res = await client.post(url, json=payload, headers=headers, timeout=120)
            if res.status_code == 200:
                data = res.json()
                # MiniMax V2 response structure
                if "choices" in data and len(data["choices"]) > 0:
                    return {
                        "content": data["choices"][0]["message"]["content"],
                        "usage": data.get("usage", {}),
                        "model": data.get("base_resp", {}).get("model") or model
                    }
                return {"content": f"Erro MiniMax (Resposta inesperada): {json.dumps(data)}", "usage": {}}
            return {"content": f"Erro MiniMax {res.status_code}: {res.text}", "usage": {}}
    except Exception as e:
        return {"content": f"Erro MiniMax: {str(e)}", "usage": {}}

async def _call_openai_async(model: str, prompt: str, system_prompt: str = None) -> str:
    """Calls the OpenAI API via httpx (async)."""
    api_key = get_api_key("openai")
    url = "https://api.openai.com/v1/chat/completions"

    messages = []
    if system_prompt:
        messages.append({"role": "system", "content": system_prompt})
    messages.append({"role": "user", "content": prompt})

    payload = {
        "model": model,
        "messages": messages,
        "temperature": 0.7,
        "max_completion_tokens": 4096
    }
    headers = {"Authorization": f"Bearer {api_key}", "Content-Type": "application/json"}

    try:
        async with httpx.AsyncClient() as client:
            res = await client.post(url, json=payload, headers=headers, timeout=60)
            if res.status_code == 200:
                return res.json()["choices"][0]["message"]["content"]
            return f"Erro OpenAI: {res.status_code} - {res.text}"
    except Exception as e:
        return f"Erro OpenAI: {str(e)}"

async def _call_anthropic_async(model: str, prompt: str, system_prompt: str = None) -> str:
    """Calls the Anthropic API via httpx (async)."""
    api_key = get_api_key("anthropic")
    url = "https://api.anthropic.com/v1/messages"

    headers = {
        "x-api-key": api_key,
        "anthropic-version": "2023-06-01",
        "content-type": "application/json"
    }
    payload = {
        "model": model,
        "max_tokens": 4096,
        "messages": [{"role": "user", "content": prompt}],
        "temperature": 0.7
    }
    if system_prompt:
        payload["system"] = system_prompt

    try:
        async with httpx.AsyncClient() as client:
            res = await client.post(url, json=payload, headers=headers, timeout=60)
            if res.status_code == 200:
                return res.json()["content"][0]["text"]
            return f"Erro Anthropic: {res.status_code} - {res.text}"
    except Exception as e:
        return f"Erro Anthropic: {str(e)}"

async def _call_ollama_async(model: str, prompt: str, system_prompt: str = None) -> str:
    """Calls a local Ollama instance via httpx (async)."""
    endpoint = LLM_PROVIDERS["ollama"]["endpoint"]

    payload = {
        "model": model,
        "prompt": prompt,
        "stream": False,
        "options": {
            "num_ctx": 4096,
            "temperature": 0.7
        }
    }
    if system_prompt:
        payload["system"] = system_prompt

    try:
        async with httpx.AsyncClient() as client:
            res = await client.post(f"{endpoint}/api/generate", json=payload, timeout=180)
            if res.status_code == 200:
                return res.json().get("response", "")
            return f"Erro Ollama: {res.status_code} - {res.text}"
    except Exception as e:
        return f"Erro Ollama: {str(e)}"

def check_ollama_connection() -> dict:
    """Synchronous version kept for quick status checks (compatibility)."""
    import requests

    endpoint = LLM_PROVIDERS["ollama"]["endpoint"]
    try:
        res = requests.get(f"{endpoint}/api/tags", timeout=10)
        if res.status_code == 200:
            models = [m.get("name") for m in res.json().get("models", [])]
            return {"status": "ok", "models": models, "endpoint": endpoint}
        return {"status": "error", "code": res.status_code, "endpoint": endpoint}
    except Exception as e:
        return {"status": "error", "message": str(e), "endpoint": endpoint}

# ============================================================
# PLANNER & EXECUTOR WRAPPERS (ASYNC BY DESIGN)
# ============================================================

def get_planner_llm() -> tuple:
    cfg = get_orchestrator_config()
    planner = cfg.get("planner", {"provider": "openrouter", "model": "qwen/qwen-2.5-72b-instruct"})
    return planner["provider"], planner["model"]


def get_executor_llm() -> tuple:
    cfg = get_orchestrator_config()
    executor = cfg.get("executor", {"provider": "ollama", "model": "llama3.2:1b"})
    return executor["provider"], executor["model"]

async def call_planner_async(prompt: str, system_prompt: str = None) -> str:
    provider, model = get_planner_llm()
    try:
        response_dict = await call_llm(provider, model, prompt, system_prompt)
        content = response_dict["content"]
        # If the response indicates an API error, trigger the fallback
        if content.startswith("Erro OpenRouter"):
            raise Exception(content)
        return content
    except Exception as e:
        # FALLBACK logic: if Qwen fails, try Ling-2.6-flash
        if provider == "openrouter" and model == "qwen/qwen-2.5-72b-instruct":
            backup_model = "inclusionai/ling-2.6-flash:free"
            print(f"⚠️ [FALLBACK] Falha no Qwen ({str(e)}). Tentando {backup_model}...")
            res = await call_llm("openrouter", backup_model, prompt, system_prompt)
            return res["content"]
        return f"Erro Crítico no Planner: {str(e)}"

async def call_executor_async(prompt: str, system_prompt: str = None) -> str:
    provider, model = get_executor_llm()
    res = await call_llm(provider, model, prompt, system_prompt)
    return res["content"]

# --- BACKWARD COMPATIBILITY SHIMS (SYNC WRAPPERS) ---
def call_planner(prompt: str, system_prompt: str = None) -> str:
    return asyncio.run(call_planner_async(prompt, system_prompt))


def call_executor(prompt: str, system_prompt: str = None) -> str:
    return asyncio.run(call_executor_async(prompt, system_prompt))