diff --git a/ai_agent.py b/ai_agent.py
index 2920b6a..5240a06 100644
--- a/ai_agent.py
+++ b/ai_agent.py
@@ -78,32 +78,40 @@ DIRETRIZES:
     current_history = history_str
     max_iterations = 6
 
+    total_in = 0
+    total_out = 0
+    final_model = current_model
+
     for i in range(max_iterations):
        print(f"[AGENT] Iteração {i+1} - Enviando para {provider} (modelo padrão)...")
        try:
-            response = await get_llm_response_async(system_prompt + current_history, provider, cfg)
+            res_dict = await call_llm(provider, current_model, system_prompt + current_history)
             # Lógica de FALLBACK: Se o Qwen falhar ou retornar erro de API, tenta o Ling-2.6-flash
-            if response.startswith("Erro OpenRouter") and provider == "openrouter" and current_model == "qwen/qwen-2.5-72b-instruct":
+            if res_dict["content"].startswith("Erro OpenRouter") and provider == "openrouter" and current_model == "qwen/qwen-2.5-72b-instruct":
                 backup_model = "inclusionai/ling-2.6-flash:free"
                 print(f"⚠️ [FALLBACK CHAT] Falha no Qwen. Tentando {backup_model}...")
-                response = await call_llm("openrouter", backup_model, system_prompt + current_history)
+                res_dict = await call_llm("openrouter", backup_model, system_prompt + current_history)
         except Exception as e:
             if provider == "openrouter" and current_model == "qwen/qwen-2.5-72b-instruct":
                 backup_model = "inclusionai/ling-2.6-flash:free"
                 print(f"⚠️ [FALLBACK CHAT] Exceção no Qwen ({str(e)}). Tentando {backup_model}...")
-                response = await call_llm("openrouter", backup_model, system_prompt + current_history)
+                res_dict = await call_llm("openrouter", backup_model, system_prompt + current_history)
             else:
                 return f"Erro Crítico no Agente: {str(e)}"
 
+        response = res_dict["content"]
+        usage = res_dict.get("usage", {})
+        total_in += usage.get("prompt_tokens", 0)
+        total_out += usage.get("completion_tokens", 0)
+        final_model = res_dict.get("model", final_model)
+
         print(f"[LLM RESPONSE]: {response}")
 
         # Regex mais flexível: tenta casar [TOOL:nome] e extrair o conteúdo até [/TOOL] ou final da string
-        # Regex robusto: captura [TOOL:nome] ou TOOL:nome (sem colchetes como fallback)
         match = re.search(r"(?:\[?TOOL:([\w_]+)\]?|\[TOOL:([\w_]+)\])", response, re.I)
         if match:
             t_name = (match.group(1) or match.group(2)).strip().lower()
-            # Mapeamento de conveniência/atalho
             if t_name == "run":
                 t_name = "run_bash_command"
 
             content_after = response[match.end():]
@@ -117,13 +125,11 @@ DIRETRIZES:
             func = tool_info["func"]
             print(f"[AGENT] Executando {t_name} com argumento: {arg[:50]}...")
 
-            # Execução (suporta async se necessário, embora tools.py seja sync)
             if asyncio.iscoroutinefunction(func):
                 obs = await func(arg) if arg else await func()
             else:
                 obs = func(arg) if arg else func()
 
-            # Se o resultado for um dicionário (comum em tools_v2), extrai o output ou converte para string
             if isinstance(obs, dict):
                 obs = obs.get("output") or obs.get("message") or str(obs)
 
@@ -136,10 +142,11 @@ DIRETRIZES:
             print(f"[AGENT] Erro: Ferramenta '{t_name}' não encontrada.")
             current_history += f"\nAgente: {response}\nSISTEMA: Erro: Ferramenta '{t_name}' inexistente no sistema.\n"
         else:
-            # Se não há ferramenta, terminou o pensamento.
-            return response
+            # Terminou o pensamento. Adiciona rodapé de tokens.
+            footer = f"\n\n---\n⚙️ **Modelo:** `{final_model}`\n📊 **Tokens:** `{total_in} IN` / `{total_out} OUT`"
+            return response + footer
 
     # Ao atingir o limite, tenta ao menos limpar a resposta final
     final_reply = response if 'response' in locals() else 'Nenhuma'
-    final_reply = re.sub(r'[<\[]/?REFINED[>\]]', '', final_reply, flags=re.IGNORECASE).strip()
-    return f"Limite de iterações atingido. RESUMO: {final_reply}"
+    footer = f"\n\n---\n⚠️ *Limite de iterações atingido*\n⚙️ **Modelo:** `{final_model}`\n📊 **Tokens:** `{total_in} IN` / `{total_out} OUT`"
+    return f"RESUMO: {final_reply}" + footer
diff --git a/llm_providers.py b/llm_providers.py
index a0aa76d..541584a 100644
--- a/llm_providers.py
+++ b/llm_providers.py
@@ -231,7 +231,7 @@ async def get_available_models(provider: str = None) -> List[Dict]:
 # ============================================================
 # ASYNC LLM CALL FUNCTIONS
 # ============================================================
-async def call_llm(provider: str, model: str, prompt: str, system_prompt: str = None, **kwargs) -> str:
+async def call_llm(provider: str, model: str, prompt: str, system_prompt: str = None, **kwargs) -> dict:
     """Suporte universal async para chamadas de LLM com monitoramento de tráfego."""
     # Monitoramento de Rate Limit
     alert_msg = track_request()
@@ -239,19 +239,24 @@ async def call_llm(provider: str, model: str, prompt: str =
         asyncio.create_task(send_telegram_alert(alert_msg))
 
     if provider == "gemini":
-        return await _call_gemini_async(model, prompt, system_prompt)
+        res = await _call_gemini_async(model, prompt, system_prompt)
     elif provider == "openai":
-        return await _call_openai_async(model, prompt, system_prompt)
+        res = await _call_openai_async(model, prompt, system_prompt)
     elif provider == "anthropic":
-        return await _call_anthropic_async(model, prompt, system_prompt)
+        res = await _call_anthropic_async(model, prompt, system_prompt)
     elif provider == "ollama":
-        return await _call_ollama_async(model, prompt, system_prompt)
+        res = await _call_ollama_async(model, prompt, system_prompt)
     elif provider == "openrouter":
-        return await _call_openrouter_async(model, prompt, system_prompt)
+        res = await _call_openrouter_async(model, prompt, system_prompt)
     else:
-        return f"Erro: Provider '{provider}' não suportado."
+ return {"content": f"Erro: Provider '{provider}' não suportado.", "usage": {}} + + # Garante que o retorno seja um dicionário (compatibilidade com shims antigos se houver) + if isinstance(res, str): + return {"content": res, "usage": {}, "model": model} + return res -async def _call_openrouter_async(model: str, prompt: str, system_prompt: str = None) -> str: +async def _call_openrouter_async(model: str, prompt: str, system_prompt: str = None) -> dict: """Chama API do OpenRouter (OpenAI Compatible) via httpx (async).""" api_key = get_api_key("openrouter") url = "https://openrouter.ai/api/v1/chat/completions" @@ -279,17 +284,21 @@ async def _call_openrouter_async(model: str, prompt: str, system_prompt: str = N if res.status_code == 200: data = res.json() if "choices" in data and len(data["choices"]) > 0: - return data["choices"][0]["message"]["content"] - return f"Erro OpenRouter (Resposta sem 'choices'): {json.dumps(data)}" + return { + "content": data["choices"][0]["message"]["content"], + "usage": data.get("usage", {}), + "model": data.get("model", model) + } + return {"content": f"Erro OpenRouter (Resposta sem 'choices'): {json.dumps(data)}", "usage": {}} # Se não for 200, tenta extrair erro detalhado try: error_data = res.json() - return f"Erro OpenRouter {res.status_code}: {json.dumps(error_data)}" + return {"content": f"Erro OpenRouter {res.status_code}: {json.dumps(error_data)}", "usage": {}} except: - return f"Erro OpenRouter: {res.status_code} - {res.text}" + return {"content": f"Erro OpenRouter: {res.status_code} - {res.text}", "usage": {}} except Exception as e: - return f"Erro OpenRouter: {str(e)}" + return {"content": f"Erro OpenRouter: {str(e)}", "usage": {}} async def _call_gemini_async(model: str, prompt: str, system_prompt: str = None) -> str: """Chama API do Google Gemini via httpx (async).""" @@ -438,22 +447,25 @@ def get_executor_llm() -> tuple: async def call_planner_async(prompt: str, system_prompt: str = None) -> str: provider, model = get_planner_llm() try: - response = await call_llm(provider, model, prompt, system_prompt) + response_dict = await call_llm(provider, model, prompt, system_prompt) + content = response_dict["content"] # Se a resposta indicar um erro de API, disparamos o fallback - if response.startswith("Erro OpenRouter"): - raise Exception(response) - return response + if content.startswith("Erro OpenRouter"): + raise Exception(content) + return content except Exception as e: # Lógica de FALLBACK: Se o Qwen falhar, tenta o Ling-2.6-flash if provider == "openrouter" and model == "qwen/qwen-2.5-72b-instruct": backup_model = "inclusionai/ling-2.6-flash:free" print(f"⚠️ [FALLBACK] Falha no Qwen ({str(e)}). Tentando {backup_model}...") - return await call_llm("openrouter", backup_model, prompt, system_prompt) + res = await call_llm("openrouter", backup_model, prompt, system_prompt) + return res["content"] return f"Erro Crítico no Planner: {str(e)}" async def call_executor_async(prompt: str, system_prompt: str = None) -> str: provider, model = get_executor_llm() - return await call_llm(provider, model, prompt, system_prompt) + res = await call_llm(provider, model, prompt, system_prompt) + return res["content"] # --- BACKWARD COMPATIBILITY SHIMS (SYNC WRAPPERS) --- def call_planner(prompt: str, system_prompt: str = None) -> str: