From c2273a13f056c1bea52a32d334a031150fccd2ba Mon Sep 17 00:00:00 2001
From: admtracksteel <admtracksteel@gmail.com>
Date: Tue, 24 Mar 2026 11:57:55 +0000
Subject: [PATCH] =?UTF-8?q?=F0=9F=9A=80=20Auto-deploy:=20BotVPS=20atualiza?=
 =?UTF-8?q?do=20em=2024/03/2026=2011:57:55?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 audio_handler.py | 37 ++++++++++++++++++-------------------
 bot_logic.py     | 14 +++++++-------
 main.py          | 42 +++++++++++++++++++++++++++++++++++++++++-
 3 files changed, 66 insertions(+), 27 deletions(-)
diff --git a/audio_handler.py b/audio_handler.py
index f299eef..cc57179 100644
--- a/audio_handler.py
+++ b/audio_handler.py
@@ -24,16 +24,8 @@ def transcribe_audio(file_path: str) -> str:
         if os.path.exists(temp_wav):
             os.remove(temp_wav)
 
-async def _edge_tts_gen(text: str, filepath: str):
-    """Gera áudio usando Microsoft Edge TTS (Gratuito e Neural)."""
-    # Voz Masculina PT-BR: Antonio ou Donato
-    # Rate +20% para ser mais rápido conforme pedido
-    voice = "pt-BR-AntonioNeural"
-    communicate = edge_tts.Communicate(text, voice, rate="+20%")
-    await communicate.save(filepath)
-
-def text_to_speech(text: str) -> str:
-    """Sintetiza texto em áudio MP3 usando Edge TTS (Voz Masculina Rápida)."""
+async def text_to_speech_async(text: str) -> str:
+    """Sintetiza texto em áudio MP3 usando Edge TTS (Versão ASYNC)."""
     # Limpeza para narração
     texto_limpo = text.replace("🤖", "").replace("🧑‍🏫", "").replace("*", "").replace("`", "")
     texto_limpo = re.sub(r'<REFINED>.*?</REFINED>', '', texto_limpo, flags=re.DOTALL).strip()
@@ -44,13 +36,37 @@ def text_to_speech(text: str) -> str:
     filename = f"audio_reply_{uuid.uuid4().hex[:8]}.mp3"
     filepath = os.path.join("/tmp", filename)
     
+    # Male PT-BR voice: Antonio
+    # Rate +20% so the narration is faster
+    voice = "pt-BR-AntonioNeural"
+    communicate = edge_tts.Communicate(texto_limpo, voice, rate="+20%")
+    await communicate.save(filepath)
+    return filename
+
+def text_to_speech(text: str) -> str:
+    """Synchronous wrapper around text_to_speech_async, kept for legacy callers.
+
+    Must not be called from a thread that is already running an event loop
+    (for example from inside an async handler); such callers should await
+    text_to_speech_async directly.
+
+    Args:
+        text: Text to synthesize.
+
+    Returns:
+        The MP3 file name (not the full path) returned by text_to_speech_async.
+
+    Raises:
+        RuntimeError: If an event loop is already running in this thread.
+    """
+    # Probe for a running loop *before* creating the coroutine: asyncio.run()
+    # would reject the call only after the coroutine object already exists,
+    # leaving it un-awaited.
     try:
-        # Edge TTS é async, precisamos rodar no loop
-        asyncio.run(_edge_tts_gen(texto_limpo, filepath))
-        return filename
-    except Exception as e:
-        print(f"[VOICE] Erro Edge TTS: {e}. Criando arquivo mudo ou ignorando.")
-        # Cria um arquivo vazio para não quebrar o retorno
-        with open(filepath, "wb") as f:
-            f.write(b"")
-        return filename
+        asyncio.get_running_loop()
+    except RuntimeError:
+        # No loop is running in this thread, so asyncio.run() is safe. Errors
+        # raised by the synthesis itself propagate unchanged.
+        return asyncio.run(text_to_speech_async(text))
+    print("[VOICE] Erro: text_to_speech (sync) chamado dentro de um event loop.")
+    raise RuntimeError("asyncio.run() cannot be called from a running event loop")
diff --git a/bot_logic.py b/bot_logic.py
index c18bc84..c83479d 100644
--- a/bot_logic.py
+++ b/bot_logic.py
@@ -9,7 +9,7 @@ from orchestrator import (
     format_completion_message, execute_step_async
 )
 from ai_agent import query_agent_async
-from audio_handler import transcribe_audio, text_to_speech
+from audio_handler import transcribe_audio, text_to_speech_async
 from config import get_config
 
 # Configuração de logging
@@ -22,10 +22,10 @@ logger = logging.getLogger(__name__)
 TOKEN = os.getenv("TELEGRAM_BOT_TOKEN")
 ALLOWED_CHAT_ID = os.getenv("TELEGRAM_CHAT_ID")
 
-def synthesize_audio(text: str) -> str:
-    """Wrapper para a síntese de voz centralizada."""
+async def synthesize_audio(text: str) -> str:
+    """Wrapper assíncrono para a síntese de voz centralizada."""
     try:
-        filename = text_to_speech(text)
+        filename = await text_to_speech_async(text)
         return os.path.join("/tmp", filename)
     except Exception as e:
         logger.error(f"TTS Error: {e}")
@@ -119,10 +119,10 @@ async def process_logic(update: Update, context: ContextTypes.DEFAULT_TYPE, user
     
     # Se foi por voz, responde por voz também
     if is_voice:
-        audio_path = synthesize_audio(reply_clean)
+        audio_path = await synthesize_audio(reply_clean)
         if audio_path and os.path.exists(audio_path):
-            with open(audio_path, 'rb') as voice:
-                await update.message.reply_voice(voice)
+            with open(audio_path, 'rb') as voice_file:
+                await update.message.reply_voice(voice_file)
             os.remove(audio_path)
 
 def get_telegram_app():
diff --git a/main.py b/main.py
index 5e73e20..fa63fa4 100644
--- a/main.py
+++ b/main.py
@@ -4,13 +4,16 @@ import subprocess
 import time
 import json
 import asyncio
-from fastapi import FastAPI, Request, Header, Depends, HTTPException, status
+from fastapi import FastAPI, Request, Header, Depends, HTTPException, status, UploadFile, File
 from fastapi.responses import HTMLResponse, JSONResponse, FileResponse
 from fastapi.templating import Jinja2Templates
 from dotenv import load_dotenv
 from starlette.concurrency import run_in_threadpool
+import shutil
+import uuid
 
 from ai_agent import query_agent_async
+from audio_handler import transcribe_audio, text_to_speech_async
 from config import get_config, save_config
 from credential_manager import fetch_from_gitea_repo_async
 from orchestrator import (
@@ -134,6 +137,83 @@ async def web_chat(message: dict, is_auth: bool = Depends(verify_password)):
     reply = await query_agent_async(user_text, chat_history=history)
     return {"reply": reply}
 
+@app.post("/api/chat-audio")
+async def web_chat_audio(audio: UploadFile = File(...), is_auth: bool = Depends(verify_password)):
+    """Handle a voice message from the web UI: STT, then the agent, then TTS.
+
+    The upload is stored under /tmp, transcribed, sent to the agent, and the
+    agent's reply is synthesized to an MP3 that is exposed through
+    ``/api/audio/{filename}``. The temporary upload is always removed.
+
+    Args:
+        audio: Recording uploaded by the browser.
+        is_auth: Result of the ``verify_password`` dependency.
+
+    Returns:
+        A dict with ``text`` (transcription) and ``reply`` (agent answer),
+        plus ``audio_url`` on success. Errors are reported in ``reply``
+        instead of being raised.
+    """
+    # Local import so this handler does not rely on a module-level ``re``.
+    import re
+
+    # Keep only the base name of the client-supplied filename so it cannot
+    # add directory components to the temporary path.
+    safe_name = os.path.basename(audio.filename or "") or "upload"
+    temp_in = os.path.join("/tmp", f"{uuid.uuid4().hex}_{safe_name}")
+
+    try:
+        # 1. Save the uploaded audio. Done inside the try so a write failure
+        #    is reported like any other error and cleanup still runs.
+        with open(temp_in, "wb") as buffer:
+            await run_in_threadpool(shutil.copyfileobj, audio.file, buffer)
+
+        # 2. Transcribe (STT). transcribe_audio is a plain (sync) function,
+        #    so run it in the threadpool to keep the event loop responsive.
+        text = await run_in_threadpool(transcribe_audio, temp_in)
+        if not text:
+            return {"reply": "Não entendi seu áudio.", "text": ""}
+
+        # 3. Ask the agent.
+        reply = await query_agent_async(text)
+
+        # 4. Synthesize the reply (TTS), without the <REFINED> section.
+        reply_clean = re.sub(r'<REFINED>.*?</REFINED>', '', reply, flags=re.DOTALL).strip()
+        filename = await text_to_speech_async(reply_clean)
+
+        return {
+            "text": text,
+            "reply": reply,
+            "audio_url": f"/api/audio/{filename}"
+        }
+    except Exception as e:
+        return {"reply": f"Erro Áudio: {str(e)}", "text": "Erro."}
+    finally:
+        if os.path.exists(temp_in):
+            os.remove(temp_in)
+
+@app.get("/api/audio/{filename}")
+async def get_audio_file(filename: str):
+    """Serve a synthesized reply MP3 from /tmp.
+
+    Only names of the form ``audio_reply_<8 hex chars>.mp3`` are served, so
+    this route cannot be used to read arbitrary files from /tmp.
+
+    Args:
+        filename: Last path segment of the request URL.
+
+    Returns:
+        The MP3 as a ``FileResponse``, or a 404 JSON error.
+    """
+    # Local import so this handler does not rely on a module-level ``re``.
+    import re
+
+    if re.fullmatch(r"audio_reply_[0-9a-f]{8}\.mp3", filename):
+        path = os.path.join("/tmp", filename)
+        if os.path.isfile(path):
+            return FileResponse(path, media_type="audio/mpeg")
+    return JSONResponse({"error": "File not found"}, status_code=404)
+
 @app.post("/api/orchestrate")
 async def orchestrate_task(task_data: dict, is_auth: bool = Depends(verify_password)):
     task = task_data.get("task", "")