🚀 Auto-deploy: BotVPS atualizado em 28/03/2026 23:32:56

2026-03-28 23:32:56 +00:00
parent bf407ea2d5
commit 746c76c413
2 changed files with 82 additions and 9 deletions
@@ -26,20 +26,25 @@ def transcribe_audio(file_path: str) -> str:

 async def text_to_speech_async(text: str) -> str:
    """Sintetiza texto em áudio MP3 usando Edge TTS (Versão ASYNC)."""
-    # Limpeza para narração
+    # Narration cleanup: strip everything the voice would read out literally but should not
    texto_limpo = text.replace("🤖", "").replace("🧑‍🏫", "").replace("*", "").replace("`", "")
-    texto_limpo = re.sub(r'<REFINED>.*?</REFINED>', '', texto_limpo, flags=re.DOTALL).strip()
+    texto_limpo = texto_limpo.replace("#", "").replace("- ", " ").replace("> ", " ")  # NOTE(review): replace() is global, so mid-sentence "- " and "> " are altered too
+    # Drop <REFINED>...</REFINED> blocks (DOTALL lets a block span several lines)
+    texto_limpo = re.sub(r'<REFINED>.*?</REFINED>', '', texto_limpo, flags=re.DOTALL)
+    # Reduce Markdown links [text](url) to their text, then drop bare URLs
+    texto_limpo = re.sub(r'\[([^\]]+)\]\([^\)]+\)', r'\1', texto_limpo)
+    texto_limpo = re.sub(r'http[s]?://\S+', '', texto_limpo).strip()
    
    if not texto_limpo:
-        texto_limpo = "Prompt vazio."
+        texto_limpo = "Prompt processado."

    filename = f"audio_reply_{uuid.uuid4().hex[:8]}.mp3"
    filepath = os.path.join("/tmp", filename)
    
-    # Voz Masculina PT-BR: Antonio
-    # Rate +20% para ser mais rápido
-    voice = "pt-BR-AntonioNeural"
-    communicate = edge_tts.Communicate(texto_limpo, voice, rate="+20%")
+    # Male PT-BR voice: Donato is one of the most realistic for quick commands
+    voice = "pt-BR-DonatoNeural"
+    # Rate +35% to sound dynamic and direct, as requested
+    communicate = edge_tts.Communicate(texto_limpo, voice, rate="+35%")
    await communicate.save(filepath)
    return filename

@@ -12,7 +12,9 @@ load_dotenv()
 # Configurações obtidas do .env
 TOKEN = os.getenv("TELEGRAM_BOT_TOKEN")
 ALLOWED_USER_ID = os.getenv("TELEGRAM_CHAT_ID")
-API_BASE_URL = "http://localhost:8001"
+# Follow the PORT value from the environment (hint: .env says 8000); falls back to 8001 when unset
+API_PORT = os.getenv("PORT", "8001")
+API_BASE_URL = f"http://localhost:{API_PORT}"

 # O ID permitido deve ser comparado como string ou int, padronizando aqui
 if ALLOWED_USER_ID:
@@ -103,6 +105,67 @@ async def handle_message(update: Update, context: ContextTypes.DEFAULT_TYPE):
        logger.error(f"Erro ao enviar Markdown: {e}. Tentando texto puro.")
        await update.message.reply_text(reply)

+async def handle_voice(update: Update, context: ContextTypes.DEFAULT_TYPE):
+    """Send a Telegram voice note to the audio API and reply with transcript, text and TTS audio."""
+    import uuid  # local import: the file's visible `import uuid` only runs under the __main__ guard
+    if not update.message or not update.message.voice:
+        return
+
+    chat_id = update.effective_chat.id
+    user_id = update.effective_user.id
+    # Security filter: silently ignore every sender except the allowed user.
+    # NOTE(review): assumes ALLOWED_USER_ID was normalized to int where it is loaded - confirm.
+    if ALLOWED_USER_ID and user_id != ALLOWED_USER_ID:
+        return
+
+    async def reply_md(text: str) -> None:
+        """Reply as Markdown, falling back to plain text the same way handle_message does."""
+        try:
+            await update.message.reply_text(text, parse_mode='Markdown')
+        except Exception as e:
+            logger.error(f"Erro ao enviar Markdown: {e}. Tentando texto puro.")
+            await update.message.reply_text(text)
+
+    await context.bot.send_chat_action(chat_id=chat_id, action="record_voice")
+    temp_path = f"/tmp/tg_voice_{uuid.uuid4().hex}.ogg"
+    try:
+        # 1. Download the voice note; kept inside the try so a failed download is reported and cleaned up.
+        voice_file = await update.message.voice.get_file()
+        await voice_file.download_to_drive(temp_path)
+        logger.info(f"Voz recebida de {user_id}. Enviando para API de Áudio...")
+
+        # 2. Post it as multipart to the internal audio API (chat history is not sent yet).
+        async with httpx.AsyncClient(timeout=120.0) as client:
+            with open(temp_path, "rb") as f:
+                files = {"audio": (os.path.basename(temp_path), f, "audio/ogg")}
+                response = await client.post(f"{API_BASE_URL}/api/chat-audio", files=files)
+            response.raise_for_status()
+            data = response.json()
+
+        user_text = data.get("text", "[Voz não transcrita]")
+        bot_reply = data.get("reply", "Erro no processamento.")
+        audio_url = data.get("audio_url")  # e.g. /api/audio/file.mp3
+
+        await reply_md(f"🎤 *Sua mensagem:* {user_text}")
+        await reply_md(bot_reply)
+
+        # 3. Send the TTS reply; only the basename is used, and it must be a regular file in /tmp.
+        if audio_url:
+            audio_path = os.path.join("/tmp", os.path.basename(audio_url))
+            if os.path.isfile(audio_path):
+                with open(audio_path, "rb") as audio_file:
+                    await context.bot.send_voice(chat_id=chat_id, voice=audio_file)
+
+        # Record the exchange in the in-memory history for this chat.
+        chat_histories.setdefault(chat_id, []).append({"user": user_text, "bot": bot_reply})
+    except Exception as e:
+        # Handler boundary: log the failure and tell the user instead of dropping the update.
+        logger.error(f"Erro ao processar áudio: {str(e)}")
+        await update.message.reply_text(f"❌ *Erro no áudio:* {str(e)}")
+    finally:
+        if os.path.exists(temp_path):
+            os.remove(temp_path)
+
 if __name__ == '__main__':
    if not TOKEN:
        logger.error("ERRO: TELEGRAM_BOT_TOKEN não encontrado no .env!")
@@ -115,5 +178,10 @@ if __name__ == '__main__':
    text_handler = MessageHandler(filters.TEXT & (~filters.COMMAND), handle_message)
    application.add_handler(text_handler)
    
-    logger.info("Bot Ponte Antigravity (Middleware) iniciado e aguardando...")
+    # Register the handler for VOICE messages
+    import uuid  # NOTE(review): runs only under the __main__ guard, yet handle_voice uses uuid - confirm a module-level import exists
+    voice_handler = MessageHandler(filters.VOICE, handle_voice)
+    application.add_handler(voice_handler)
+    
+    logger.info("Bot Ponte Antigravity (Middleware - Texto & Voz) iniciado...")
    application.run_polling()