🚀 Auto-deploy: BotVPS atualizado em 24/03/2026 11:54:29

2026-03-24 11:54:29 +00:00
parent e71d871740
commit 9509ea8fe8
2 changed files with 23 additions and 28 deletions
@@ -1,10 +1,10 @@
 import os
 import speech_recognition as sr
 from pydub import AudioSegment
-from gtts import gTTS
 import uuid
 import re
-from elevenlabs.client import ElevenLabs
+import asyncio
+import edge_tts

 def transcribe_audio(file_path: str) -> str:
    """Converte áudio (qualquer formato compatível com pydub) para WAV e transcreve com Google Speech."""
@@ -24,39 +24,33 @@ def transcribe_audio(file_path: str) -> str:
        if os.path.exists(temp_wav):
            os.remove(temp_wav)

+async def _edge_tts_gen(text: str, filepath: str):
+    """Gera áudio usando Microsoft Edge TTS (Gratuito e Neural)."""
+    # Voz Masculina PT-BR: Antonio ou Donato
+    # Rate +20% para ser mais rápido conforme pedido
+    voice = "pt-BR-AntonioNeural"
+    communicate = edge_tts.Communicate(text, voice, rate="+20%")
+    await communicate.save(filepath)
+
 def text_to_speech(text: str) -> str:
-    """Sintetiza texto em áudio MP3 usando ElevenLabs (voz masculina) ou gTTS como fallback."""
+    """Sintetiza texto em áudio MP3 usando Edge TTS (Voz Masculina Rápida)."""
    # Limpeza para narração
    texto_limpo = text.replace("🤖", "").replace("🧑‍🏫", "").replace("*", "").replace("`", "")
    texto_limpo = re.sub(r'<REFINED>.*?</REFINED>', '', texto_limpo, flags=re.DOTALL).strip()
    
    if not texto_limpo:
-        texto_limpo = "Relatório processado. Os detalhes estão no painel visual."
+        texto_limpo = "Prompt vazio."

    filename = f"audio_reply_{uuid.uuid4().hex[:8]}.mp3"
    filepath = os.path.join("/tmp", filename)
    
-    api_key = os.getenv("ELEVENLABS_API_KEY")
-    voice_id = os.getenv("ELEVENLABS_VOICE_ID", "ErMmoak87FvRAs60I6g0") # Marco (Male, PT-BR) as default
-    
-    if api_key:
    try:
-            client = ElevenLabs(api_key=api_key)
-            audio = client.generate(
-                text=texto_limpo,
-                voice=voice_id,
-                model="eleven_multilingual_v2"
-            )
-            # Salvando o resultado para arquivo (audio é um gerador de bytes na v1.0)
-            with open(filepath, "wb") as f:
-                for chunk in audio:
-                    if chunk:
-                        f.write(chunk)
+        # Edge TTS é async, precisamos rodar no loop
+        asyncio.run(_edge_tts_gen(texto_limpo, filepath))
        return filename
    except Exception as e:
-            print(f"[VOICE] Erro ElevenLabs: {e}. Usando fallback gTTS.")
-    
-    # Fallback gTTS (Voz feminina lenta mas gratuita)
-    tts = gTTS(text=texto_limpo, lang='pt-br', tld='com.br', slow=False)
-    tts.save(filepath)
+        print(f"[VOICE] Erro Edge TTS: {e}. Criando arquivo mudo ou ignorando.")
+        # Cria um arquivo vazio para não quebrar o retorno
+        with open(filepath, "wb") as f:
+            f.write(b"")
        return filename
@@ -11,6 +11,7 @@ python-multipart
 psutil
 pydub
 jinja2
+edge-tts
 gTTS
 anthropic
 elevenlabs