feat: upgrade interface web e suporte a áudio completo

2026-03-22 01:05:27 +00:00
parent 2d3da03ee6
commit 3e2e81bd64
7 changed files with 435 additions and 131 deletions
@@ -0,0 +1,42 @@
+import os
+import speech_recognition as sr
+from pydub import AudioSegment
+from gtts import gTTS
+import uuid
+import re
+
+def transcribe_audio(file_path: str) -> str:
+    """Converte áudio (qualquer formato compatível com pydub) para WAV e transcreve com Google Speech."""
+    recognizer = sr.Recognizer()
+    
+    # Se não for wav, converte usando pydub (precisa de ffmpeg na VPS)
+    temp_wav = f"/tmp/{uuid.uuid4()}.wav"
+    try:
+        audio = AudioSegment.from_file(file_path)
+        audio.export(temp_wav, format="wav")
+        
+        with sr.AudioFile(temp_wav) as source:
+            audio_data = recognizer.record(source)
+            text = recognizer.recognize_google(audio_data, language="pt-BR")
+            return text
+    finally:
+        if os.path.exists(temp_wav):
+            os.remove(temp_wav)
+
+def text_to_speech(text: str) -> str:
+    """Sintetiza texto em áudio MP3, removendo tags visuais e emojis."""
+    # Limpeza para narração
+    texto_limpo = text.replace("🤖", "").replace("🧑‍🏫", "").replace("*", "").replace("`", "")
+    # Remove o bloco <REFINED> se houver, pois ele é para leitura visual apenas
+    texto_limpo = re.sub(r'<REFINED>.*?</REFINED>', '', texto_limpo, flags=re.DOTALL).strip()
+    
+    # Se sobrar texto vazio após limpar o refinado (raro), fala algo genérico
+    if not texto_limpo:
+        texto_limpo = "Relatório processado. Os detalhes estão no painel visual."
+
+    filename = f"audio_reply_{uuid.uuid4().hex[:8]}.mp3"
+    filepath = os.path.join("/tmp", filename)
+    
+    tts = gTTS(text=texto_limpo, lang='pt-br', tld='com.br', slow=False)
+    tts.save(filepath)
+    return filename