# BotVPS/audio_handler.py

import os
import speech_recognition as sr
from pydub import AudioSegment
import uuid
import re
import asyncio
import edge_tts

def transcribe_audio(file_path: str) -> str:
    """Transcribe an audio file to text using Google speech recognition.

    The input (any format pydub can decode) is always re-encoded to a
    temporary WAV file under ``/tmp``; that WAV is then read back and passed
    to ``recognize_google`` with the ``pt-BR`` language. The temporary file
    is deleted on every exit path, including when an exception propagates.

    Args:
        file_path: Path of the audio file to transcribe.

    Returns:
        The text returned by the recognizer.

    Note:
        No exception is caught here; errors from pydub or
        speech_recognition propagate to the caller.
    """
    # The conversion goes through pydub, which needs ffmpeg on the host.
    wav_path = f"/tmp/{uuid.uuid4()}.wav"
    engine = sr.Recognizer()
    try:
        AudioSegment.from_file(file_path).export(wav_path, format="wav")

        with sr.AudioFile(wav_path) as wav_source:
            recorded = engine.record(wav_source)
            return engine.recognize_google(recorded, language="pt-BR")
    finally:
        # Remove the scratch file whether or not transcription succeeded.
        if os.path.exists(wav_path):
            os.remove(wav_path)

def _clean_text_for_speech(text: str) -> str:
    """Strip markup and symbols that the TTS voice would read out literally."""
    # <REFINED> blocks must be removed first: the "> " replacement below
    # would otherwise turn "<REFINED> ..." into "<REFINED ...", the pattern
    # would no longer match and the whole block would be narrated.
    cleaned = re.sub(r'<REFINED>.*?</REFINED>', '', text, flags=re.DOTALL)
    # Emojis and inline markdown symbols are dropped outright.
    for token in ("🤖", "🧑‍🏫", "*", "`", "#"):
        cleaned = cleaned.replace(token, "")
    # List-bullet and block-quote markers collapse to a single space.
    for marker in ("- ", "> "):
        cleaned = cleaned.replace(marker, " ")
    # Markdown links keep only their label; bare URLs are removed entirely.
    cleaned = re.sub(r'\[([^\]]+)\]\([^\)]+\)', r'\1', cleaned)
    return re.sub(r'http[s]?://\S+', '', cleaned).strip()


async def text_to_speech_async(text: str) -> str:
    """Synthesize ``text`` into an MP3 file with Edge TTS (async version).

    The text is first cleaned for narration (``<REFINED>`` blocks, emojis,
    markdown symbols, links and URLs are removed). If nothing is left after
    cleaning, the fixed phrase "Prompt processado." is spoken instead.

    Args:
        text: Raw reply text, possibly containing markdown and tags.

    Returns:
        The generated file *name* (``audio_reply_<8 hex chars>.mp3``), not
        its full path. The file itself is written under ``/tmp``.
    """
    speech_text = _clean_text_for_speech(text) or "Prompt processado."

    filename = f"audio_reply_{uuid.uuid4().hex[:8]}.mp3"
    filepath = os.path.join("/tmp", filename)

    # Male pt-BR voice, sped up by 35% for a brisk, direct delivery.
    voice = "pt-BR-DonatoNeural"
    communicate = edge_tts.Communicate(speech_text, voice, rate="+35%")
    await communicate.save(filepath)
    return filename

def text_to_speech(text: str) -> str:
    """Synchronous wrapper around ``text_to_speech_async`` for legacy callers.

    This must only be used from code that is NOT running inside an event
    loop. Async callers should await ``text_to_speech_async`` directly.

    Args:
        text: Raw reply text to synthesize.

    Returns:
        The generated MP3 file name, as returned by ``text_to_speech_async``.

    Raises:
        RuntimeError: If called while an event loop is already running in
            the current thread.
    """
    # Detect a running loop up front, before the coroutine object is created.
    # This avoids a "coroutine was never awaited" warning and keeps unrelated
    # RuntimeErrors raised during synthesis from being reported as loop misuse.
    try:
        asyncio.get_running_loop()
    except RuntimeError:
        loop_is_running = False
    else:
        loop_is_running = True

    if loop_is_running:
        print("[VOICE] Erro: text_to_speech (sync) chamado dentro de um event loop.")
        raise RuntimeError(
            "text_to_speech() cannot be called from a running event loop; "
            "await text_to_speech_async() instead"
        )

    return asyncio.run(text_to_speech_async(text))