🚀 Auto-deploy: BotVPS atualizado em 24/03/2026 11:57:55

This commit is contained in:
2026-03-24 11:57:55 +00:00
parent 9509ea8fe8
commit c2273a13f0
3 changed files with 66 additions and 27 deletions

View File

@@ -24,16 +24,8 @@ def transcribe_audio(file_path: str) -> str:
if os.path.exists(temp_wav): if os.path.exists(temp_wav):
os.remove(temp_wav) os.remove(temp_wav)
async def _edge_tts_gen(text: str, filepath: str): async def text_to_speech_async(text: str) -> str:
"""Gera áudio usando Microsoft Edge TTS (Gratuito e Neural).""" """Sintetiza texto em áudio MP3 usando Edge TTS (Versão ASYNC)."""
# Voz Masculina PT-BR: Antonio ou Donato
# Rate +20% para ser mais rápido conforme pedido
voice = "pt-BR-AntonioNeural"
communicate = edge_tts.Communicate(text, voice, rate="+20%")
await communicate.save(filepath)
def text_to_speech(text: str) -> str:
"""Sintetiza texto em áudio MP3 usando Edge TTS (Voz Masculina Rápida)."""
# Limpeza para narração # Limpeza para narração
texto_limpo = text.replace("🤖", "").replace("🧑‍🏫", "").replace("*", "").replace("`", "") texto_limpo = text.replace("🤖", "").replace("🧑‍🏫", "").replace("*", "").replace("`", "")
texto_limpo = re.sub(r'<REFINED>.*?</REFINED>', '', texto_limpo, flags=re.DOTALL).strip() texto_limpo = re.sub(r'<REFINED>.*?</REFINED>', '', texto_limpo, flags=re.DOTALL).strip()
@@ -44,13 +36,20 @@ def text_to_speech(text: str) -> str:
filename = f"audio_reply_{uuid.uuid4().hex[:8]}.mp3" filename = f"audio_reply_{uuid.uuid4().hex[:8]}.mp3"
filepath = os.path.join("/tmp", filename) filepath = os.path.join("/tmp", filename)
# Voz Masculina PT-BR: Antonio
# Rate +20% para ser mais rápido
voice = "pt-BR-AntonioNeural"
communicate = edge_tts.Communicate(texto_limpo, voice, rate="+20%")
await communicate.save(filepath)
return filename
def text_to_speech(text: str) -> str:
"""Wrapper síncrono para compatibilidade legada (CUIDADO com loops eventuais)."""
try: try:
# Edge TTS é async, precisamos rodar no loop # Se já houver um loop rodando (ex: Telegram), isso vai falhar
asyncio.run(_edge_tts_gen(texto_limpo, filepath)) return asyncio.run(text_to_speech_async(text))
return filename except RuntimeError:
except Exception as e: # Fallback: se houver loop, tenta rodar de forma síncrona ou retorna erro
print(f"[VOICE] Erro Edge TTS: {e}. Criando arquivo mudo ou ignorando.") # No nosso caso, o bot_logic e main.py devem usar a versão ASYNC diretamente
# Cria um arquivo vazio para não quebrar o retorno print("[VOICE] Erro: text_to_speech (sync) chamado dentro de um event loop.")
with open(filepath, "wb") as f: raise
f.write(b"")
return filename

View File

@@ -9,7 +9,7 @@ from orchestrator import (
format_completion_message, execute_step_async format_completion_message, execute_step_async
) )
from ai_agent import query_agent_async from ai_agent import query_agent_async
from audio_handler import transcribe_audio, text_to_speech from audio_handler import transcribe_audio, text_to_speech_async
from config import get_config from config import get_config
# Configuração de logging # Configuração de logging
@@ -22,10 +22,10 @@ logger = logging.getLogger(__name__)
TOKEN = os.getenv("TELEGRAM_BOT_TOKEN") TOKEN = os.getenv("TELEGRAM_BOT_TOKEN")
ALLOWED_CHAT_ID = os.getenv("TELEGRAM_CHAT_ID") ALLOWED_CHAT_ID = os.getenv("TELEGRAM_CHAT_ID")
def synthesize_audio(text: str) -> str: async def synthesize_audio(text: str) -> str:
"""Wrapper para a síntese de voz centralizada.""" """Wrapper assíncrono para a síntese de voz centralizada."""
try: try:
filename = text_to_speech(text) filename = await text_to_speech_async(text)
return os.path.join("/tmp", filename) return os.path.join("/tmp", filename)
except Exception as e: except Exception as e:
logger.error(f"TTS Error: {e}") logger.error(f"TTS Error: {e}")
@@ -119,10 +119,10 @@ async def process_logic(update: Update, context: ContextTypes.DEFAULT_TYPE, user
# Se foi por voz, responde por voz também # Se foi por voz, responde por voz também
if is_voice: if is_voice:
audio_path = synthesize_audio(reply_clean) audio_path = await synthesize_audio(reply_clean)
if audio_path and os.path.exists(audio_path): if audio_path and os.path.exists(audio_path):
with open(audio_path, 'rb') as voice: with open(audio_path, 'rb') as voice_file:
await update.message.reply_voice(voice) await update.message.reply_voice(voice_file)
os.remove(audio_path) os.remove(audio_path)
def get_telegram_app(): def get_telegram_app():

42
main.py
View File

@@ -4,13 +4,16 @@ import subprocess
import time import time
import json import json
import asyncio import asyncio
from fastapi import FastAPI, Request, Header, Depends, HTTPException, status from fastapi import FastAPI, Request, Header, Depends, HTTPException, status, UploadFile, File
from fastapi.responses import HTMLResponse, JSONResponse, FileResponse from fastapi.responses import HTMLResponse, JSONResponse, FileResponse
from fastapi.templating import Jinja2Templates from fastapi.templating import Jinja2Templates
from dotenv import load_dotenv from dotenv import load_dotenv
from starlette.concurrency import run_in_threadpool from starlette.concurrency import run_in_threadpool
import shutil
import uuid
from ai_agent import query_agent_async from ai_agent import query_agent_async
from audio_handler import transcribe_audio, text_to_speech_async
from config import get_config, save_config from config import get_config, save_config
from credential_manager import fetch_from_gitea_repo_async from credential_manager import fetch_from_gitea_repo_async
from orchestrator import ( from orchestrator import (
@@ -134,6 +137,43 @@ async def web_chat(message: dict, is_auth: bool = Depends(verify_password)):
reply = await query_agent_async(user_text, chat_history=history) reply = await query_agent_async(user_text, chat_history=history)
return {"reply": reply} return {"reply": reply}
@app.post("/api/chat-audio")
async def web_chat_audio(audio: UploadFile = File(...), is_auth: bool = Depends(verify_password)):
    """Voice chat endpoint: transcribe uploaded audio, query the AI agent,
    and synthesize the reply back to speech.

    Returns a dict with the transcribed text (``text``), the agent's reply
    (``reply``) and, on success, an ``audio_url`` pointing at the generated
    MP3 served by the GET /api/audio/{filename} endpoint.
    """
    # 1. Save the browser-uploaded audio to /tmp under a unique name.
    #    basename() strips any path components from the client-supplied
    #    filename so it cannot escape /tmp (untrusted input).
    safe_upload_name = os.path.basename(audio.filename or "upload")
    temp_in = f"/tmp/{uuid.uuid4().hex}_{safe_upload_name}"
    with open(temp_in, "wb") as buffer:
        shutil.copyfileobj(audio.file, buffer)
    try:
        # 2. Speech-to-text
        text = transcribe_audio(temp_in)
        if not text:
            return {"reply": "Não entendi seu áudio.", "text": ""}
        # 3. Ask the AI agent
        reply = await query_agent_async(text)
        # 4. Text-to-speech on the reply; strip internal <REFINED> blocks
        #    that must not be narrated (same cleanup as the TTS module).
        reply_clean = re.sub(r'<REFINED>.*?</REFINED>', '', reply, flags=re.DOTALL).strip()
        filename = await text_to_speech_async(reply_clean)
        return {
            "text": text,
            "reply": reply,
            # FIX: route placeholder was garbled ("(unknown)"); link the
            # actual generated file served by get_audio_file.
            "audio_url": f"/api/audio/{filename}",
        }
    except Exception as e:
        # Best-effort boundary: surface the error as a chat reply rather
        # than letting the request 500.
        return {"reply": f"Erro Áudio: {str(e)}", "text": "Erro."}
    finally:
        # Always remove the uploaded temp file, success or failure.
        if os.path.exists(temp_in):
            os.remove(temp_in)
@app.get("/api/audio/{filename}")
async def get_audio_file(filename: str):
    """Serve a previously generated TTS audio file from /tmp.

    SECURITY: ``filename`` comes from the URL and is untrusted.
    ``os.path.basename`` strips any directory components, so a crafted
    value like ``..%2Fetc%2Fpasswd`` cannot escape /tmp.
    """
    safe_name = os.path.basename(filename)
    path = os.path.join("/tmp", safe_name)
    if os.path.exists(path):
        # Generated files are MP3s (see text_to_speech_async naming).
        return FileResponse(path, media_type="audio/mpeg")
    return JSONResponse({"error": "File not found"}, status_code=404)
@app.post("/api/orchestrate") @app.post("/api/orchestrate")
async def orchestrate_task(task_data: dict, is_auth: bool = Depends(verify_password)): async def orchestrate_task(task_data: dict, is_auth: bool = Depends(verify_password)):
task = task_data.get("task", "") task = task_data.get("task", "")