# pxy_openai/views.py
import json
import logging
import tempfile
from typing import Any, Dict, Optional

from django.apps import apps
from django.http import JsonResponse, HttpResponse
from django.utils.text import slugify
from django.views.decorators.csrf import csrf_exempt

from .assistants import OpenAIAssistant

logger = logging.getLogger(__name__)

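# The OpenAIAssistant imported above is assumed (based on how it is called in
# this module) to expose roughly this interface; see pxy_openai/assistants.py
# for the actual implementation:
#   OpenAIAssistant(name=...)                             # may raise if misconfigured
#   .transcribe_file(path, language=...)          -> str  # transcript of a local file
#   .transcribe_telegram(token, file_id, language=...) -> str
#   .chat_completion(message)                     -> str  # assistant reply text
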
# -----------------------
# Helpers
# -----------------------
def _parse_json(request) -> Dict[str, Any]:
    try:
        if request.content_type and "application/json" in request.content_type:
            return json.loads(request.body.decode("utf-8") or "{}")
        # Allow x-www-form-urlencoded fallback
        if request.POST and request.POST.get("payload"):
            return json.loads(request.POST["payload"])
    except Exception as e:
        logger.warning("openai.api.bad_json: %s", e)
    return {}

def _render_text(text: str) -> Dict[str, Any]:
    return {
        "schema_version": "render.v1",
        "messages": [{"type": "text", "text": str(text)}],
    }

def _render_error(text: str, status: int = 400) -> JsonResponse:
    # Return as render.v1 for bot consumption (keeps UX consistent)
    return JsonResponse(_render_text(f"⚠️ {text}"), status=status)

def _get_bot_and_token(bot_username: Optional[str]) -> Optional[str]:
"""
Find Telegram bot token by either TelegramBot.name or .username matching the provided bot_username.
Returns token or None.
"""
if not bot_username:
return None
TelegramBot = apps.get_model("pxy_bots", "TelegramBot")
bot = (TelegramBot.objects.filter(name=bot_username, is_active=True).first() or
TelegramBot.objects.filter(username=bot_username, is_active=True).first())
return bot.token if bot else None
def _assistant_from_payload(env: Dict[str, Any]) -> Optional[OpenAIAssistant]:
"""
Build OpenAIAssistant from a provided name in payload, otherwise try a sensible default.
You can pass "assistant": "Urbanista" in the root payload.
"""
name = (env.get("assistant") or "Urbanista")
try:
return OpenAIAssistant(name=name)
except Exception as e:
logger.error("openai.assistant.init_failed name=%s err=%s", name, e)
return None
# -----------------------
# /api/openai/transcribe
# -----------------------
@csrf_exempt
def transcribe(request):
"""
Transcribe audio to text.
JSON mode (Telegram voice):
{
"schema_version":"req.v1",
"bot":{"username":"PepeBasuritaCoinsBot"},
"user":{"language":"es"},
"input":{"media":{"type":"voice","file_id":"..."}}
"assistant":"Urbanista" # optional
}
Multipart mode (direct upload):
POST multipart/form-data with file=<audio file>, and optional fields:
assistant=Urbanista
language=es
"""
    if request.method != "POST":
        return HttpResponse(status=405)

    # Multipart direct upload?
    if request.FILES.get("file"):
        audio = request.FILES["file"]
        language = request.POST.get("language") or "es"
        assistant_name = request.POST.get("assistant") or "Urbanista"
        try:
            assistant = OpenAIAssistant(assistant_name)
        except Exception as e:
            return _render_error(f"No pude iniciar el asistente '{assistant_name}': {e}", status=500)

        # Save to temp file and run whisper
        try:
            suffix = "." + (audio.name.split(".")[-1] if "." in audio.name else "ogg")
            with tempfile.NamedTemporaryFile(delete=False, suffix=suffix) as tmp:
                for chunk in audio.chunks():
                    tmp.write(chunk)
                tmp_path = tmp.name
            text = assistant.transcribe_file(tmp_path, language=language)
            return JsonResponse(_render_text(text))
        except Exception as e:
            logger.exception("openai.transcribe.upload_error")
            return _render_error(f"No se pudo transcribir el audio subido: {e}", status=500)

    # JSON mode
    env = _parse_json(request)
    assistant = _assistant_from_payload(env)
    if not assistant:
        return _render_error("No pude cargar el asistente de OpenAI (revisa el nombre o la configuración).", status=500)

    user_lang = ((env.get("user") or {}).get("language")) or "es"
    media = (env.get("input") or {}).get("media") or {}
    file_id = media.get("file_id")
    bot_username = (env.get("bot") or {}).get("username")

    if not file_id:
        return _render_error("No encontré audio en la petición (falta media.file_id o file).", status=400)

    token = _get_bot_and_token(bot_username)
    if not token:
        return _render_error("No encontré el bot o su token para descargar el audio (bot.username).", status=400)

    try:
        text = assistant.transcribe_telegram(token, file_id, language=user_lang)
        return JsonResponse(_render_text(text))
    except Exception as e:
        logger.exception("openai.transcribe.telegram_error")
        return _render_error(f"No se pudo transcribir el audio: {e}", status=500)

# -----------------------
# /api/openai/voice_chat
# -----------------------
@csrf_exempt
def voice_chat(request):
"""
Transcribe (if voice present) and then chat with the transcript.
JSON (Telegram):
{
"schema_version":"req.v1",
"bot":{"username":"PepeBasuritaCoinsBot"},
"user":{"id":999,"language":"es"},
"command":{"name":"__voice__","version":1,"trigger":"text_command"},
"input":{"media":{"type":"voice","file_id":"..."}, "text":"(optional prompt)"},
"assistant":"Urbanista" # optional
}
Multipart (direct upload + prompt):
POST multipart/form-data with:
file=<audio>, prompt="...", assistant="Urbanista", language="es"
"""
    if request.method != "POST":
        return HttpResponse(status=405)

    # Multipart mode (file + optional prompt)
    if request.FILES.get("file"):
        audio = request.FILES["file"]
        prompt = request.POST.get("prompt") or ""
        language = request.POST.get("language") or "es"
        assistant_name = request.POST.get("assistant") or "Urbanista"
        try:
            assistant = OpenAIAssistant(assistant_name)
        except Exception as e:
            return _render_error(f"No pude iniciar el asistente '{assistant_name}': {e}", status=500)

        try:
            suffix = "." + (audio.name.split(".")[-1] if "." in audio.name else "ogg")
            with tempfile.NamedTemporaryFile(delete=False, suffix=suffix) as tmp:
                for chunk in audio.chunks():
                    tmp.write(chunk)
                tmp_path = tmp.name
            transcript = assistant.transcribe_file(tmp_path, language=language)
            user_message = transcript if not prompt else f"{prompt}\n\n[Transcripción]\n{transcript}"
            reply = assistant.chat_completion(user_message)
            return JsonResponse(_render_text(reply))
        except Exception as e:
            logger.exception("openai.voice_chat.upload_error")
            return _render_error(f"No se pudo procesar el audio: {e}", status=500)

    # JSON mode (Telegram)
    env = _parse_json(request)
    assistant = _assistant_from_payload(env)
    if not assistant:
        return _render_error("No pude cargar el asistente de OpenAI (revisa el nombre o la configuración).", status=500)

    user_lang = ((env.get("user") or {}).get("language")) or "es"
    media = (env.get("input") or {}).get("media") or {}
    file_id = media.get("file_id")
    prompt = (env.get("input") or {}).get("text") or ""

    # If no audio present, just chat using provided text (keeps endpoint usable)
    if not file_id:
        if not prompt:
            return JsonResponse(_render_text("¡Hola! 🌆 ¿Sobre qué te gustaría hablar hoy?"))
        try:
            reply = assistant.chat_completion(prompt)
            return JsonResponse(_render_text(reply))
        except Exception as e:
            logger.exception("openai.voice_chat.text_only_error")
            return _render_error(f"No se pudo generar respuesta: {e}", status=500)

    # With Telegram voice
    bot_username = (env.get("bot") or {}).get("username")
    token = _get_bot_and_token(bot_username)
    if not token:
        return _render_error("No encontré el bot o su token para descargar el audio (bot.username).", status=400)

    try:
        transcript = assistant.transcribe_telegram(token, file_id, language=user_lang)
        user_message = transcript if not prompt else f"{prompt}\n\n[Transcripción]\n{transcript}"
        reply = assistant.chat_completion(user_message)
        return JsonResponse(_render_text(reply))
    except Exception as e:
        logger.exception("openai.voice_chat.telegram_error")
        return _render_error(f"No se pudo procesar el audio: {e}", status=500)