230 lines
8.7 KiB
Python
230 lines
8.7 KiB
Python
# pxy_openai/views.py
|
|
import json
|
|
import logging
|
|
import tempfile
|
|
from typing import Any, Dict, Optional
|
|
|
|
from django.http import JsonResponse, HttpResponse
|
|
from django.views.decorators.csrf import csrf_exempt
|
|
from django.apps import apps
|
|
from django.utils.text import slugify
|
|
|
|
from .assistants import OpenAIAssistant
|
|
|
|
# Module-level logger shared by every helper and view below; named via __name__.
logger = logging.getLogger(__name__)
|
|
|
|
|
|
# -----------------------
|
|
# Helpers
|
|
# -----------------------
|
|
|
|
def _parse_json(request) -> Dict[str, Any]:
    """
    Extract the JSON payload of a request as a dict.

    Two sources are tried, in order:
      1. The raw request body, when the Content-Type mentions
         "application/json" (an empty body is treated as "{}").
      2. A "payload" form field holding a JSON string
         (x-www-form-urlencoded fallback).

    Returns:
        The decoded JSON object, or {} when neither source applies, when
        decoding/parsing fails, or when the decoded JSON is not an object.
        Failures are logged as warnings and never raised.
    """
    try:
        content_type = request.content_type or ""
        if "application/json" in content_type:
            parsed = json.loads(request.body.decode("utf-8") or "{}")
        elif request.POST and request.POST.get("payload"):
            # allow x-www-form-urlencoded fallback
            parsed = json.loads(request.POST["payload"])
        else:
            return {}
    except Exception as e:
        logger.warning("openai.api.bad_json: %s", e)
        return {}

    if not isinstance(parsed, dict):
        # Valid JSON can also be a list, string, number, ... but the views call
        # .get() on the result, so anything other than an object would raise
        # AttributeError there. Treat it like any other malformed payload.
        logger.warning(
            "openai.api.bad_json: expected a JSON object, got %s",
            type(parsed).__name__,
        )
        return {}
    return parsed
|
|
|
|
def _render_text(text: str) -> Dict[str, Any]:
    """
    Wrap a piece of text in a render.v1 envelope.

    The envelope carries exactly one message of type "text"; the input is
    coerced with str() so non-string values are accepted as well.
    """
    message = {"type": "text", "text": str(text)}
    envelope = {"schema_version": "render.v1", "messages": [message]}
    return envelope
|
|
|
|
def _render_error(text: str, status: int = 400) -> JsonResponse:
    """
    Build an error response in the same render.v1 shape as successful replies.

    The message is prefixed with a warning sign and sent with the given HTTP
    status (400 unless overridden), so bot clients can display it like any
    other text message.
    """
    payload = _render_text(f"⚠️ {text}")
    return JsonResponse(payload, status=status)
|
|
|
|
def _get_bot_and_token(bot_username: Optional[str]) -> Optional[str]:
    """
    Resolve the token of an active Telegram bot.

    The pxy_bots.TelegramBot model is searched for an active row whose
    `name` equals bot_username; if there is none, the same search is made
    on `username`. The first match wins.

    Returns:
        The matching bot's token, or None when bot_username is empty/None
        or no active bot matches.
    """
    if not bot_username:
        return None

    # Resolved through the app registry rather than a direct model import.
    bot_model = apps.get_model("pxy_bots", "TelegramBot")

    # `name` takes precedence over `username`; the second query only runs
    # when the first one found nothing.
    for field in ("name", "username"):
        lookup = {field: bot_username, "is_active": True}
        match = bot_model.objects.filter(**lookup).first()
        if match:
            return match.token
    return None
|
|
|
|
def _assistant_from_payload(env: Dict[str, Any]) -> Optional[OpenAIAssistant]:
    """
    Instantiate the OpenAIAssistant named by the payload's root "assistant" key.

    A missing or falsy "assistant" value selects "Urbanista".

    Returns:
        The assistant instance, or None if constructing it raised; the
        failure is logged at error level instead of propagating.
    """
    requested = env.get("assistant")
    name = requested if requested else "Urbanista"
    try:
        assistant = OpenAIAssistant(name=name)
    except Exception as e:
        logger.error("openai.assistant.init_failed name=%s err=%s", name, e)
        return None
    return assistant
|
|
|
|
|
|
# -----------------------
|
|
# /api/openai/transcribe
|
|
# -----------------------
|
|
|
|
@csrf_exempt
def transcribe(request):
    """
    Transcribe audio to text and reply with a render.v1 JSON message.

    JSON mode (Telegram voice):
      {
        "schema_version":"req.v1",
        "bot":{"username":"PepeBasuritaCoinsBot"},
        "user":{"language":"es"},
        "input":{"media":{"type":"voice","file_id":"..."}},
        "assistant":"Urbanista"   # optional
      }

    Multipart mode (direct upload):
      POST multipart/form-data with file=<audio file>, and optional fields:
        assistant=Urbanista
        language=es

    Responses:
      405 (empty body) for any method other than POST.
      400 in JSON mode when media.file_id is missing or no bot token is found.
      500 when the assistant cannot be created or the transcription raises.
      200 with the transcript otherwise.
    """
    # Local import: only needed to delete the temporary upload copy below.
    import os

    if request.method != "POST":
        return HttpResponse(status=405)

    # Multipart direct upload?
    if request.FILES.get("file"):
        audio = request.FILES["file"]
        language = request.POST.get("language") or "es"
        assistant_name = request.POST.get("assistant") or "Urbanista"
        try:
            assistant = OpenAIAssistant(assistant_name)
        except Exception as e:
            return _render_error(f"No pude iniciar el asistente '{assistant_name}': {e}", status=500)

        # Save to temp file and run whisper
        tmp_path = None
        try:
            filename = audio.name or ""
            # Keep the upload's extension on the temp file; default to .ogg.
            suffix = "." + (filename.split(".")[-1] if "." in filename else "ogg")
            with tempfile.NamedTemporaryFile(delete=False, suffix=suffix) as tmp:
                # Record the path before writing so cleanup also covers a
                # failure in the middle of the copy.
                tmp_path = tmp.name
                for chunk in audio.chunks():
                    tmp.write(chunk)
            text = assistant.transcribe_file(tmp_path, language=language)
            return JsonResponse(_render_text(text))
        except Exception as e:
            logger.exception("openai.transcribe.upload_error")
            return _render_error(f"No se pudo transcribir el audio subido: {e}", status=500)
        finally:
            # delete=False means nobody removes the file for us; without this
            # every upload would leave a copy behind in the temp directory.
            if tmp_path:
                try:
                    os.unlink(tmp_path)
                except OSError:
                    logger.warning("openai.transcribe.tmp_cleanup_failed path=%s", tmp_path)

    # JSON mode
    env = _parse_json(request)
    assistant = _assistant_from_payload(env)
    if not assistant:
        return _render_error("No pude cargar el asistente de OpenAI (revisa el nombre o la configuración).", status=500)

    user_lang = ((env.get("user") or {}).get("language")) or "es"
    media = ((env.get("input") or {}).get("media") or {})
    file_id = media.get("file_id")
    bot_username = ((env.get("bot") or {}).get("username"))

    if not file_id:
        return _render_error("No encontré audio en la petición (falta media.file_id o file).", status=400)

    # The bot token is what lets the assistant download the voice file.
    token = _get_bot_and_token(bot_username)
    if not token:
        return _render_error("No encontré el bot o su token para descargar el audio (bot.username).", status=400)

    try:
        text = assistant.transcribe_telegram(token, file_id, language=user_lang)
        return JsonResponse(_render_text(text))
    except Exception as e:
        logger.exception("openai.transcribe.telegram_error")
        return _render_error(f"No se pudo transcribir el audio: {e}", status=500)
|
|
|
|
|
|
# -----------------------
|
|
# /api/openai/voice_chat
|
|
# -----------------------
|
|
|
|
@csrf_exempt
def voice_chat(request):
    """
    Transcribe (if voice present) and then chat with the transcript.

    JSON (Telegram):
      {
        "schema_version":"req.v1",
        "bot":{"username":"PepeBasuritaCoinsBot"},
        "user":{"id":999,"language":"es"},
        "command":{"name":"__voice__","version":1,"trigger":"text_command"},
        "input":{"media":{"type":"voice","file_id":"..."}, "text":"(optional prompt)"},
        "assistant":"Urbanista"   # optional
      }

    Multipart (direct upload + prompt):
      POST multipart/form-data with:
        file=<audio>, prompt="...", assistant="Urbanista", language="es"

    When a prompt accompanies the audio, the message sent to the assistant is
    the prompt followed by the transcript under a "[Transcripción]" heading;
    otherwise the transcript alone is sent. In JSON mode without audio, the
    text is used as the chat message, and a fixed greeting is returned when
    there is no text either.

    Responses:
      405 (empty body) for any method other than POST.
      400 in JSON mode when audio is present but no bot token is found.
      500 when the assistant cannot be created or transcription/chat raises.
      200 with the assistant's reply (or the greeting) otherwise.
    """
    # Local import: only needed to delete the temporary upload copy below.
    import os

    if request.method != "POST":
        return HttpResponse(status=405)

    # Multipart mode (file + optional prompt)
    if request.FILES.get("file"):
        audio = request.FILES["file"]
        prompt = request.POST.get("prompt") or ""
        language = request.POST.get("language") or "es"
        assistant_name = request.POST.get("assistant") or "Urbanista"
        try:
            assistant = OpenAIAssistant(assistant_name)
        except Exception as e:
            return _render_error(f"No pude iniciar el asistente '{assistant_name}': {e}", status=500)

        tmp_path = None
        try:
            filename = audio.name or ""
            # Keep the upload's extension on the temp file; default to .ogg.
            suffix = "." + (filename.split(".")[-1] if "." in filename else "ogg")
            with tempfile.NamedTemporaryFile(delete=False, suffix=suffix) as tmp:
                # Record the path before writing so cleanup also covers a
                # failure in the middle of the copy.
                tmp_path = tmp.name
                for chunk in audio.chunks():
                    tmp.write(chunk)

            transcript = assistant.transcribe_file(tmp_path, language=language)
            user_message = transcript if not prompt else f"{prompt}\n\n[Transcripción]\n{transcript}"
            reply = assistant.chat_completion(user_message)
            return JsonResponse(_render_text(reply))
        except Exception as e:
            logger.exception("openai.voice_chat.upload_error")
            return _render_error(f"No se pudo procesar el audio: {e}", status=500)
        finally:
            # delete=False means nobody removes the file for us; without this
            # every upload would leave a copy behind in the temp directory.
            if tmp_path:
                try:
                    os.unlink(tmp_path)
                except OSError:
                    logger.warning("openai.voice_chat.tmp_cleanup_failed path=%s", tmp_path)

    # JSON mode (Telegram)
    env = _parse_json(request)
    assistant = _assistant_from_payload(env)
    if not assistant:
        return _render_error("No pude cargar el asistente de OpenAI (revisa el nombre o la configuración).", status=500)

    user_lang = ((env.get("user") or {}).get("language")) or "es"
    media = ((env.get("input") or {}).get("media") or {})
    file_id = media.get("file_id")
    prompt = ((env.get("input") or {}).get("text")) or ""

    # If no audio present, just chat using provided text (keeps endpoint usable)
    if not file_id:
        if not prompt:
            return JsonResponse(_render_text("¡Hola! 🌆 ¿Sobre qué te gustaría hablar hoy?"))
        try:
            reply = assistant.chat_completion(prompt)
            return JsonResponse(_render_text(reply))
        except Exception as e:
            logger.exception("openai.voice_chat.text_only_error")
            return _render_error(f"No se pudo generar respuesta: {e}", status=500)

    # With Telegram voice
    bot_username = ((env.get("bot") or {}).get("username"))
    token = _get_bot_and_token(bot_username)
    if not token:
        return _render_error("No encontré el bot o su token para descargar el audio (bot.username).", status=400)

    try:
        transcript = assistant.transcribe_telegram(token, file_id, language=user_lang)
        user_message = transcript if not prompt else f"{prompt}\n\n[Transcripción]\n{transcript}"
        reply = assistant.chat_completion(user_message)
        return JsonResponse(_render_text(reply))
    except Exception as e:
        logger.exception("openai.voice_chat.telegram_error")
        return _render_error(f"No se pudo procesar el audio: {e}", status=500)
|