This commit is contained in:
parent
88fe85c802
commit
db61f56a24
@ -1,4 +1,8 @@
|
||||
# pxy_openai/assistants.py
|
||||
import logging
|
||||
import tempfile
|
||||
import requests # NEW
|
||||
from typing import Optional # NEW
|
||||
from .client import OpenAIClient
|
||||
from .models import OpenAIAssistant as OpenAIAssistantModel
|
||||
|
||||
@ -6,30 +10,81 @@ logger = logging.getLogger(__name__)
|
||||
|
||||
class OpenAIAssistant:
|
||||
"""
|
||||
OpenAI Assistant for handling AI interactions.
|
||||
OpenAI Assistant for handling AI interactions (chat + voice).
|
||||
"""
|
||||
def __init__(self, name):
|
||||
"""
|
||||
Initialize the assistant by loading its configuration from the database.
|
||||
"""
|
||||
try:
|
||||
self.config = OpenAIAssistantModel.objects.get(name=name)
|
||||
self.client = OpenAIClient(self.config.api_key).get_client()
|
||||
except OpenAIAssistantModel.DoesNotExist:
|
||||
raise ValueError(f"Assistant '{name}' not found in the database.")
|
||||
|
||||
# ---------- NEW: Whisper helpers ----------
|
||||
|
||||
def transcribe_file(self, path: str, language: Optional[str] = "es") -> str:
|
||||
"""
|
||||
Transcribe a local audio file using Whisper. Returns plain text.
|
||||
Supports both new OpenAI SDK (client.audio.transcriptions.create)
|
||||
and legacy (openai.Audio.transcriptions.create).
|
||||
"""
|
||||
try:
|
||||
# New SDK path
|
||||
if hasattr(self.client, "audio") and hasattr(self.client.audio, "transcriptions"):
|
||||
with open(path, "rb") as f:
|
||||
tx = self.client.audio.transcriptions.create(
|
||||
model="whisper-1",
|
||||
file=f,
|
||||
response_format="text",
|
||||
language=language or None,
|
||||
)
|
||||
return tx.strip() if isinstance(tx, str) else str(tx)
|
||||
|
||||
# Legacy SDK fallback
|
||||
with open(path, "rb") as f:
|
||||
tx = self.client.Audio.transcriptions.create( # type: ignore[attr-defined]
|
||||
model="whisper-1",
|
||||
file=f,
|
||||
response_format="text",
|
||||
language=language or None,
|
||||
)
|
||||
return tx.strip() if isinstance(tx, str) else str(tx)
|
||||
|
||||
except Exception as e:
|
||||
logger.error(f"Whisper transcription error: {e}")
|
||||
raise
|
||||
|
||||
def transcribe_telegram(self, bot_token: str, file_id: str, language: Optional[str] = "es") -> str:
|
||||
"""
|
||||
Download a Telegram voice/audio by file_id and transcribe it.
|
||||
"""
|
||||
# 1) getFile
|
||||
r = requests.get(
|
||||
f"https://api.telegram.org/bot{bot_token}/getFile",
|
||||
params={"file_id": file_id},
|
||||
timeout=10,
|
||||
)
|
||||
r.raise_for_status()
|
||||
file_path = r.json()["result"]["file_path"]
|
||||
|
||||
# 2) download actual bytes
|
||||
url = f"https://api.telegram.org/file/bot{bot_token}/{file_path}"
|
||||
with tempfile.NamedTemporaryFile(delete=False, suffix="." + file_path.split(".")[-1]) as tmp:
|
||||
resp = requests.get(url, timeout=30)
|
||||
resp.raise_for_status()
|
||||
tmp.write(resp.content)
|
||||
local_path = tmp.name
|
||||
|
||||
# 3) transcribe
|
||||
return self.transcribe_file(local_path, language=language)
|
||||
|
||||
# ---------- existing chat/agents methods ----------
|
||||
|
||||
def chat_completion(self, user_message):
|
||||
"""
|
||||
Call OpenAI's chat completion API.
|
||||
"""
|
||||
try:
|
||||
response = self.client.chat.completions.create(
|
||||
model="gpt-4o-mini",
|
||||
messages=[
|
||||
{
|
||||
"role": "system",
|
||||
"content": self.config.description, # Use description as the system prompt
|
||||
},
|
||||
{"role": "system", "content": self.config.description},
|
||||
{"role": "user", "content": user_message},
|
||||
],
|
||||
)
|
||||
@ -39,23 +94,15 @@ class OpenAIAssistant:
|
||||
return f"Error in chat completion: {e}"
|
||||
|
||||
def agent_workflow(self, user_message):
|
||||
"""
|
||||
Call OpenAI's advanced agent workflow API.
|
||||
"""
|
||||
try:
|
||||
if not self.config.assistant_id:
|
||||
raise ValueError(f"Assistant '{self.config.name}' does not have an associated assistant ID.")
|
||||
|
||||
assistant = self.client.beta.assistants.retrieve(self.config.assistant_id)
|
||||
thread = self.client.beta.threads.create()
|
||||
|
||||
# Create a message in the thread
|
||||
self.client.beta.threads.messages.create(thread_id=thread.id, role="user", content=user_message)
|
||||
|
||||
# Run the assistant workflow
|
||||
run = self.client.beta.threads.runs.create(thread_id=thread.id, assistant_id=assistant.id)
|
||||
|
||||
# Poll for the result
|
||||
while run.status in ["queued", "in_progress"]:
|
||||
run = self.client.beta.threads.runs.retrieve(thread_id=thread.id, run_id=run.id)
|
||||
if run.status == "completed":
|
||||
@ -68,9 +115,6 @@ class OpenAIAssistant:
|
||||
return f"Error in agent workflow: {e}"
|
||||
|
||||
def handle_message(self, user_message):
|
||||
"""
|
||||
Automatically select the correct method based on assistant type.
|
||||
"""
|
||||
if self.config.is_special_assistant():
|
||||
return self.agent_workflow(user_message)
|
||||
return self.chat_completion(user_message)
|
||||
|
@ -1,176 +1,229 @@
|
||||
# pxy_openai/views.py
|
||||
import os
|
||||
import io
|
||||
import json
|
||||
import logging
|
||||
import tempfile
|
||||
import requests
|
||||
import openai
|
||||
from typing import Any, Dict, Optional
|
||||
|
||||
from django.http import JsonResponse, HttpResponseBadRequest
|
||||
from django.http import JsonResponse, HttpResponse
|
||||
from django.views.decorators.csrf import csrf_exempt
|
||||
from django.apps import apps
|
||||
from django.utils.text import slugify
|
||||
|
||||
from pxy_bots.models import TelegramBot # to fetch the bot token from DB
|
||||
from .assistants import OpenAIAssistant
|
||||
|
||||
# Configure OpenAI
|
||||
openai.api_key = os.getenv("OPENAI_API_KEY")
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
# Where to forward the transcript for chat
|
||||
LANGCHAIN_CHAT_URL = os.getenv(
|
||||
"LANGCHAIN_CHAT_URL",
|
||||
"http://app.polisplexity.tech:8010/api/langchain/chat"
|
||||
)
|
||||
|
||||
def _download_telegram_file(bot_username: str, file_id: str) -> bytes:
|
||||
# -----------------------
|
||||
# Helpers
|
||||
# -----------------------
|
||||
|
||||
def _parse_json(request) -> Dict[str, Any]:
|
||||
try:
|
||||
if request.content_type and "application/json" in request.content_type:
|
||||
return json.loads(request.body.decode("utf-8") or "{}")
|
||||
# allow x-www-form-urlencoded fallback
|
||||
if request.POST and request.POST.get("payload"):
|
||||
return json.loads(request.POST["payload"])
|
||||
except Exception as e:
|
||||
logger.warning("openai.api.bad_json: %s", e)
|
||||
return {}
|
||||
|
||||
def _render_text(text: str) -> Dict[str, Any]:
|
||||
return {
|
||||
"schema_version": "render.v1",
|
||||
"messages": [{"type": "text", "text": str(text)}],
|
||||
}
|
||||
|
||||
def _render_error(text: str, status: int = 400) -> JsonResponse:
|
||||
# Return as render.v1 for bot consumption (keeps UX consistent)
|
||||
return JsonResponse(_render_text(f"⚠️ {text}"), status=status)
|
||||
|
||||
def _get_bot_and_token(bot_username: Optional[str]) -> Optional[str]:
|
||||
"""
|
||||
Resolve a Telegram file_id to bytes using the bot's token.
|
||||
Raises ValueError on any failure.
|
||||
Find Telegram bot token by either TelegramBot.name or .username matching the provided bot_username.
|
||||
Returns token or None.
|
||||
"""
|
||||
bot = TelegramBot.objects.filter(username=bot_username, is_active=True).first()
|
||||
if not bot:
|
||||
raise ValueError(f"bot '{bot_username}' not found or inactive")
|
||||
if not bot_username:
|
||||
return None
|
||||
TelegramBot = apps.get_model("pxy_bots", "TelegramBot")
|
||||
bot = (TelegramBot.objects.filter(name=bot_username, is_active=True).first() or
|
||||
TelegramBot.objects.filter(username=bot_username, is_active=True).first())
|
||||
return bot.token if bot else None
|
||||
|
||||
tg_api = f"https://api.telegram.org/bot{bot.token}"
|
||||
r = requests.get(f"{tg_api}/getFile", params={"file_id": file_id}, timeout=10)
|
||||
if r.status_code != 200 or not r.json().get("ok"):
|
||||
raise ValueError("telegram getFile failed")
|
||||
|
||||
file_path = r.json()["result"]["file_path"]
|
||||
file_url = f"https://api.telegram.org/file/bot{bot.token}/{file_path}"
|
||||
dl = requests.get(file_url, timeout=20)
|
||||
if dl.status_code != 200:
|
||||
raise ValueError("telegram file download failed")
|
||||
return dl.content
|
||||
|
||||
|
||||
def _transcribe_bytes(raw: bytes, language: str = "es") -> str:
|
||||
def _assistant_from_payload(env: Dict[str, Any]) -> Optional[OpenAIAssistant]:
|
||||
"""
|
||||
Transcribe OGG/Opus (or other) audio bytes with OpenAI.
|
||||
Returns plain text.
|
||||
Build OpenAIAssistant from a provided name in payload, otherwise try a sensible default.
|
||||
You can pass "assistant": "Urbanista" in the root payload.
|
||||
"""
|
||||
# Write to a temp file so OpenAI client can stream it
|
||||
with tempfile.NamedTemporaryFile(suffix=".ogg") as tmp:
|
||||
tmp.write(raw)
|
||||
tmp.flush()
|
||||
with open(tmp.name, "rb") as fh:
|
||||
# "gpt-4o-transcribe" or "whisper-1" depending on your account
|
||||
result = openai.audio.transcriptions.create(
|
||||
model="gpt-4o-transcribe",
|
||||
file=fh,
|
||||
response_format="text",
|
||||
language=language or "es"
|
||||
)
|
||||
return (result.strip() if isinstance(result, str) else str(result)).strip()
|
||||
name = (env.get("assistant") or "Urbanista")
|
||||
try:
|
||||
return OpenAIAssistant(name=name)
|
||||
except Exception as e:
|
||||
logger.error("openai.assistant.init_failed name=%s err=%s", name, e)
|
||||
return None
|
||||
|
||||
|
||||
# -----------------------
|
||||
# /api/openai/transcribe
|
||||
# -----------------------
|
||||
|
||||
@csrf_exempt
|
||||
def transcribe(request):
|
||||
"""
|
||||
POST /api/openai/transcribe
|
||||
Accepts req.v1. If input.media.file_id exists, we fetch from Telegram and transcribe.
|
||||
If not, we fall back to input.text (handy for quick tests).
|
||||
Returns:
|
||||
{"schema_version":"proc.v1","kind":"transcript","text":"...","meta":{...}}
|
||||
Transcribe audio to text.
|
||||
|
||||
JSON mode (Telegram voice):
|
||||
{
|
||||
"schema_version":"req.v1",
|
||||
"bot":{"username":"PepeBasuritaCoinsBot"},
|
||||
"user":{"language":"es"},
|
||||
"input":{"media":{"type":"voice","file_id":"..."}}
|
||||
"assistant":"Urbanista" # optional
|
||||
}
|
||||
|
||||
Multipart mode (direct upload):
|
||||
POST multipart/form-data with file=<audio file>, and optional fields:
|
||||
assistant=Urbanista
|
||||
language=es
|
||||
"""
|
||||
if request.method != "POST":
|
||||
return HttpResponseBadRequest("POST only")
|
||||
return HttpResponse(status=405)
|
||||
|
||||
# Multipart direct upload?
|
||||
if request.FILES.get("file"):
|
||||
audio = request.FILES["file"]
|
||||
language = request.POST.get("language") or "es"
|
||||
assistant_name = request.POST.get("assistant") or "Urbanista"
|
||||
try:
|
||||
assistant = OpenAIAssistant(assistant_name)
|
||||
except Exception as e:
|
||||
return _render_error(f"No pude iniciar el asistente '{assistant_name}': {e}", status=500)
|
||||
|
||||
# Save to temp file and run whisper
|
||||
try:
|
||||
suffix = "." + (audio.name.split(".")[-1] if "." in audio.name else "ogg")
|
||||
with tempfile.NamedTemporaryFile(delete=False, suffix=suffix) as tmp:
|
||||
for chunk in audio.chunks():
|
||||
tmp.write(chunk)
|
||||
tmp_path = tmp.name
|
||||
text = assistant.transcribe_file(tmp_path, language=language)
|
||||
return JsonResponse(_render_text(text))
|
||||
except Exception as e:
|
||||
logger.exception("openai.transcribe.upload_error")
|
||||
return _render_error(f"No se pudo transcribir el audio subido: {e}", status=500)
|
||||
|
||||
# JSON mode
|
||||
env = _parse_json(request)
|
||||
assistant = _assistant_from_payload(env)
|
||||
if not assistant:
|
||||
return _render_error("No pude cargar el asistente de OpenAI (revisa el nombre o la configuración).", status=500)
|
||||
|
||||
user_lang = ((env.get("user") or {}).get("language")) or "es"
|
||||
media = ((env.get("input") or {}).get("media") or {})
|
||||
file_id = media.get("file_id")
|
||||
bot_username = ((env.get("bot") or {}).get("username"))
|
||||
|
||||
if not file_id:
|
||||
return _render_error("No encontré audio en la petición (falta media.file_id o file).", status=400)
|
||||
|
||||
token = _get_bot_and_token(bot_username)
|
||||
if not token:
|
||||
return _render_error("No encontré el bot o su token para descargar el audio (bot.username).", status=400)
|
||||
|
||||
try:
|
||||
data = json.loads(request.body.decode("utf-8") or "{}")
|
||||
except Exception:
|
||||
return HttpResponseBadRequest("invalid json")
|
||||
text = assistant.transcribe_telegram(token, file_id, language=user_lang)
|
||||
return JsonResponse(_render_text(text))
|
||||
except Exception as e:
|
||||
logger.exception("openai.transcribe.telegram_error")
|
||||
return _render_error(f"No se pudo transcribir el audio: {e}", status=500)
|
||||
|
||||
bot_username = ((data.get("bot") or {}).get("username"))
|
||||
media = ((data.get("input") or {}).get("media")) or {}
|
||||
language = ((data.get("user") or {}).get("language")) or "es"
|
||||
text_fallback = ((data.get("input") or {}).get("text")) or ""
|
||||
|
||||
if not bot_username:
|
||||
return JsonResponse({"error": "missing bot.username"}, status=400)
|
||||
|
||||
transcript = None
|
||||
if media and media.get("file_id"):
|
||||
try:
|
||||
blob = _download_telegram_file(bot_username, media["file_id"])
|
||||
transcript = _transcribe_bytes(blob, language=language)
|
||||
except Exception as e:
|
||||
return JsonResponse({"error": f"transcription failed: {e}"}, status=502)
|
||||
|
||||
if not transcript:
|
||||
# Fallback to provided text so you can test without a voice note
|
||||
if not text_fallback:
|
||||
return JsonResponse({"error": "missing voice file_id or text fallback"}, status=400)
|
||||
transcript = text_fallback
|
||||
|
||||
return JsonResponse({
|
||||
"schema_version": "proc.v1",
|
||||
"kind": "transcript",
|
||||
"text": transcript,
|
||||
"meta": {
|
||||
"language": language,
|
||||
"has_media": bool(media and media.get("file_id")),
|
||||
}
|
||||
})
|
||||
|
||||
# -----------------------
|
||||
# /api/openai/voice_chat
|
||||
# -----------------------
|
||||
|
||||
@csrf_exempt
|
||||
def voice_chat(request):
|
||||
"""
|
||||
POST /api/openai/voice_chat
|
||||
- Transcribe Telegram voice/audio from req.v1
|
||||
- Forward a modified req.v1 (with input.text = transcript) to LangChain chat
|
||||
- Return the LangChain render.v1 response (pass-through)
|
||||
Transcribe (if voice present) and then chat with the transcript.
|
||||
|
||||
JSON (Telegram):
|
||||
{
|
||||
"schema_version":"req.v1",
|
||||
"bot":{"username":"PepeBasuritaCoinsBot"},
|
||||
"user":{"id":999,"language":"es"},
|
||||
"command":{"name":"__voice__","version":1,"trigger":"text_command"},
|
||||
"input":{"media":{"type":"voice","file_id":"..."}, "text":"(optional prompt)"},
|
||||
"assistant":"Urbanista" # optional
|
||||
}
|
||||
|
||||
Multipart (direct upload + prompt):
|
||||
POST multipart/form-data with:
|
||||
file=<audio>, prompt="...", assistant="Urbanista", language="es"
|
||||
"""
|
||||
if request.method != "POST":
|
||||
return HttpResponseBadRequest("POST only")
|
||||
return HttpResponse(status=405)
|
||||
|
||||
try:
|
||||
data = json.loads(request.body.decode("utf-8") or "{}")
|
||||
except Exception:
|
||||
return HttpResponseBadRequest("invalid json")
|
||||
|
||||
bot_username = ((data.get("bot") or {}).get("username"))
|
||||
media = ((data.get("input") or {}).get("media")) or {}
|
||||
language = ((data.get("user") or {}).get("language")) or "es"
|
||||
text_fallback = ((data.get("input") or {}).get("text")) or ""
|
||||
|
||||
if not bot_username:
|
||||
return JsonResponse({"error": "missing bot.username"}, status=400)
|
||||
|
||||
transcript = None
|
||||
if media and media.get("file_id"):
|
||||
# Multipart mode (file + optional prompt)
|
||||
if request.FILES.get("file"):
|
||||
audio = request.FILES["file"]
|
||||
prompt = request.POST.get("prompt") or ""
|
||||
language = request.POST.get("language") or "es"
|
||||
assistant_name = request.POST.get("assistant") or "Urbanista"
|
||||
try:
|
||||
blob = _download_telegram_file(bot_username, media["file_id"])
|
||||
transcript = _transcribe_bytes(blob, language=language)
|
||||
assistant = OpenAIAssistant(assistant_name)
|
||||
except Exception as e:
|
||||
return JsonResponse({"error": f"transcription failed: {e}"}, status=502)
|
||||
return _render_error(f"No pude iniciar el asistente '{assistant_name}': {e}", status=500)
|
||||
|
||||
if not transcript:
|
||||
if not text_fallback:
|
||||
return JsonResponse({"error": "missing voice file_id or text fallback"}, status=400)
|
||||
transcript = text_fallback
|
||||
try:
|
||||
suffix = "." + (audio.name.split(".")[-1] if "." in audio.name else "ogg")
|
||||
with tempfile.NamedTemporaryFile(delete=False, suffix=suffix) as tmp:
|
||||
for chunk in audio.chunks():
|
||||
tmp.write(chunk)
|
||||
tmp_path = tmp.name
|
||||
|
||||
# Build a new req.v1 for LangChain: keep everything, set input.text to transcript
|
||||
forward_payload = dict(data)
|
||||
forward_payload.setdefault("input", {})
|
||||
forward_payload["input"] = dict(forward_payload["input"])
|
||||
forward_payload["input"]["text"] = transcript
|
||||
# keep media in case downstream wants it; also annotate
|
||||
forward_payload["input"]["_transcript"] = True
|
||||
transcript = assistant.transcribe_file(tmp_path, language=language)
|
||||
user_message = transcript if not prompt else f"{prompt}\n\n[Transcripción]\n{transcript}"
|
||||
reply = assistant.chat_completion(user_message)
|
||||
return JsonResponse(_render_text(reply))
|
||||
except Exception as e:
|
||||
logger.exception("openai.voice_chat.upload_error")
|
||||
return _render_error(f"No se pudo procesar el audio: {e}", status=500)
|
||||
|
||||
# JSON mode (Telegram)
|
||||
env = _parse_json(request)
|
||||
assistant = _assistant_from_payload(env)
|
||||
if not assistant:
|
||||
return _render_error("No pude cargar el asistente de OpenAI (revisa el nombre o la configuración).", status=500)
|
||||
|
||||
user_lang = ((env.get("user") or {}).get("language")) or "es"
|
||||
media = ((env.get("input") or {}).get("media") or {})
|
||||
file_id = media.get("file_id")
|
||||
prompt = ((env.get("input") or {}).get("text")) or ""
|
||||
|
||||
# If no audio present, just chat using provided text (keeps endpoint usable)
|
||||
if not file_id:
|
||||
if not prompt:
|
||||
return JsonResponse(_render_text("¡Hola! 🌆 ¿Sobre qué te gustaría hablar hoy?"))
|
||||
try:
|
||||
reply = assistant.chat_completion(prompt)
|
||||
return JsonResponse(_render_text(reply))
|
||||
except Exception as e:
|
||||
logger.exception("openai.voice_chat.text_only_error")
|
||||
return _render_error(f"No se pudo generar respuesta: {e}", status=500)
|
||||
|
||||
# With Telegram voice
|
||||
bot_username = ((env.get("bot") or {}).get("username"))
|
||||
token = _get_bot_and_token(bot_username)
|
||||
if not token:
|
||||
return _render_error("No encontré el bot o su token para descargar el audio (bot.username).", status=400)
|
||||
|
||||
try:
|
||||
r = requests.post(
|
||||
LANGCHAIN_CHAT_URL,
|
||||
headers={"Content-Type": "application/json"},
|
||||
data=json.dumps(forward_payload, ensure_ascii=False).encode("utf-8"),
|
||||
timeout=30
|
||||
)
|
||||
transcript = assistant.transcribe_telegram(token, file_id, language=user_lang)
|
||||
user_message = transcript if not prompt else f"{prompt}\n\n[Transcripción]\n{transcript}"
|
||||
reply = assistant.chat_completion(user_message)
|
||||
return JsonResponse(_render_text(reply))
|
||||
except Exception as e:
|
||||
return JsonResponse({"error": f"forward to langchain failed: {e}"}, status=502)
|
||||
|
||||
# Proxy through the downstream response (expecting render.v1)
|
||||
try:
|
||||
body = r.json()
|
||||
except Exception:
|
||||
body = {"schema_version": "render.v1",
|
||||
"messages": [{"type": "text", "text": r.text[:1000]}]}
|
||||
return JsonResponse(body, status=r.status_code or 200, safe=False)
|
||||
logger.exception("openai.voice_chat.telegram_error")
|
||||
return _render_error(f"No se pudo procesar el audio: {e}", status=500)
|
||||
|
Loading…
x
Reference in New Issue
Block a user