OpenAI voice admin

Ekaropolus 2025-09-17 22:44:12 -06:00
parent 88fe85c802
commit db61f56a24
2 changed files with 256 additions and 159 deletions

pxy_openai/assistants.py

@@ -1,4 +1,8 @@
 # pxy_openai/assistants.py
 import logging
+import tempfile
+import requests  # NEW
+from typing import Optional  # NEW
 from .client import OpenAIClient
 from .models import OpenAIAssistant as OpenAIAssistantModel
@@ -6,30 +10,81 @@ logger = logging.getLogger(__name__)
 class OpenAIAssistant:
     """
-    OpenAI Assistant for handling AI interactions.
+    OpenAI Assistant for handling AI interactions (chat + voice).
     """

     def __init__(self, name):
-        """
-        Initialize the assistant by loading its configuration from the database.
-        """
         try:
             self.config = OpenAIAssistantModel.objects.get(name=name)
             self.client = OpenAIClient(self.config.api_key).get_client()
         except OpenAIAssistantModel.DoesNotExist:
             raise ValueError(f"Assistant '{name}' not found in the database.")

+    # ---------- NEW: Whisper helpers ----------
+    def transcribe_file(self, path: str, language: Optional[str] = "es") -> str:
+        """
+        Transcribe a local audio file using Whisper. Returns plain text.
+        Supports both the new OpenAI SDK (client.audio.transcriptions.create)
+        and the legacy SDK (openai.Audio.transcriptions.create).
+        """
+        try:
+            # New SDK path
+            if hasattr(self.client, "audio") and hasattr(self.client.audio, "transcriptions"):
+                with open(path, "rb") as f:
+                    tx = self.client.audio.transcriptions.create(
+                        model="whisper-1",
+                        file=f,
+                        response_format="text",
+                        language=language or None,
+                    )
+                return tx.strip() if isinstance(tx, str) else str(tx)
+            # Legacy SDK fallback
+            with open(path, "rb") as f:
+                tx = self.client.Audio.transcriptions.create(  # type: ignore[attr-defined]
+                    model="whisper-1",
+                    file=f,
+                    response_format="text",
+                    language=language or None,
+                )
+            return tx.strip() if isinstance(tx, str) else str(tx)
+        except Exception as e:
+            logger.error(f"Whisper transcription error: {e}")
+            raise

+    def transcribe_telegram(self, bot_token: str, file_id: str, language: Optional[str] = "es") -> str:
+        """
+        Download a Telegram voice/audio file by file_id and transcribe it.
+        """
+        # 1) Resolve the file path via getFile
+        r = requests.get(
+            f"https://api.telegram.org/bot{bot_token}/getFile",
+            params={"file_id": file_id},
+            timeout=10,
+        )
+        r.raise_for_status()
+        file_path = r.json()["result"]["file_path"]
+        # 2) Download the actual bytes
+        url = f"https://api.telegram.org/file/bot{bot_token}/{file_path}"
+        with tempfile.NamedTemporaryFile(delete=False, suffix="." + file_path.split(".")[-1]) as tmp:
+            resp = requests.get(url, timeout=30)
+            resp.raise_for_status()
+            tmp.write(resp.content)
+            local_path = tmp.name
+        # 3) Transcribe
+        return self.transcribe_file(local_path, language=language)

+    # ---------- existing chat/agents methods ----------
     def chat_completion(self, user_message):
-        """
-        Call OpenAI's chat completion API.
-        """
         try:
             response = self.client.chat.completions.create(
                 model="gpt-4o-mini",
                 messages=[
-                    {
-                        "role": "system",
-                        "content": self.config.description,  # Use description as the system prompt
-                    },
+                    {"role": "system", "content": self.config.description},
                     {"role": "user", "content": user_message},
                 ],
             )
@@ -39,23 +94,15 @@ class OpenAIAssistant:
             return f"Error in chat completion: {e}"

     def agent_workflow(self, user_message):
-        """
-        Call OpenAI's advanced agent workflow API.
-        """
         try:
             if not self.config.assistant_id:
                 raise ValueError(f"Assistant '{self.config.name}' does not have an associated assistant ID.")
             assistant = self.client.beta.assistants.retrieve(self.config.assistant_id)
             thread = self.client.beta.threads.create()
-            # Create a message in the thread
             self.client.beta.threads.messages.create(thread_id=thread.id, role="user", content=user_message)
-            # Run the assistant workflow
             run = self.client.beta.threads.runs.create(thread_id=thread.id, assistant_id=assistant.id)
-            # Poll for the result
             while run.status in ["queued", "in_progress"]:
                 run = self.client.beta.threads.runs.retrieve(thread_id=thread.id, run_id=run.id)
             if run.status == "completed":
@@ -68,9 +115,6 @@ class OpenAIAssistant:
             return f"Error in agent workflow: {e}"

     def handle_message(self, user_message):
-        """
-        Automatically select the correct method based on assistant type.
-        """
         if self.config.is_special_assistant():
             return self.agent_workflow(user_message)
         return self.chat_completion(user_message)
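
The new helpers can be smoke-tested from a Django shell. A minimal sketch, assuming an assistant named "Urbanista" exists in the database; the token and file_id below are placeholders for illustration, not values defined by this commit:

from pxy_openai.assistants import OpenAIAssistant

BOT_TOKEN = "<telegram-bot-token>"   # placeholder
FILE_ID = "<voice-message-file-id>"  # placeholder, from a Telegram update

assistant = OpenAIAssistant(name="Urbanista")

# Local file already on disk -> plain text transcript.
print(assistant.transcribe_file("/tmp/voice.ogg", language="es"))

# Telegram flow: getFile -> download -> Whisper, in one call.
print(assistant.transcribe_telegram(BOT_TOKEN, FILE_ID, language="es"))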

pxy_openai/views.py

@@ -1,176 +1,229 @@
 # pxy_openai/views.py
-import os
+import io
 import json
+import logging
 import tempfile
-import requests
-import openai
-from django.http import JsonResponse, HttpResponseBadRequest
+from typing import Any, Dict, Optional
+from django.http import JsonResponse, HttpResponse
 from django.views.decorators.csrf import csrf_exempt
+from django.apps import apps
+from django.utils.text import slugify
-from pxy_bots.models import TelegramBot  # to fetch the bot token from DB
+from .assistants import OpenAIAssistant

-# Configure OpenAI
-openai.api_key = os.getenv("OPENAI_API_KEY")
+logger = logging.getLogger(__name__)

-# Where to forward the transcript for chat
-LANGCHAIN_CHAT_URL = os.getenv(
-    "LANGCHAIN_CHAT_URL",
-    "http://app.polisplexity.tech:8010/api/langchain/chat"
-)
-
-def _download_telegram_file(bot_username: str, file_id: str) -> bytes:
-    """
-    Resolve a Telegram file_id to bytes using the bot's token.
-    Raises ValueError on any failure.
-    """
-    bot = TelegramBot.objects.filter(username=bot_username, is_active=True).first()
-    if not bot:
-        raise ValueError(f"bot '{bot_username}' not found or inactive")
-    tg_api = f"https://api.telegram.org/bot{bot.token}"
-    r = requests.get(f"{tg_api}/getFile", params={"file_id": file_id}, timeout=10)
-    if r.status_code != 200 or not r.json().get("ok"):
-        raise ValueError("telegram getFile failed")
-    file_path = r.json()["result"]["file_path"]
-    file_url = f"https://api.telegram.org/file/bot{bot.token}/{file_path}"
-    dl = requests.get(file_url, timeout=20)
-    if dl.status_code != 200:
-        raise ValueError("telegram file download failed")
-    return dl.content
-
-def _transcribe_bytes(raw: bytes, language: str = "es") -> str:
-    """
-    Transcribe OGG/Opus (or other) audio bytes with OpenAI.
-    Returns plain text.
-    """
-    # Write to a temp file so the OpenAI client can stream it
-    with tempfile.NamedTemporaryFile(suffix=".ogg") as tmp:
-        tmp.write(raw)
-        tmp.flush()
-        with open(tmp.name, "rb") as fh:
-            # "gpt-4o-transcribe" or "whisper-1" depending on your account
-            result = openai.audio.transcriptions.create(
-                model="gpt-4o-transcribe",
-                file=fh,
-                response_format="text",
-                language=language or "es"
-            )
-    return (result.strip() if isinstance(result, str) else str(result)).strip()

+# -----------------------
+# Helpers
+# -----------------------
+def _parse_json(request) -> Dict[str, Any]:
+    try:
+        if request.content_type and "application/json" in request.content_type:
+            return json.loads(request.body.decode("utf-8") or "{}")
+        # allow x-www-form-urlencoded fallback
+        if request.POST and request.POST.get("payload"):
+            return json.loads(request.POST["payload"])
+    except Exception as e:
+        logger.warning("openai.api.bad_json: %s", e)
+    return {}

+def _render_text(text: str) -> Dict[str, Any]:
+    return {
+        "schema_version": "render.v1",
+        "messages": [{"type": "text", "text": str(text)}],
+    }

+def _render_error(text: str, status: int = 400) -> JsonResponse:
+    # Return as render.v1 for bot consumption (keeps the UX consistent)
+    return JsonResponse(_render_text(f"⚠️ {text}"), status=status)

+def _get_bot_and_token(bot_username: Optional[str]) -> Optional[str]:
+    """
+    Find a Telegram bot token by matching the provided bot_username against
+    either TelegramBot.name or TelegramBot.username. Returns the token or None.
+    """
+    if not bot_username:
+        return None
+    TelegramBot = apps.get_model("pxy_bots", "TelegramBot")
+    bot = (TelegramBot.objects.filter(name=bot_username, is_active=True).first() or
+           TelegramBot.objects.filter(username=bot_username, is_active=True).first())
+    return bot.token if bot else None

+def _assistant_from_payload(env: Dict[str, Any]) -> Optional[OpenAIAssistant]:
+    """
+    Build an OpenAIAssistant from a name provided in the payload, otherwise fall
+    back to a sensible default. You can pass "assistant": "Urbanista" in the root payload.
+    """
+    name = (env.get("assistant") or "Urbanista")
+    try:
+        return OpenAIAssistant(name=name)
+    except Exception as e:
+        logger.error("openai.assistant.init_failed name=%s err=%s", name, e)
+        return None
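
Every response from the new views below is the render.v1 envelope built by _render_text, so a successful call returns a JSON body of this shape (the text value here is a placeholder):

{
    "schema_version": "render.v1",
    "messages": [{"type": "text", "text": "<transcript or reply>"}]
}

Errors reuse the same shape via _render_error, with the message prefixed by ⚠️ and a non-200 status code.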
+# -----------------------
+# /api/openai/transcribe
+# -----------------------
 @csrf_exempt
 def transcribe(request):
     """
-    POST /api/openai/transcribe
-    Accepts req.v1. If input.media.file_id exists, we fetch from Telegram and transcribe.
-    If not, we fall back to input.text (handy for quick tests).
-    Returns:
-      {"schema_version":"proc.v1","kind":"transcript","text":"...","meta":{...}}
+    Transcribe audio to text.
+
+    JSON mode (Telegram voice):
+      {
+        "schema_version": "req.v1",
+        "bot": {"username": "PepeBasuritaCoinsBot"},
+        "user": {"language": "es"},
+        "input": {"media": {"type": "voice", "file_id": "..."}},
+        "assistant": "Urbanista"   # optional
+      }
+
+    Multipart mode (direct upload):
+      POST multipart/form-data with file=<audio file>, and optional fields:
+        assistant=Urbanista
+        language=es
     """
     if request.method != "POST":
-        return HttpResponseBadRequest("POST only")
-    try:
-        data = json.loads(request.body.decode("utf-8") or "{}")
-    except Exception:
-        return HttpResponseBadRequest("invalid json")
-    bot_username = ((data.get("bot") or {}).get("username"))
-    media = ((data.get("input") or {}).get("media")) or {}
-    language = ((data.get("user") or {}).get("language")) or "es"
-    text_fallback = ((data.get("input") or {}).get("text")) or ""
-    if not bot_username:
-        return JsonResponse({"error": "missing bot.username"}, status=400)
-    transcript = None
-    if media and media.get("file_id"):
-        try:
-            blob = _download_telegram_file(bot_username, media["file_id"])
-            transcript = _transcribe_bytes(blob, language=language)
-        except Exception as e:
-            return JsonResponse({"error": f"transcription failed: {e}"}, status=502)
-    if not transcript:
-        # Fallback to provided text so you can test without a voice note
-        if not text_fallback:
-            return JsonResponse({"error": "missing voice file_id or text fallback"}, status=400)
-        transcript = text_fallback
-    return JsonResponse({
-        "schema_version": "proc.v1",
-        "kind": "transcript",
-        "text": transcript,
-        "meta": {
-            "language": language,
-            "has_media": bool(media and media.get("file_id")),
-        }
-    })
+        return HttpResponse(status=405)

+    # Multipart direct upload?
+    if request.FILES.get("file"):
+        audio = request.FILES["file"]
+        language = request.POST.get("language") or "es"
+        assistant_name = request.POST.get("assistant") or "Urbanista"
+        try:
+            assistant = OpenAIAssistant(assistant_name)
+        except Exception as e:
+            return _render_error(f"No pude iniciar el asistente '{assistant_name}': {e}", status=500)
+        # Save to a temp file and run Whisper
+        try:
+            suffix = "." + (audio.name.split(".")[-1] if "." in audio.name else "ogg")
+            with tempfile.NamedTemporaryFile(delete=False, suffix=suffix) as tmp:
+                for chunk in audio.chunks():
+                    tmp.write(chunk)
+                tmp_path = tmp.name
+            text = assistant.transcribe_file(tmp_path, language=language)
+            return JsonResponse(_render_text(text))
+        except Exception as e:
+            logger.exception("openai.transcribe.upload_error")
+            return _render_error(f"No se pudo transcribir el audio subido: {e}", status=500)

+    # JSON mode
+    env = _parse_json(request)
+    assistant = _assistant_from_payload(env)
+    if not assistant:
+        return _render_error("No pude cargar el asistente de OpenAI (revisa el nombre o la configuración).", status=500)

+    user_lang = ((env.get("user") or {}).get("language")) or "es"
+    media = ((env.get("input") or {}).get("media") or {})
+    file_id = media.get("file_id")
+    bot_username = ((env.get("bot") or {}).get("username"))

+    if not file_id:
+        return _render_error("No encontré audio en la petición (falta media.file_id o file).", status=400)

+    token = _get_bot_and_token(bot_username)
+    if not token:
+        return _render_error("No encontré el bot o su token para descargar el audio (bot.username).", status=400)

+    try:
+        text = assistant.transcribe_telegram(token, file_id, language=user_lang)
+        return JsonResponse(_render_text(text))
+    except Exception as e:
+        logger.exception("openai.transcribe.telegram_error")
+        return _render_error(f"No se pudo transcribir el audio: {e}", status=500)
+# -----------------------
+# /api/openai/voice_chat
+# -----------------------
 @csrf_exempt
 def voice_chat(request):
     """
-    POST /api/openai/voice_chat
-    - Transcribe Telegram voice/audio from req.v1
-    - Forward a modified req.v1 (with input.text = transcript) to LangChain chat
-    - Return the LangChain render.v1 response (pass-through)
+    Transcribe (if voice is present) and then chat with the transcript.
+
+    JSON mode (Telegram):
+      {
+        "schema_version": "req.v1",
+        "bot": {"username": "PepeBasuritaCoinsBot"},
+        "user": {"id": 999, "language": "es"},
+        "command": {"name": "__voice__", "version": 1, "trigger": "text_command"},
+        "input": {"media": {"type": "voice", "file_id": "..."}, "text": "(optional prompt)"},
+        "assistant": "Urbanista"   # optional
+      }
+
+    Multipart mode (direct upload + prompt):
+      POST multipart/form-data with:
+        file=<audio>, prompt="...", assistant="Urbanista", language="es"
     """
     if request.method != "POST":
-        return HttpResponseBadRequest("POST only")
-    try:
-        data = json.loads(request.body.decode("utf-8") or "{}")
-    except Exception:
-        return HttpResponseBadRequest("invalid json")
-    bot_username = ((data.get("bot") or {}).get("username"))
-    media = ((data.get("input") or {}).get("media")) or {}
-    language = ((data.get("user") or {}).get("language")) or "es"
-    text_fallback = ((data.get("input") or {}).get("text")) or ""
-    if not bot_username:
-        return JsonResponse({"error": "missing bot.username"}, status=400)
-    transcript = None
-    if media and media.get("file_id"):
-        try:
-            blob = _download_telegram_file(bot_username, media["file_id"])
-            transcript = _transcribe_bytes(blob, language=language)
-        except Exception as e:
-            return JsonResponse({"error": f"transcription failed: {e}"}, status=502)
-    if not transcript:
-        if not text_fallback:
-            return JsonResponse({"error": "missing voice file_id or text fallback"}, status=400)
-        transcript = text_fallback
-    # Build a new req.v1 for LangChain: keep everything, set input.text to transcript
-    forward_payload = dict(data)
-    forward_payload.setdefault("input", {})
-    forward_payload["input"] = dict(forward_payload["input"])
-    forward_payload["input"]["text"] = transcript
-    # keep media in case downstream wants it; also annotate
-    forward_payload["input"]["_transcript"] = True
-    try:
-        r = requests.post(
-            LANGCHAIN_CHAT_URL,
-            headers={"Content-Type": "application/json"},
-            data=json.dumps(forward_payload, ensure_ascii=False).encode("utf-8"),
-            timeout=30
-        )
-    except Exception as e:
-        return JsonResponse({"error": f"forward to langchain failed: {e}"}, status=502)
-    # Proxy through the downstream response (expecting render.v1)
-    try:
-        body = r.json()
-    except Exception:
-        body = {"schema_version": "render.v1",
-                "messages": [{"type": "text", "text": r.text[:1000]}]}
-    return JsonResponse(body, status=r.status_code or 200, safe=False)
+        return HttpResponse(status=405)

+    # Multipart mode (file + optional prompt)
+    if request.FILES.get("file"):
+        audio = request.FILES["file"]
+        prompt = request.POST.get("prompt") or ""
+        language = request.POST.get("language") or "es"
+        assistant_name = request.POST.get("assistant") or "Urbanista"
+        try:
+            assistant = OpenAIAssistant(assistant_name)
+        except Exception as e:
+            return _render_error(f"No pude iniciar el asistente '{assistant_name}': {e}", status=500)
+        try:
+            suffix = "." + (audio.name.split(".")[-1] if "." in audio.name else "ogg")
+            with tempfile.NamedTemporaryFile(delete=False, suffix=suffix) as tmp:
+                for chunk in audio.chunks():
+                    tmp.write(chunk)
+                tmp_path = tmp.name
+            transcript = assistant.transcribe_file(tmp_path, language=language)
+            user_message = transcript if not prompt else f"{prompt}\n\n[Transcripción]\n{transcript}"
+            reply = assistant.chat_completion(user_message)
+            return JsonResponse(_render_text(reply))
+        except Exception as e:
+            logger.exception("openai.voice_chat.upload_error")
+            return _render_error(f"No se pudo procesar el audio: {e}", status=500)

+    # JSON mode (Telegram)
+    env = _parse_json(request)
+    assistant = _assistant_from_payload(env)
+    if not assistant:
+        return _render_error("No pude cargar el asistente de OpenAI (revisa el nombre o la configuración).", status=500)

+    user_lang = ((env.get("user") or {}).get("language")) or "es"
+    media = ((env.get("input") or {}).get("media") or {})
+    file_id = media.get("file_id")
+    prompt = ((env.get("input") or {}).get("text")) or ""

+    # If no audio is present, just chat with the provided text (keeps the endpoint usable)
+    if not file_id:
+        if not prompt:
+            return JsonResponse(_render_text("¡Hola! 🌆 ¿Sobre qué te gustaría hablar hoy?"))
+        try:
+            reply = assistant.chat_completion(prompt)
+            return JsonResponse(_render_text(reply))
+        except Exception as e:
+            logger.exception("openai.voice_chat.text_only_error")
+            return _render_error(f"No se pudo generar respuesta: {e}", status=500)

+    # With Telegram voice
+    bot_username = ((env.get("bot") or {}).get("username"))
+    token = _get_bot_and_token(bot_username)
+    if not token:
+        return _render_error("No encontré el bot o su token para descargar el audio (bot.username).", status=400)

+    try:
+        transcript = assistant.transcribe_telegram(token, file_id, language=user_lang)
+        user_message = transcript if not prompt else f"{prompt}\n\n[Transcripción]\n{transcript}"
+        reply = assistant.chat_completion(user_message)
+        return JsonResponse(_render_text(reply))
+    except Exception as e:
+        logger.exception("openai.voice_chat.telegram_error")
+        return _render_error(f"No se pudo procesar el audio: {e}", status=500)