OpenAI voice admin
All checks were successful
continuous-integration/drone/push Build is passing

This commit is contained in:
Ekaropolus 2025-09-17 22:44:12 -06:00
parent 88fe85c802
commit db61f56a24
2 changed files with 256 additions and 159 deletions

View File

@ -1,4 +1,8 @@
# pxy_openai/assistants.py
import logging
import tempfile
import requests # NEW
from typing import Optional # NEW
from .client import OpenAIClient
from .models import OpenAIAssistant as OpenAIAssistantModel
@ -6,30 +10,81 @@ logger = logging.getLogger(__name__)
class OpenAIAssistant:
"""
OpenAI Assistant for handling AI interactions.
OpenAI Assistant for handling AI interactions (chat + voice).
"""
def __init__(self, name):
    """
    Look up the assistant's DB configuration and build its OpenAI client.

    Args:
        name: Name of the ``OpenAIAssistantModel`` row to load.

    Raises:
        ValueError: if no assistant named *name* exists in the database.
    """
    try:
        config = OpenAIAssistantModel.objects.get(name=name)
        client = OpenAIClient(config.api_key).get_client()
    except OpenAIAssistantModel.DoesNotExist:
        raise ValueError(f"Assistant '{name}' not found in the database.")
    self.config = config
    self.client = client
# ---------- NEW: Whisper helpers ----------
def transcribe_file(self, path: str, language: Optional[str] = "es") -> str:
    """
    Run Whisper on a local audio file and return the plain-text transcript.

    Works with both the new OpenAI SDK surface
    (``client.audio.transcriptions.create``) and the legacy one
    (``client.Audio.transcriptions.create``).

    Args:
        path: Local filesystem path of the audio file.
        language: ISO language hint for Whisper; ``None``/empty disables it.

    Returns:
        The transcript as a stripped string.

    Raises:
        Exception: re-raises any SDK/IO error after logging it.
    """
    def _run(endpoint):
        # Shared call path for both SDK generations.
        with open(path, "rb") as audio_fh:
            result = endpoint.create(
                model="whisper-1",
                file=audio_fh,
                response_format="text",
                language=language or None,
            )
        return result.strip() if isinstance(result, str) else str(result)

    try:
        if hasattr(self.client, "audio") and hasattr(self.client.audio, "transcriptions"):
            # New SDK path
            return _run(self.client.audio.transcriptions)
        # Legacy SDK fallback
        return _run(self.client.Audio.transcriptions)  # type: ignore[attr-defined]
    except Exception as e:
        logger.error(f"Whisper transcription error: {e}")
        raise
def transcribe_telegram(self, bot_token: str, file_id: str, language: Optional[str] = "es") -> str:
    """
    Download a Telegram voice/audio file by file_id and transcribe it.

    Args:
        bot_token: Telegram bot token used for both the API call and the
            file download URL.
        file_id: Telegram file identifier from the update payload.
        language: ISO language hint forwarded to Whisper (default "es").

    Returns:
        The plain-text transcript.

    Raises:
        requests.HTTPError: if either Telegram request fails.
    """
    import os  # local import keeps the module's import block untouched

    # 1) Ask Telegram where the file lives on its servers.
    r = requests.get(
        f"https://api.telegram.org/bot{bot_token}/getFile",
        params={"file_id": file_id},
        timeout=10,
    )
    r.raise_for_status()
    file_path = r.json()["result"]["file_path"]

    # 2) Download the raw bytes into a temp file, preserving the original
    #    extension so the transcriber can recognize the container format.
    url = f"https://api.telegram.org/file/bot{bot_token}/{file_path}"
    with tempfile.NamedTemporaryFile(delete=False, suffix="." + file_path.split(".")[-1]) as tmp:
        resp = requests.get(url, timeout=30)
        resp.raise_for_status()
        tmp.write(resp.content)
        local_path = tmp.name

    # 3) Transcribe, then always delete the temp file.
    #    FIX: the file was created with delete=False and previously leaked
    #    on every call.
    try:
        return self.transcribe_file(local_path, language=language)
    finally:
        try:
            os.remove(local_path)
        except OSError:
            pass
# ---------- existing chat/agents methods ----------
def chat_completion(self, user_message):
"""
Call OpenAI's chat completion API.
"""
try:
response = self.client.chat.completions.create(
model="gpt-4o-mini",
messages=[
{
"role": "system",
"content": self.config.description, # Use description as the system prompt
},
{"role": "system", "content": self.config.description},
{"role": "user", "content": user_message},
],
)
@ -39,23 +94,15 @@ class OpenAIAssistant:
return f"Error in chat completion: {e}"
def agent_workflow(self, user_message):
"""
Call OpenAI's advanced agent workflow API.
"""
try:
if not self.config.assistant_id:
raise ValueError(f"Assistant '{self.config.name}' does not have an associated assistant ID.")
assistant = self.client.beta.assistants.retrieve(self.config.assistant_id)
thread = self.client.beta.threads.create()
# Create a message in the thread
self.client.beta.threads.messages.create(thread_id=thread.id, role="user", content=user_message)
# Run the assistant workflow
run = self.client.beta.threads.runs.create(thread_id=thread.id, assistant_id=assistant.id)
# Poll for the result
while run.status in ["queued", "in_progress"]:
run = self.client.beta.threads.runs.retrieve(thread_id=thread.id, run_id=run.id)
if run.status == "completed":
@ -68,9 +115,6 @@ class OpenAIAssistant:
return f"Error in agent workflow: {e}"
def handle_message(self, user_message):
"""
Automatically select the correct method based on assistant type.
"""
if self.config.is_special_assistant():
return self.agent_workflow(user_message)
return self.chat_completion(user_message)

View File

@ -1,176 +1,229 @@
# pxy_openai/views.py
import os
import io
import json
import logging
import tempfile
import requests
import openai
from typing import Any, Dict, Optional
from django.http import JsonResponse, HttpResponseBadRequest
from django.http import JsonResponse, HttpResponse
from django.views.decorators.csrf import csrf_exempt
from django.apps import apps
from django.utils.text import slugify
from pxy_bots.models import TelegramBot # to fetch the bot token from DB
from .assistants import OpenAIAssistant
# Configure OpenAI
openai.api_key = os.getenv("OPENAI_API_KEY")
logger = logging.getLogger(__name__)
# Where to forward the transcript for chat
LANGCHAIN_CHAT_URL = os.getenv(
"LANGCHAIN_CHAT_URL",
"http://app.polisplexity.tech:8010/api/langchain/chat"
)
def _download_telegram_file(bot_username: str, file_id: str) -> bytes:
# -----------------------
# Helpers
# -----------------------
def _parse_json(request) -> Dict[str, Any]:
try:
if request.content_type and "application/json" in request.content_type:
return json.loads(request.body.decode("utf-8") or "{}")
# allow x-www-form-urlencoded fallback
if request.POST and request.POST.get("payload"):
return json.loads(request.POST["payload"])
except Exception as e:
logger.warning("openai.api.bad_json: %s", e)
return {}
def _render_text(text: str) -> Dict[str, Any]:
return {
"schema_version": "render.v1",
"messages": [{"type": "text", "text": str(text)}],
}
def _render_error(text: str, status: int = 400) -> JsonResponse:
    """Ship an error as render.v1 so the bot-facing UX stays consistent."""
    payload = _render_text(f"⚠️ {text}")
    return JsonResponse(payload, status=status)
def _get_bot_and_token(bot_username: Optional[str]) -> Optional[str]:
"""
Resolve a Telegram file_id to bytes using the bot's token.
Raises ValueError on any failure.
Find Telegram bot token by either TelegramBot.name or .username matching the provided bot_username.
Returns token or None.
"""
bot = TelegramBot.objects.filter(username=bot_username, is_active=True).first()
if not bot:
raise ValueError(f"bot '{bot_username}' not found or inactive")
if not bot_username:
return None
TelegramBot = apps.get_model("pxy_bots", "TelegramBot")
bot = (TelegramBot.objects.filter(name=bot_username, is_active=True).first() or
TelegramBot.objects.filter(username=bot_username, is_active=True).first())
return bot.token if bot else None
tg_api = f"https://api.telegram.org/bot{bot.token}"
r = requests.get(f"{tg_api}/getFile", params={"file_id": file_id}, timeout=10)
if r.status_code != 200 or not r.json().get("ok"):
raise ValueError("telegram getFile failed")
file_path = r.json()["result"]["file_path"]
file_url = f"https://api.telegram.org/file/bot{bot.token}/{file_path}"
dl = requests.get(file_url, timeout=20)
if dl.status_code != 200:
raise ValueError("telegram file download failed")
return dl.content
def _assistant_from_payload(env: Dict[str, Any]) -> Optional[OpenAIAssistant]:
    """
    Build an ``OpenAIAssistant`` from the payload's ``"assistant"`` key,
    defaulting to ``"Urbanista"``.

    Returns:
        The assistant instance, or ``None`` (after logging) when
        construction fails — e.g. the name is unknown in the DB.
    """
    # NOTE: this span was an interleaved diff of the removed
    # _transcribe_bytes helper and this function; reconstructed here as the
    # single coherent new implementation.
    name = (env.get("assistant") or "Urbanista")
    try:
        return OpenAIAssistant(name=name)
    except Exception as e:
        logger.error("openai.assistant.init_failed name=%s err=%s", name, e)
        return None
# -----------------------
# /api/openai/transcribe
# -----------------------
@csrf_exempt
def transcribe(request):
    """
    Transcribe audio to text.

    JSON mode (Telegram voice):
      {
        "schema_version":"req.v1",
        "bot":{"username":"PepeBasuritaCoinsBot"},
        "user":{"language":"es"},
        "input":{"media":{"type":"voice","file_id":"..."}},
        "assistant":"Urbanista"   # optional
      }

    Multipart mode (direct upload):
      POST multipart/form-data with file=<audio file>, and optional fields:
        assistant=Urbanista
        language=es

    Returns a render.v1 JSON payload with the transcript, or a render.v1
    error message with an appropriate HTTP status.
    """
    # NOTE: this span interleaved the removed JSON-only implementation with
    # the new one; reconstructed here as the coherent new implementation.
    if request.method != "POST":
        return HttpResponse(status=405)

    # ---- Multipart direct upload ----
    if request.FILES.get("file"):
        audio = request.FILES["file"]
        language = request.POST.get("language") or "es"
        assistant_name = request.POST.get("assistant") or "Urbanista"
        try:
            assistant = OpenAIAssistant(assistant_name)
        except Exception as e:
            return _render_error(f"No pude iniciar el asistente '{assistant_name}': {e}", status=500)

        tmp_path = None
        try:
            # Keep the upload's extension so Whisper can sniff the format.
            suffix = "." + (audio.name.split(".")[-1] if "." in audio.name else "ogg")
            with tempfile.NamedTemporaryFile(delete=False, suffix=suffix) as tmp:
                for chunk in audio.chunks():
                    tmp.write(chunk)
                tmp_path = tmp.name
            text = assistant.transcribe_file(tmp_path, language=language)
            return JsonResponse(_render_text(text))
        except Exception as e:
            logger.exception("openai.transcribe.upload_error")
            return _render_error(f"No se pudo transcribir el audio subido: {e}", status=500)
        finally:
            # FIX: the temp file was previously leaked on every request.
            if tmp_path:
                try:
                    os.remove(tmp_path)
                except OSError:
                    pass

    # ---- JSON mode (Telegram voice note) ----
    env = _parse_json(request)
    assistant = _assistant_from_payload(env)
    if not assistant:
        return _render_error("No pude cargar el asistente de OpenAI (revisa el nombre o la configuración).", status=500)

    user_lang = ((env.get("user") or {}).get("language")) or "es"
    media = ((env.get("input") or {}).get("media") or {})
    file_id = media.get("file_id")
    bot_username = ((env.get("bot") or {}).get("username"))

    if not file_id:
        return _render_error("No encontré audio en la petición (falta media.file_id o file).", status=400)

    token = _get_bot_and_token(bot_username)
    if not token:
        return _render_error("No encontré el bot o su token para descargar el audio (bot.username).", status=400)

    try:
        text = assistant.transcribe_telegram(token, file_id, language=user_lang)
        return JsonResponse(_render_text(text))
    except Exception as e:
        logger.exception("openai.transcribe.telegram_error")
        return _render_error(f"No se pudo transcribir el audio: {e}", status=500)
# -----------------------
# /api/openai/voice_chat
# -----------------------
@csrf_exempt
def voice_chat(request):
    """
    Transcribe (if voice present) and then chat with the transcript.

    JSON (Telegram):
      {
        "schema_version":"req.v1",
        "bot":{"username":"PepeBasuritaCoinsBot"},
        "user":{"id":999,"language":"es"},
        "command":{"name":"__voice__","version":1,"trigger":"text_command"},
        "input":{"media":{"type":"voice","file_id":"..."}, "text":"(optional prompt)"},
        "assistant":"Urbanista"   # optional
      }

    Multipart (direct upload + prompt):
      POST multipart/form-data with:
        file=<audio>, prompt="...", assistant="Urbanista", language="es"

    Returns a render.v1 JSON payload with the assistant's reply, or a
    render.v1 error message with an appropriate HTTP status.
    """
    # NOTE: this span interleaved the removed LangChain-forwarding
    # implementation with the new one; reconstructed here as the coherent
    # new implementation.
    if request.method != "POST":
        return HttpResponse(status=405)

    # ---- Multipart mode (file + optional prompt) ----
    if request.FILES.get("file"):
        audio = request.FILES["file"]
        prompt = request.POST.get("prompt") or ""
        language = request.POST.get("language") or "es"
        assistant_name = request.POST.get("assistant") or "Urbanista"
        try:
            assistant = OpenAIAssistant(assistant_name)
        except Exception as e:
            return _render_error(f"No pude iniciar el asistente '{assistant_name}': {e}", status=500)

        tmp_path = None
        try:
            suffix = "." + (audio.name.split(".")[-1] if "." in audio.name else "ogg")
            with tempfile.NamedTemporaryFile(delete=False, suffix=suffix) as tmp:
                for chunk in audio.chunks():
                    tmp.write(chunk)
                tmp_path = tmp.name
            transcript = assistant.transcribe_file(tmp_path, language=language)
            user_message = transcript if not prompt else f"{prompt}\n\n[Transcripción]\n{transcript}"
            reply = assistant.chat_completion(user_message)
            return JsonResponse(_render_text(reply))
        except Exception as e:
            logger.exception("openai.voice_chat.upload_error")
            return _render_error(f"No se pudo procesar el audio: {e}", status=500)
        finally:
            # FIX: the temp file was previously leaked on every request.
            if tmp_path:
                try:
                    os.remove(tmp_path)
                except OSError:
                    pass

    # ---- JSON mode (Telegram) ----
    env = _parse_json(request)
    assistant = _assistant_from_payload(env)
    if not assistant:
        return _render_error("No pude cargar el asistente de OpenAI (revisa el nombre o la configuración).", status=500)

    user_lang = ((env.get("user") or {}).get("language")) or "es"
    media = ((env.get("input") or {}).get("media") or {})
    file_id = media.get("file_id")
    prompt = ((env.get("input") or {}).get("text")) or ""

    # No audio present: just chat with the provided text (keeps endpoint usable).
    if not file_id:
        if not prompt:
            return JsonResponse(_render_text("¡Hola! 🌆 ¿Sobre qué te gustaría hablar hoy?"))
        try:
            reply = assistant.chat_completion(prompt)
            return JsonResponse(_render_text(reply))
        except Exception as e:
            logger.exception("openai.voice_chat.text_only_error")
            return _render_error(f"No se pudo generar respuesta: {e}", status=500)

    # With a Telegram voice note: resolve the bot token to download it.
    bot_username = ((env.get("bot") or {}).get("username"))
    token = _get_bot_and_token(bot_username)
    if not token:
        return _render_error("No encontré el bot o su token para descargar el audio (bot.username).", status=400)

    try:
        transcript = assistant.transcribe_telegram(token, file_id, language=user_lang)
        user_message = transcript if not prompt else f"{prompt}\n\n[Transcripción]\n{transcript}"
        reply = assistant.chat_completion(user_message)
        return JsonResponse(_render_text(reply))
    except Exception as e:
        logger.exception("openai.voice_chat.telegram_error")
        return _render_error(f"No se pudo procesar el audio: {e}", status=500)