From db61f56a24b127df6ac1e6106a36e7ac69c66c18 Mon Sep 17 00:00:00 2001 From: Ekaropolus Date: Wed, 17 Sep 2025 22:44:12 -0600 Subject: [PATCH] Open AI voice admin --- pxy_openai/assistants.py | 88 ++++++++--- pxy_openai/views.py | 327 +++++++++++++++++++++++---------------- 2 files changed, 256 insertions(+), 159 deletions(-) diff --git a/pxy_openai/assistants.py b/pxy_openai/assistants.py index 5c2adc4..0c5892c 100644 --- a/pxy_openai/assistants.py +++ b/pxy_openai/assistants.py @@ -1,4 +1,8 @@ +# pxy_openai/assistants.py import logging +import tempfile +import requests # NEW +from typing import Optional # NEW from .client import OpenAIClient from .models import OpenAIAssistant as OpenAIAssistantModel @@ -6,30 +10,81 @@ logger = logging.getLogger(__name__) class OpenAIAssistant: """ - OpenAI Assistant for handling AI interactions. + OpenAI Assistant for handling AI interactions (chat + voice). """ def __init__(self, name): - """ - Initialize the assistant by loading its configuration from the database. - """ try: self.config = OpenAIAssistantModel.objects.get(name=name) self.client = OpenAIClient(self.config.api_key).get_client() except OpenAIAssistantModel.DoesNotExist: raise ValueError(f"Assistant '{name}' not found in the database.") + # ---------- NEW: Whisper helpers ---------- + + def transcribe_file(self, path: str, language: Optional[str] = "es") -> str: + """ + Transcribe a local audio file using Whisper. Returns plain text. + Supports both new OpenAI SDK (client.audio.transcriptions.create) + and legacy (openai.Audio.transcriptions.create). + """ + try: + # New SDK path + if hasattr(self.client, "audio") and hasattr(self.client.audio, "transcriptions"): + with open(path, "rb") as f: + tx = self.client.audio.transcriptions.create( + model="whisper-1", + file=f, + response_format="text", + language=language or None, + ) + return tx.strip() if isinstance(tx, str) else str(tx) + + # Legacy SDK fallback + with open(path, "rb") as f: + tx = self.client.Audio.transcriptions.create( # type: ignore[attr-defined] + model="whisper-1", + file=f, + response_format="text", + language=language or None, + ) + return tx.strip() if isinstance(tx, str) else str(tx) + + except Exception as e: + logger.error(f"Whisper transcription error: {e}") + raise + + def transcribe_telegram(self, bot_token: str, file_id: str, language: Optional[str] = "es") -> str: + """ + Download a Telegram voice/audio by file_id and transcribe it. + """ + # 1) getFile + r = requests.get( + f"https://api.telegram.org/bot{bot_token}/getFile", + params={"file_id": file_id}, + timeout=10, + ) + r.raise_for_status() + file_path = r.json()["result"]["file_path"] + + # 2) download actual bytes + url = f"https://api.telegram.org/file/bot{bot_token}/{file_path}" + with tempfile.NamedTemporaryFile(delete=False, suffix="." + file_path.split(".")[-1]) as tmp: + resp = requests.get(url, timeout=30) + resp.raise_for_status() + tmp.write(resp.content) + local_path = tmp.name + + # 3) transcribe + return self.transcribe_file(local_path, language=language) + + # ---------- existing chat/agents methods ---------- + def chat_completion(self, user_message): - """ - Call OpenAI's chat completion API. - """ try: response = self.client.chat.completions.create( model="gpt-4o-mini", messages=[ - { - "role": "system", - "content": self.config.description, # Use description as the system prompt - }, + {"role": "system", "content": self.config.description}, {"role": "user", "content": user_message}, ], ) @@ -39,23 +94,15 @@ class OpenAIAssistant: return f"Error in chat completion: {e}" def agent_workflow(self, user_message): - """ - Call OpenAI's advanced agent workflow API. - """ try: if not self.config.assistant_id: raise ValueError(f"Assistant '{self.config.name}' does not have an associated assistant ID.") assistant = self.client.beta.assistants.retrieve(self.config.assistant_id) thread = self.client.beta.threads.create() - - # Create a message in the thread self.client.beta.threads.messages.create(thread_id=thread.id, role="user", content=user_message) - - # Run the assistant workflow run = self.client.beta.threads.runs.create(thread_id=thread.id, assistant_id=assistant.id) - # Poll for the result while run.status in ["queued", "in_progress"]: run = self.client.beta.threads.runs.retrieve(thread_id=thread.id, run_id=run.id) if run.status == "completed": @@ -68,9 +115,6 @@ class OpenAIAssistant: return f"Error in agent workflow: {e}" def handle_message(self, user_message): - """ - Automatically select the correct method based on assistant type. - """ if self.config.is_special_assistant(): return self.agent_workflow(user_message) return self.chat_completion(user_message) diff --git a/pxy_openai/views.py b/pxy_openai/views.py index c5df8a6..6262773 100644 --- a/pxy_openai/views.py +++ b/pxy_openai/views.py @@ -1,176 +1,229 @@ # pxy_openai/views.py -import os -import io import json +import logging import tempfile -import requests -import openai +from typing import Any, Dict, Optional -from django.http import JsonResponse, HttpResponseBadRequest +from django.http import JsonResponse, HttpResponse from django.views.decorators.csrf import csrf_exempt +from django.apps import apps +from django.utils.text import slugify -from pxy_bots.models import TelegramBot # to fetch the bot token from DB +from .assistants import OpenAIAssistant -# Configure OpenAI -openai.api_key = os.getenv("OPENAI_API_KEY") +logger = logging.getLogger(__name__) -# Where to forward the transcript for chat -LANGCHAIN_CHAT_URL = os.getenv( - "LANGCHAIN_CHAT_URL", - "http://app.polisplexity.tech:8010/api/langchain/chat" -) -def _download_telegram_file(bot_username: str, file_id: str) -> bytes: +# ----------------------- +# Helpers +# ----------------------- + +def _parse_json(request) -> Dict[str, Any]: + try: + if request.content_type and "application/json" in request.content_type: + return json.loads(request.body.decode("utf-8") or "{}") + # allow x-www-form-urlencoded fallback + if request.POST and request.POST.get("payload"): + return json.loads(request.POST["payload"]) + except Exception as e: + logger.warning("openai.api.bad_json: %s", e) + return {} + +def _render_text(text: str) -> Dict[str, Any]: + return { + "schema_version": "render.v1", + "messages": [{"type": "text", "text": str(text)}], + } + +def _render_error(text: str, status: int = 400) -> JsonResponse: + # Return as render.v1 for bot consumption (keeps UX consistent) + return JsonResponse(_render_text(f"⚠️ {text}"), status=status) + +def _get_bot_and_token(bot_username: Optional[str]) -> Optional[str]: """ - Resolve a Telegram file_id to bytes using the bot's token. - Raises ValueError on any failure. + Find Telegram bot token by either TelegramBot.name or .username matching the provided bot_username. + Returns token or None. """ - bot = TelegramBot.objects.filter(username=bot_username, is_active=True).first() - if not bot: - raise ValueError(f"bot '{bot_username}' not found or inactive") + if not bot_username: + return None + TelegramBot = apps.get_model("pxy_bots", "TelegramBot") + bot = (TelegramBot.objects.filter(name=bot_username, is_active=True).first() or + TelegramBot.objects.filter(username=bot_username, is_active=True).first()) + return bot.token if bot else None - tg_api = f"https://api.telegram.org/bot{bot.token}" - r = requests.get(f"{tg_api}/getFile", params={"file_id": file_id}, timeout=10) - if r.status_code != 200 or not r.json().get("ok"): - raise ValueError("telegram getFile failed") - - file_path = r.json()["result"]["file_path"] - file_url = f"https://api.telegram.org/file/bot{bot.token}/{file_path}" - dl = requests.get(file_url, timeout=20) - if dl.status_code != 200: - raise ValueError("telegram file download failed") - return dl.content - - -def _transcribe_bytes(raw: bytes, language: str = "es") -> str: +def _assistant_from_payload(env: Dict[str, Any]) -> Optional[OpenAIAssistant]: """ - Transcribe OGG/Opus (or other) audio bytes with OpenAI. - Returns plain text. + Build OpenAIAssistant from a provided name in payload, otherwise try a sensible default. + You can pass "assistant": "Urbanista" in the root payload. """ - # Write to a temp file so OpenAI client can stream it - with tempfile.NamedTemporaryFile(suffix=".ogg") as tmp: - tmp.write(raw) - tmp.flush() - with open(tmp.name, "rb") as fh: - # "gpt-4o-transcribe" or "whisper-1" depending on your account - result = openai.audio.transcriptions.create( - model="gpt-4o-transcribe", - file=fh, - response_format="text", - language=language or "es" - ) - return (result.strip() if isinstance(result, str) else str(result)).strip() + name = (env.get("assistant") or "Urbanista") + try: + return OpenAIAssistant(name=name) + except Exception as e: + logger.error("openai.assistant.init_failed name=%s err=%s", name, e) + return None +# ----------------------- +# /api/openai/transcribe +# ----------------------- + @csrf_exempt def transcribe(request): """ - POST /api/openai/transcribe - Accepts req.v1. If input.media.file_id exists, we fetch from Telegram and transcribe. - If not, we fall back to input.text (handy for quick tests). - Returns: - {"schema_version":"proc.v1","kind":"transcript","text":"...","meta":{...}} + Transcribe audio to text. + + JSON mode (Telegram voice): + { + "schema_version":"req.v1", + "bot":{"username":"PepeBasuritaCoinsBot"}, + "user":{"language":"es"}, + "input":{"media":{"type":"voice","file_id":"..."}} + "assistant":"Urbanista" # optional + } + + Multipart mode (direct upload): + POST multipart/form-data with file=