From 88fe85c802ad22d2250368d0559fd3e4921b4018 Mon Sep 17 00:00:00 2001
From: Ekaropolus
Date: Wed, 17 Sep 2025 20:29:33 -0600
Subject: [PATCH] Whisper Open AI connections

---
Note: the line breaks of this patch were reconstructed from a
whitespace-collapsed copy. The position of blank context lines in the
polisplexity/urls.py and pxy_dashboard/middleware.py hunks is a best guess,
and the post-image blob ids on the "index" lines of middleware.py and
views.py predate the review edits - verify before applying.

 .gitignore                  |   1 +
 polisplexity/urls.py        |   2 +
 pxy_dashboard/middleware.py |   5 +
 pxy_openai/services.py      |   0
 pxy_openai/urls.py          |   8 ++
 pxy_openai/views.py         | 229 +++++++++++++++++++++++++++++++++++-
 6 files changed, 243 insertions(+), 2 deletions(-)
 create mode 100644 pxy_openai/services.py
 create mode 100644 pxy_openai/urls.py

diff --git a/.gitignore b/.gitignore
index 4959ece..402cda9 100644
--- a/.gitignore
+++ b/.gitignore
@@ -31,3 +31,4 @@ Dockerfile.dev
 docker-compose.override.yml
 docker-compose.override.yml
 pxy_meta_pages.zip
+pxy_openai.zip
diff --git a/polisplexity/urls.py b/polisplexity/urls.py
index f365ffd..7ba4c81 100644
--- a/polisplexity/urls.py
+++ b/polisplexity/urls.py
@@ -48,6 +48,8 @@ urlpatterns = [
     path("api/", include("pxy_bots.api.urls")),
     path("api/langchain/", include("pxy_langchain.api.urls")),
 
+    path("", include("pxy_openai.urls")),
+
 ]
 
 
diff --git a/pxy_dashboard/middleware.py b/pxy_dashboard/middleware.py
index ad7e898..d66b470 100644
--- a/pxy_dashboard/middleware.py
+++ b/pxy_dashboard/middleware.py
@@ -105,6 +105,11 @@ EXEMPT_URLS += [
     re.compile(r"^api/langchain/chat/?$"),
 ]
 
+EXEMPT_URLS += [
+    re.compile(r"^api/openai/transcribe$"),
+    re.compile(r"^api/openai/voice_chat$"),
+]
+
 
 class LoginRequiredMiddleware(MiddlewareMixin):
     def process_request(self, request):
diff --git a/pxy_openai/services.py b/pxy_openai/services.py
new file mode 100644
index 0000000..e69de29
diff --git a/pxy_openai/urls.py b/pxy_openai/urls.py
new file mode 100644
index 0000000..ffaabca
--- /dev/null
+++ b/pxy_openai/urls.py
@@ -0,0 +1,8 @@
+# pxy_openai/urls.py
+from django.urls import path
+from .views import transcribe, voice_chat
+
+urlpatterns = [
+    path("api/openai/transcribe", transcribe, name="openai_transcribe"),
+    path("api/openai/voice_chat", voice_chat, name="openai_voice_chat"),
+]
diff --git a/pxy_openai/views.py b/pxy_openai/views.py
index 91ea44a..c5df8a6 100644
--- a/pxy_openai/views.py
+++ b/pxy_openai/views.py
@@ -1,3 +1,228 @@
-from django.shortcuts import render
+# pxy_openai/views.py
+"""HTTP endpoints that transcribe Telegram voice notes with OpenAI.
+
+Both views take a req.v1 JSON body. They are CSRF-exempt and their paths are in
+the dashboard middleware's EXEMPT_URLS, so treat every request field as untrusted.
+"""
+import os
+import io
+import json
+import logging
+import tempfile
+import requests
+import openai
 
-# Create your views here.
+from django.http import JsonResponse, HttpResponseBadRequest
+from django.views.decorators.csrf import csrf_exempt
+
+from pxy_bots.models import TelegramBot  # to fetch the bot token from DB
+
+logger = logging.getLogger(__name__)
+
+# Configure OpenAI
+openai.api_key = os.getenv("OPENAI_API_KEY")
+
+# Where to forward the transcript for chat
+LANGCHAIN_CHAT_URL = os.getenv(
+    "LANGCHAIN_CHAT_URL",
+    "http://app.polisplexity.tech:8010/api/langchain/chat"
+)
+
+
+def _download_telegram_file(bot_username: str, file_id: str) -> bytes:
+    """
+    Resolve a Telegram file_id to bytes using the bot's token.
+
+    Looks up the active TelegramBot named ``bot_username``, calls the Bot API
+    ``getFile`` method to get the file path, then downloads that path.
+
+    Raises ValueError on any failure. The messages deliberately omit the
+    request URL: it embeds the bot token, and the views echo the exception
+    text back to the HTTP client.
+    """
+    bot = TelegramBot.objects.filter(username=bot_username, is_active=True).first()
+    if not bot:
+        raise ValueError(f"bot '{bot_username}' not found or inactive")
+
+    try:
+        r = requests.get(
+            f"https://api.telegram.org/bot{bot.token}/getFile",
+            params={"file_id": file_id},
+            timeout=10,
+        )
+        info = r.json() if r.status_code == 200 else None
+    except (requests.RequestException, ValueError):
+        # requests' own error text contains the full URL, token included.
+        raise ValueError("telegram getFile failed") from None
+    if not isinstance(info, dict) or not info.get("ok"):
+        raise ValueError("telegram getFile failed")
+
+    result = info.get("result")
+    file_path = result.get("file_path") if isinstance(result, dict) else None
+    if not file_path:
+        raise ValueError("telegram getFile returned no file_path")
+
+    try:
+        dl = requests.get(
+            f"https://api.telegram.org/file/bot{bot.token}/{file_path}",
+            timeout=20,
+        )
+    except requests.RequestException:
+        raise ValueError("telegram file download failed") from None
+    if dl.status_code != 200:
+        raise ValueError("telegram file download failed")
+    return dl.content
+
+
+def _transcribe_bytes(raw: bytes, language: str = "es") -> str:
+    """
+    Transcribe OGG/Opus (or other) audio bytes with OpenAI.
+    Returns plain text.
+
+    Raises ValueError when ``raw`` is empty; OpenAI client errors propagate.
+    """
+    if not raw:
+        raise ValueError("empty audio payload")
+    # Upload from memory as a (filename, content) pair instead of a temp file:
+    # reopening a NamedTemporaryFile by name while it is still open is not
+    # portable (it fails on Windows). The name only supplies the extension.
+    # "gpt-4o-transcribe" or "whisper-1" depending on your account
+    result = openai.audio.transcriptions.create(
+        model="gpt-4o-transcribe",
+        file=("audio.ogg", raw),
+        response_format="text",
+        language=language or "es"
+    )
+    return str(result).strip()
+
+
+def _as_dict(value) -> dict:
+    """Return ``value`` when it is a dict, otherwise an empty dict."""
+    return value if isinstance(value, dict) else {}
+
+
+def _parse_req_v1(request):
+    """
+    Check the HTTP method and decode the JSON body of a req.v1 request.
+
+    Returns ``(data, None)`` on success or ``(None, error_response)`` when the
+    request is not a POST or the body is not a JSON object.
+    """
+    if request.method != "POST":
+        return None, HttpResponseBadRequest("POST only")
+    try:
+        data = json.loads(request.body.decode("utf-8") or "{}")
+    except ValueError:  # undecodable bytes or malformed JSON
+        return None, HttpResponseBadRequest("invalid json")
+    if not isinstance(data, dict):
+        # Valid JSON but not an object; the .get() lookups later need a dict.
+        return None, HttpResponseBadRequest("invalid json")
+    return data, None
+
+
+def _resolve_transcript(data: dict):
+    """
+    Turn a decoded req.v1 dict into transcript text (shared by both views).
+
+    If input.media.file_id exists, the file is fetched from Telegram and
+    transcribed; otherwise (or when the transcript comes back empty)
+    input.text is used.
+
+    Returns ``(transcript, meta, None)`` on success, where ``meta`` holds
+    ``language`` and ``has_media``, or ``(None, None, error_response)``.
+    """
+    bot_username = _as_dict(data.get("bot")).get("username")
+    req_input = _as_dict(data.get("input"))
+    file_id = _as_dict(req_input.get("media")).get("file_id")
+    language = _as_dict(data.get("user")).get("language") or "es"
+    text_fallback = req_input.get("text") or ""
+
+    if not bot_username:
+        return None, None, JsonResponse({"error": "missing bot.username"}, status=400)
+
+    transcript = None
+    if file_id:
+        try:
+            blob = _download_telegram_file(bot_username, file_id)
+            transcript = _transcribe_bytes(blob, language=language)
+        except Exception as e:
+            logger.exception("transcription failed")
+            return None, None, JsonResponse(
+                {"error": f"transcription failed: {e}"}, status=502
+            )
+
+    if not transcript:
+        # Fallback to provided text so you can test without a voice note
+        if not text_fallback:
+            return None, None, JsonResponse(
+                {"error": "missing voice file_id or text fallback"}, status=400
+            )
+        transcript = text_fallback
+
+    return transcript, {"language": language, "has_media": bool(file_id)}, None
+
+
+@csrf_exempt
+def transcribe(request):
+    """
+    POST /api/openai/transcribe
+    Accepts req.v1. If input.media.file_id exists, we fetch from Telegram and transcribe.
+    If not, we fall back to input.text (handy for quick tests).
+    Returns:
+      {"schema_version":"proc.v1","kind":"transcript","text":"...","meta":{...}}
+    """
+    data, error = _parse_req_v1(request)
+    if error is not None:
+        return error
+    transcript, meta, error = _resolve_transcript(data)
+    if error is not None:
+        return error
+
+    return JsonResponse({
+        "schema_version": "proc.v1",
+        "kind": "transcript",
+        "text": transcript,
+        "meta": meta,
+    })
+
+
+@csrf_exempt
+def voice_chat(request):
+    """
+    POST /api/openai/voice_chat
+    - Transcribe Telegram voice/audio from req.v1
+    - Forward a modified req.v1 (with input.text = transcript) to LangChain chat
+    - Return the LangChain render.v1 response (pass-through)
+    """
+    data, error = _parse_req_v1(request)
+    if error is not None:
+        return error
+    transcript, _meta, error = _resolve_transcript(data)
+    if error is not None:
+        return error
+
+    # Build a new req.v1 for LangChain: keep everything, set input.text to transcript
+    forward_payload = dict(data)
+    forward_payload["input"] = dict(_as_dict(data.get("input")))
+    forward_payload["input"]["text"] = transcript
+    # keep media in case downstream wants it; also annotate
+    forward_payload["input"]["_transcript"] = True
+
+    try:
+        r = requests.post(
+            LANGCHAIN_CHAT_URL,
+            headers={"Content-Type": "application/json"},
+            data=json.dumps(forward_payload, ensure_ascii=False).encode("utf-8"),
+            timeout=30
+        )
+    except requests.RequestException as e:
+        logger.exception("forward to langchain failed")
+        return JsonResponse({"error": f"forward to langchain failed: {e}"}, status=502)
+
+    # Proxy through the downstream response (expecting render.v1)
+    try:
+        body = r.json()
+    except ValueError:
+        body = {"schema_version": "render.v1",
+                "messages": [{"type": "text", "text": r.text[:1000]}]}
+    return JsonResponse(body, status=r.status_code or 200, safe=False)