# pxy_openai/views.py
import os
import json
import tempfile

import requests
import openai
from django.http import JsonResponse, HttpResponseBadRequest
from django.views.decorators.csrf import csrf_exempt

from pxy_bots.models import TelegramBot  # to fetch the bot token from DB

# Configure OpenAI
openai.api_key = os.getenv("OPENAI_API_KEY")

# Where to forward the transcript for chat
LANGCHAIN_CHAT_URL = os.getenv(
    "LANGCHAIN_CHAT_URL",
    "http://app.polisplexity.tech:8010/api/langchain/chat"
)


def _download_telegram_file(bot_username: str, file_id: str) -> bytes:
    """
    Resolve a Telegram file_id to bytes using the bot's token.
    Raises ValueError on any failure.
    """
    bot = TelegramBot.objects.filter(username=bot_username, is_active=True).first()
    if not bot:
        raise ValueError(f"bot '{bot_username}' not found or inactive")

    tg_api = f"https://api.telegram.org/bot{bot.token}"
    r = requests.get(f"{tg_api}/getFile", params={"file_id": file_id}, timeout=10)
    if r.status_code != 200 or not r.json().get("ok"):
        raise ValueError("telegram getFile failed")

    file_path = r.json()["result"]["file_path"]
    file_url = f"https://api.telegram.org/file/bot{bot.token}/{file_path}"
    dl = requests.get(file_url, timeout=20)
    if dl.status_code != 200:
        raise ValueError("telegram file download failed")
    return dl.content


def _transcribe_bytes(raw: bytes, language: str = "es") -> str:
    """
    Transcribe OGG/Opus (or other) audio bytes with OpenAI.
    Returns plain text.
    """
    # Write to a temp file so the OpenAI client can stream it
    with tempfile.NamedTemporaryFile(suffix=".ogg") as tmp:
        tmp.write(raw)
        tmp.flush()
        with open(tmp.name, "rb") as fh:
            # "gpt-4o-transcribe" or "whisper-1" depending on your account
            result = openai.audio.transcriptions.create(
                model="gpt-4o-transcribe",
                file=fh,
                response_format="text",
                language=language or "es"
            )
    return (result if isinstance(result, str) else str(result)).strip()


@csrf_exempt
def transcribe(request):
    """
    POST /api/openai/transcribe
    Accepts req.v1. If input.media.file_id exists, we fetch from Telegram and transcribe.
    If not, we fall back to input.text (handy for quick tests).
    Returns: {"schema_version":"proc.v1","kind":"transcript","text":"...","meta":{...}}
    """
    if request.method != "POST":
        return HttpResponseBadRequest("POST only")

    try:
        data = json.loads(request.body.decode("utf-8") or "{}")
    except Exception:
        return HttpResponseBadRequest("invalid json")

    bot_username = (data.get("bot") or {}).get("username")
    media = ((data.get("input") or {}).get("media")) or {}
    language = ((data.get("user") or {}).get("language")) or "es"
    text_fallback = ((data.get("input") or {}).get("text")) or ""

    if not bot_username:
        return JsonResponse({"error": "missing bot.username"}, status=400)

    transcript = None
    if media and media.get("file_id"):
        try:
            blob = _download_telegram_file(bot_username, media["file_id"])
            transcript = _transcribe_bytes(blob, language=language)
        except Exception as e:
            return JsonResponse({"error": f"transcription failed: {e}"}, status=502)

    if not transcript:
        # Fall back to provided text so you can test without a voice note
        if not text_fallback:
            return JsonResponse({"error": "missing voice file_id or text fallback"}, status=400)
        transcript = text_fallback

    return JsonResponse({
        "schema_version": "proc.v1",
        "kind": "transcript",
        "text": transcript,
        "meta": {
            "language": language,
            "has_media": bool(media and media.get("file_id")),
        }
    })


@csrf_exempt
def voice_chat(request):
    """
    POST /api/openai/voice_chat
    - Transcribe Telegram voice/audio from req.v1
    - Forward a modified req.v1 (with input.text = transcript) to LangChain chat
    - Return the LangChain render.v1 response (pass-through)
    """
    if request.method != "POST":
        return HttpResponseBadRequest("POST only")

    try:
        data = json.loads(request.body.decode("utf-8") or "{}")
    except Exception:
        return HttpResponseBadRequest("invalid json")

    bot_username = (data.get("bot") or {}).get("username")
    media = ((data.get("input") or {}).get("media")) or {}
    language = ((data.get("user") or {}).get("language")) or "es"
    text_fallback = ((data.get("input") or {}).get("text")) or ""

    if not bot_username:
        return JsonResponse({"error": "missing bot.username"}, status=400)

    transcript = None
    if media and media.get("file_id"):
        try:
            blob = _download_telegram_file(bot_username, media["file_id"])
            transcript = _transcribe_bytes(blob, language=language)
        except Exception as e:
            return JsonResponse({"error": f"transcription failed: {e}"}, status=502)

    if not transcript:
        if not text_fallback:
            return JsonResponse({"error": "missing voice file_id or text fallback"}, status=400)
        transcript = text_fallback

    # Build a new req.v1 for LangChain: keep everything, set input.text to the transcript
    forward_payload = dict(data)
    forward_payload.setdefault("input", {})
    forward_payload["input"] = dict(forward_payload["input"])
    forward_payload["input"]["text"] = transcript
    # Keep media in case downstream wants it; also annotate
    forward_payload["input"]["_transcript"] = True

    try:
        r = requests.post(
            LANGCHAIN_CHAT_URL,
            headers={"Content-Type": "application/json"},
            data=json.dumps(forward_payload, ensure_ascii=False).encode("utf-8"),
            timeout=30
        )
    except Exception as e:
        return JsonResponse({"error": f"forward to langchain failed: {e}"}, status=502)

    # Proxy through the downstream response (expecting render.v1)
    try:
        body = r.json()
    except Exception:
        body = {
            "schema_version": "render.v1",
            "messages": [{"type": "text", "text": r.text[:1000]}],
        }
    return JsonResponse(body, status=r.status_code or 200, safe=False)