Whisper Open AI connections
All checks were successful
continuous-integration/drone/push Build is passing

This commit is contained in:
Ekaropolus 2025-09-17 20:29:33 -06:00
parent 3fa732efbc
commit 88fe85c802
6 changed files with 198 additions and 2 deletions

1
.gitignore vendored
View File

@ -31,3 +31,4 @@ Dockerfile.dev
docker-compose.override.yml
docker-compose.override.yml
pxy_meta_pages.zip
pxy_openai.zip

View File

@ -48,6 +48,8 @@ urlpatterns = [
path("api/", include("pxy_bots.api.urls")),
path("api/langchain/", include("pxy_langchain.api.urls")),
path("", include("pxy_openai.urls")),
]

View File

@ -105,6 +105,18 @@ EXEMPT_URLS += [
re.compile(r"^api/langchain/chat/?$"),
]
# OpenAI voice endpoints: anchored patterns, so only these exact paths
# (no trailing slash) are added to the exemption list.
EXEMPT_URLS += [
    re.compile(pattern)
    for pattern in (
        r"^api/openai/transcribe$",
        r"^api/openai/voice_chat$",
    )
]
class LoginRequiredMiddleware(MiddlewareMixin):
def process_request(self, request):

0
pxy_openai/services.py Normal file
View File

8
pxy_openai/urls.py Normal file
View File

@ -0,0 +1,8 @@
# pxy_openai/urls.py
from django.urls import path
from .views import transcribe, voice_chat
# Routes for the OpenAI-backed voice endpoints. Both are registered without a
# trailing slash.
urlpatterns = [
    # Speech-to-text only.
    path("api/openai/transcribe", transcribe, name="openai_transcribe"),
    # Speech-to-text, then forward the transcript to the chat backend.
    path("api/openai/voice_chat", voice_chat, name="openai_voice_chat"),
]

View File

@ -1,3 +1,176 @@
from django.shortcuts import render
# pxy_openai/views.py
import io
import json
import logging
import os
import tempfile

import openai
import requests
# Create your views here.
from django.http import JsonResponse, HttpResponseBadRequest
from django.views.decorators.csrf import csrf_exempt
from pxy_bots.models import TelegramBot # to fetch the bot token from DB
# Configure the OpenAI client from the environment; the key stays None when
# OPENAI_API_KEY is not set.
openai.api_key = os.environ.get("OPENAI_API_KEY")
# Chat endpoint that receives the transcript. Set LANGCHAIN_CHAT_URL in the
# environment to override the built-in default.
_DEFAULT_LANGCHAIN_CHAT_URL = "http://app.polisplexity.tech:8010/api/langchain/chat"
LANGCHAIN_CHAT_URL = os.environ.get("LANGCHAIN_CHAT_URL", _DEFAULT_LANGCHAIN_CHAT_URL)
def _download_telegram_file(bot_username: str, file_id: str) -> bytes:
    """
    Resolve a Telegram file_id to bytes using the bot's token.

    Looks up the active ``TelegramBot`` row for *bot_username*, asks the Bot
    API ``getFile`` method for the file path, then downloads that file.

    Returns the raw file content.

    Raises ValueError on any failure (unknown bot, empty file_id, network
    error, non-200 status, malformed or unsuccessful API response). The
    messages are fixed strings: the request URLs embed the bot token, and
    callers echo the exception text back to HTTP clients, so the underlying
    ``requests`` error text must never be propagated.
    """
    if not file_id:
        raise ValueError("missing file_id")

    bot = TelegramBot.objects.filter(username=bot_username, is_active=True).first()
    if not bot:
        raise ValueError(f"bot '{bot_username}' not found or inactive")

    try:
        meta_resp = requests.get(
            f"https://api.telegram.org/bot{bot.token}/getFile",
            params={"file_id": file_id},
            timeout=10,
        )
        # Parse the body once; a non-200 answer is treated as a failure below.
        meta = meta_resp.json() if meta_resp.status_code == 200 else None
    except (requests.RequestException, ValueError):
        # `from None` drops the chained exception, whose text contains the
        # token-bearing URL.
        raise ValueError("telegram getFile failed") from None
    if not isinstance(meta, dict) or not meta.get("ok"):
        raise ValueError("telegram getFile failed")

    result = meta.get("result")
    file_path = result.get("file_path") if isinstance(result, dict) else None
    if not file_path:
        raise ValueError("telegram getFile returned no file_path")

    try:
        dl = requests.get(
            f"https://api.telegram.org/file/bot{bot.token}/{file_path}",
            timeout=20,
        )
    except requests.RequestException:
        raise ValueError("telegram file download failed") from None
    if dl.status_code != 200:
        raise ValueError("telegram file download failed")
    return dl.content
def _transcribe_bytes(raw: bytes, language: str = "es") -> str:
    """
    Transcribe OGG/Opus (or other) audio bytes with OpenAI.

    *language* is passed to the API as the language hint; a falsy value falls
    back to "es".

    Returns the transcript as plain text with surrounding whitespace removed.

    Raises ValueError if *raw* is empty. Errors raised by the OpenAI client
    propagate unchanged.
    """
    if not raw:
        # Fail fast instead of spending an API call on an empty upload.
        raise ValueError("empty audio payload")

    # Upload straight from memory as a (filename, content) pair. This avoids
    # writing a temp file and reopening it by name while it is still open,
    # which does not work on Windows. The ".ogg" filename keeps the same
    # extension the temp file used to carry.
    # Model: "gpt-4o-transcribe" or "whisper-1" depending on your account.
    result = openai.audio.transcriptions.create(
        model="gpt-4o-transcribe",
        file=("audio.ogg", raw),
        response_format="text",
        language=language or "es",
    )
    text = result if isinstance(result, str) else str(result)
    return text.strip()
@csrf_exempt
def transcribe(request):
    """
    POST /api/openai/transcribe

    Accepts req.v1. If input.media.file_id exists, we fetch from Telegram and transcribe.
    If not, we fall back to input.text (handy for quick tests).

    Returns:
      {"schema_version":"proc.v1","kind":"transcript","text":"...","meta":{...}}

    Error responses:
      400 - non-POST request, body that is not a JSON object, missing
            bot.username, or neither a voice file_id nor fallback text.
      502 - downloading or transcribing the audio failed.
    """
    if request.method != "POST":
        return HttpResponseBadRequest("POST only")

    try:
        data = json.loads(request.body.decode("utf-8") or "{}")
    except (UnicodeDecodeError, ValueError):
        return HttpResponseBadRequest("invalid json")
    if not isinstance(data, dict):
        # Valid JSON that is not an object (list, string, number) has no
        # fields to read; reject it instead of failing on `.get`.
        return HttpResponseBadRequest("json body must be an object")

    def _obj(value):
        # Treat a missing or wrongly-typed section as empty.
        return value if isinstance(value, dict) else {}

    request_input = _obj(data.get("input"))
    bot_username = _obj(data.get("bot")).get("username")
    file_id = _obj(request_input.get("media")).get("file_id")
    language = _obj(data.get("user")).get("language")
    if not language or not isinstance(language, str):
        language = "es"
    text_fallback = request_input.get("text") or ""

    if not bot_username:
        return JsonResponse({"error": "missing bot.username"}, status=400)

    transcript = None
    if file_id:
        try:
            blob = _download_telegram_file(bot_username, file_id)
            transcript = _transcribe_bytes(blob, language=language)
        except Exception as exc:
            # Only plain ValueErrors are echoed to the client. Text from
            # `requests` errors can contain the Telegram URL, which embeds the
            # bot token, so those and all other errors are logged server-side
            # and answered with a generic message.
            if isinstance(exc, ValueError) and not isinstance(exc, requests.RequestException):
                detail = f"transcription failed: {exc}"
            else:
                logging.getLogger(__name__).exception("transcription failed")
                detail = "transcription failed"
            return JsonResponse({"error": detail}, status=502)

    if not transcript:
        # Fallback to provided text so you can test without a voice note
        if not text_fallback:
            return JsonResponse({"error": "missing voice file_id or text fallback"}, status=400)
        transcript = text_fallback

    return JsonResponse({
        "schema_version": "proc.v1",
        "kind": "transcript",
        "text": transcript,
        "meta": {
            "language": language,
            "has_media": bool(file_id),
        },
    })
@csrf_exempt
def voice_chat(request):
    """
    POST /api/openai/voice_chat

    - Transcribe Telegram voice/audio from req.v1
    - Forward a modified req.v1 (with input.text = transcript) to LangChain chat
    - Return the LangChain render.v1 response (pass-through)

    If input.media.file_id is absent (or the transcript is empty), input.text
    is used as the transcript instead.

    Error responses:
      400 - non-POST request, body that is not a JSON object, missing
            bot.username, or neither a voice file_id nor fallback text.
      502 - downloading/transcribing the audio failed, or the request to
            LANGCHAIN_CHAT_URL could not be completed.
    Otherwise the downstream status code and body are returned as-is.
    """
    if request.method != "POST":
        return HttpResponseBadRequest("POST only")

    try:
        data = json.loads(request.body.decode("utf-8") or "{}")
    except (UnicodeDecodeError, ValueError):
        return HttpResponseBadRequest("invalid json")
    if not isinstance(data, dict):
        # Valid JSON that is not an object (list, string, number) has no
        # fields to read; reject it instead of failing on `.get`.
        return HttpResponseBadRequest("json body must be an object")

    def _obj(value):
        # Treat a missing or wrongly-typed section as empty.
        return value if isinstance(value, dict) else {}

    request_input = _obj(data.get("input"))
    bot_username = _obj(data.get("bot")).get("username")
    file_id = _obj(request_input.get("media")).get("file_id")
    language = _obj(data.get("user")).get("language")
    if not language or not isinstance(language, str):
        language = "es"
    text_fallback = request_input.get("text") or ""

    if not bot_username:
        return JsonResponse({"error": "missing bot.username"}, status=400)

    transcript = None
    if file_id:
        try:
            blob = _download_telegram_file(bot_username, file_id)
            transcript = _transcribe_bytes(blob, language=language)
        except Exception as exc:
            # Only plain ValueErrors are echoed to the client. Text from
            # `requests` errors can contain the Telegram URL, which embeds the
            # bot token, so those and all other errors are logged server-side
            # and answered with a generic message.
            if isinstance(exc, ValueError) and not isinstance(exc, requests.RequestException):
                detail = f"transcription failed: {exc}"
            else:
                logging.getLogger(__name__).exception("transcription failed")
                detail = "transcription failed"
            return JsonResponse({"error": detail}, status=502)

    if not transcript:
        if not text_fallback:
            return JsonResponse({"error": "missing voice file_id or text fallback"}, status=400)
        transcript = text_fallback

    # Build a new req.v1 for LangChain: keep everything, set input.text to transcript.
    # Shallow copies keep the parsed request itself untouched.
    forward_input = dict(request_input)
    forward_input["text"] = transcript
    # keep media in case downstream wants it; also annotate
    forward_input["_transcript"] = True
    forward_payload = dict(data)
    forward_payload["input"] = forward_input

    try:
        r = requests.post(
            LANGCHAIN_CHAT_URL,
            headers={"Content-Type": "application/json"},
            data=json.dumps(forward_payload, ensure_ascii=False).encode("utf-8"),
            timeout=30,
        )
    except Exception:
        # The exception text names the internal downstream URL; keep it in the
        # server log and out of the client response.
        logging.getLogger(__name__).exception("forward to langchain failed")
        return JsonResponse({"error": "forward to langchain failed"}, status=502)

    # Proxy through the downstream response (expecting render.v1)
    try:
        body = r.json()
    except ValueError:
        # Non-JSON downstream body: wrap a truncated copy as a render.v1 text message.
        body = {
            "schema_version": "render.v1",
            "messages": [{"type": "text", "text": r.text[:1000]}],
        }
    return JsonResponse(body, status=r.status_code or 200, safe=False)