Whisper Open AI connections
All checks were successful
continuous-integration/drone/push Build is passing
All checks were successful
continuous-integration/drone/push Build is passing
This commit is contained in:
parent
3fa732efbc
commit
88fe85c802
1
.gitignore
vendored
1
.gitignore
vendored
@ -31,3 +31,4 @@ Dockerfile.dev
|
|||||||
docker-compose.override.yml
|
docker-compose.override.yml
|
||||||
docker-compose.override.yml
|
docker-compose.override.yml
|
||||||
pxy_meta_pages.zip
|
pxy_meta_pages.zip
|
||||||
|
pxy_openai.zip
|
||||||
|
@ -48,6 +48,8 @@ urlpatterns = [
|
|||||||
path("api/", include("pxy_bots.api.urls")),
|
path("api/", include("pxy_bots.api.urls")),
|
||||||
path("api/langchain/", include("pxy_langchain.api.urls")),
|
path("api/langchain/", include("pxy_langchain.api.urls")),
|
||||||
|
|
||||||
|
path("", include("pxy_openai.urls")),
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
]
|
]
|
||||||
|
@ -105,6 +105,18 @@ EXEMPT_URLS += [
|
|||||||
re.compile(r"^api/langchain/chat/?$"),
|
re.compile(r"^api/langchain/chat/?$"),
|
||||||
]
|
]
|
||||||
|
|
||||||
|
# OpenAI endpoints added to the exempt list consulted by the login middleware.
# Both patterns are anchored at start and end and have no trailing slash.
EXEMPT_URLS += [
    re.compile(r"^api/openai/transcribe$"),
    re.compile(r"^api/openai/voice_chat$"),
]
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
class LoginRequiredMiddleware(MiddlewareMixin):
|
class LoginRequiredMiddleware(MiddlewareMixin):
|
||||||
def process_request(self, request):
|
def process_request(self, request):
|
||||||
|
0
pxy_openai/services.py
Normal file
0
pxy_openai/services.py
Normal file
8
pxy_openai/urls.py
Normal file
8
pxy_openai/urls.py
Normal file
@ -0,0 +1,8 @@
|
|||||||
|
# pxy_openai/urls.py
|
||||||
|
from django.urls import path
|
||||||
|
from .views import transcribe, voice_chat
|
||||||
|
|
||||||
|
# Routes for the OpenAI-backed endpoints. The full "api/openai/..." prefix is
# spelled out in each route rather than supplied by the including URLconf.
urlpatterns = [
    path("api/openai/transcribe", transcribe, name="openai_transcribe"),
    path("api/openai/voice_chat", voice_chat, name="openai_voice_chat"),
]
|
@ -1,3 +1,176 @@
|
|||||||
from django.shortcuts import render
|
# pxy_openai/views.py
|
||||||
|
import os
|
||||||
|
import io
|
||||||
|
import json
|
||||||
|
import tempfile
|
||||||
|
import requests
|
||||||
|
import openai
|
||||||
|
|
||||||
# Create your views here.
|
from django.http import JsonResponse, HttpResponseBadRequest
|
||||||
|
from django.views.decorators.csrf import csrf_exempt
|
||||||
|
|
||||||
|
from pxy_bots.models import TelegramBot # to fetch the bot token from DB
|
||||||
|
|
||||||
|
# Hand the OpenAI client its API key from the environment at import time.
# The value is None when OPENAI_API_KEY is not set.
openai.api_key = os.environ.get("OPENAI_API_KEY")
|
||||||
|
|
||||||
|
# Chat endpoint that receives the transcript-bearing payload from voice_chat.
# Override with the LANGCHAIN_CHAT_URL environment variable.
LANGCHAIN_CHAT_URL = os.environ.get(
    "LANGCHAIN_CHAT_URL",
    "http://app.polisplexity.tech:8010/api/langchain/chat",
)
|
||||||
|
|
||||||
|
def _download_telegram_file(bot_username: str, file_id: str) -> bytes:
    """
    Resolve a Telegram file_id to its raw bytes using the bot's stored token.

    Looks up the active TelegramBot row for ``bot_username``, calls the Bot
    API ``getFile`` method to obtain the file path, then downloads the file
    from the Bot API file endpoint.

    Raises ValueError on any failure: unknown or inactive bot, transport
    error, non-200 status, or a malformed/unsuccessful API reply. The
    messages never contain the request URL, because it embeds the bot token.
    """
    bot = TelegramBot.objects.filter(username=bot_username, is_active=True).first()
    if not bot:
        raise ValueError(f"bot '{bot_username}' not found or inactive")

    tg_api = f"https://api.telegram.org/bot{bot.token}"
    try:
        r = requests.get(f"{tg_api}/getFile", params={"file_id": file_id}, timeout=10)
    except requests.RequestException:
        # `from None` drops the chained exception, whose text can include the
        # token-bearing URL.
        raise ValueError("telegram getFile request failed") from None

    # Parse the reply once; a non-JSON or non-object body counts as a failure.
    try:
        payload = r.json() if r.status_code == 200 else None
    except ValueError:
        payload = None
    if not isinstance(payload, dict) or not payload.get("ok"):
        raise ValueError("telegram getFile failed")

    result = payload.get("result")
    file_path = result.get("file_path") if isinstance(result, dict) else None
    if not file_path:
        raise ValueError("telegram getFile returned no file_path")

    file_url = f"https://api.telegram.org/file/bot{bot.token}/{file_path}"
    try:
        dl = requests.get(file_url, timeout=20)
    except requests.RequestException:
        raise ValueError("telegram file download failed") from None
    if dl.status_code != 200:
        raise ValueError("telegram file download failed")
    return dl.content
|
||||||
|
|
||||||
|
|
||||||
|
def _transcribe_bytes(raw: bytes, language: str = "es") -> str:
    """
    Transcribe OGG/Opus (or other) audio bytes with OpenAI.

    ``raw`` is the complete audio payload. ``language`` is the language hint
    passed to the transcription call; an empty value falls back to "es".

    Returns the transcript as plain text with surrounding whitespace removed.
    Exceptions raised by the OpenAI client propagate to the caller.
    """
    # Upload straight from memory as (filename, content). The ".ogg" filename
    # matches the suffix previously given to the temp file. This avoids
    # reopening a NamedTemporaryFile by name while it is still open, which
    # does not work on every platform.
    # "gpt-4o-transcribe" or "whisper-1" depending on your account
    result = openai.audio.transcriptions.create(
        model="gpt-4o-transcribe",
        file=("voice.ogg", raw),
        response_format="text",
        language=language or "es",
    )
    text = result if isinstance(result, str) else str(result)
    return text.strip()
|
||||||
|
|
||||||
|
|
||||||
|
@csrf_exempt
def transcribe(request):
    """
    POST /api/openai/transcribe

    Accepts a req.v1 JSON object. If input.media.file_id exists, the file is
    fetched from Telegram and transcribed. If not (or if the transcript comes
    back empty), input.text is used instead, which is handy for quick tests.

    Returns:
      {"schema_version":"proc.v1","kind":"transcript","text":"...","meta":{...}}

    Error responses:
      400 - non-POST request, body that is not a JSON object, missing
            bot.username, or neither a file_id nor a text fallback
      502 - download or transcription failed
    """
    import logging  # function-scope: logging is not among the module imports

    if request.method != "POST":
        return HttpResponseBadRequest("POST only")

    try:
        data = json.loads(request.body.decode("utf-8") or "{}")
    except ValueError:  # covers UnicodeDecodeError and json.JSONDecodeError
        return HttpResponseBadRequest("invalid json")
    if not isinstance(data, dict):
        # Valid JSON such as [] or null is still not a usable request.
        return HttpResponseBadRequest("invalid json")

    def _section(container, key):
        # Treat a missing or non-object section as empty instead of crashing.
        value = container.get(key)
        return value if isinstance(value, dict) else {}

    payload_input = _section(data, "input")
    bot_username = _section(data, "bot").get("username")
    media = _section(payload_input, "media")
    language = _section(data, "user").get("language") or "es"
    text_fallback = payload_input.get("text") or ""

    if not bot_username:
        return JsonResponse({"error": "missing bot.username"}, status=400)

    file_id = media.get("file_id")
    transcript = None
    if file_id:
        try:
            blob = _download_telegram_file(bot_username, file_id)
            transcript = _transcribe_bytes(blob, language=language)
        except ValueError as e:
            # ValueError is what the download helper raises for its own
            # descriptive failures, so its text is passed through.
            return JsonResponse({"error": f"transcription failed: {e}"}, status=502)
        except Exception:
            # Other exception text may carry the bot token or provider
            # internals; keep it in the server log, not in the response.
            logging.getLogger(__name__).exception("transcription failed")
            return JsonResponse({"error": "transcription failed"}, status=502)

    if not transcript:
        # Fallback to provided text so you can test without a voice note
        if not text_fallback:
            return JsonResponse({"error": "missing voice file_id or text fallback"}, status=400)
        transcript = text_fallback

    return JsonResponse({
        "schema_version": "proc.v1",
        "kind": "transcript",
        "text": transcript,
        "meta": {
            "language": language,
            "has_media": bool(file_id),
        },
    })
|
||||||
|
|
||||||
|
|
||||||
|
@csrf_exempt
def voice_chat(request):
    """
    POST /api/openai/voice_chat

    - Transcribe Telegram voice/audio from req.v1 (falls back to input.text)
    - Forward a modified req.v1 (with input.text = transcript) to LangChain chat
    - Return the LangChain render.v1 response (pass-through, same status code)

    Error responses:
      400 - non-POST request, body that is not a JSON object, missing
            bot.username, or neither a file_id nor a text fallback
      502 - download, transcription, or the forward to LangChain failed
    """
    import logging  # function-scope: logging is not among the module imports

    log = logging.getLogger(__name__)

    if request.method != "POST":
        return HttpResponseBadRequest("POST only")

    try:
        data = json.loads(request.body.decode("utf-8") or "{}")
    except ValueError:  # covers UnicodeDecodeError and json.JSONDecodeError
        return HttpResponseBadRequest("invalid json")
    if not isinstance(data, dict):
        # Valid JSON such as [] or null is still not a usable request.
        return HttpResponseBadRequest("invalid json")

    def _section(container, key):
        # Treat a missing or non-object section as empty instead of crashing.
        value = container.get(key)
        return value if isinstance(value, dict) else {}

    payload_input = _section(data, "input")
    bot_username = _section(data, "bot").get("username")
    media = _section(payload_input, "media")
    language = _section(data, "user").get("language") or "es"
    text_fallback = payload_input.get("text") or ""

    if not bot_username:
        return JsonResponse({"error": "missing bot.username"}, status=400)

    file_id = media.get("file_id")
    transcript = None
    if file_id:
        try:
            blob = _download_telegram_file(bot_username, file_id)
            transcript = _transcribe_bytes(blob, language=language)
        except ValueError as e:
            # ValueError is what the download helper raises for its own
            # descriptive failures, so its text is passed through.
            return JsonResponse({"error": f"transcription failed: {e}"}, status=502)
        except Exception:
            # Other exception text may carry the bot token or provider
            # internals; keep it in the server log, not in the response.
            log.exception("transcription failed")
            return JsonResponse({"error": "transcription failed"}, status=502)

    if not transcript:
        if not text_fallback:
            return JsonResponse({"error": "missing voice file_id or text fallback"}, status=400)
        transcript = text_fallback

    # Build a new req.v1 for LangChain: keep everything, set input.text to the
    # transcript. Media stays in place in case downstream wants it, and
    # "_transcript" marks the text as machine-produced. Both dicts are shallow
    # copies so the parsed request is not mutated.
    forward_payload = dict(data)
    forward_payload["input"] = {**payload_input, "text": transcript, "_transcript": True}

    try:
        r = requests.post(
            LANGCHAIN_CHAT_URL,
            headers={"Content-Type": "application/json"},
            data=json.dumps(forward_payload, ensure_ascii=False).encode("utf-8"),
            timeout=30,
        )
    except requests.RequestException:
        # The exception text names the internal chat URL; log it server-side.
        log.exception("forward to langchain failed")
        return JsonResponse({"error": "forward to langchain failed"}, status=502)

    # Proxy through the downstream response (expecting render.v1). A non-JSON
    # body is wrapped in a minimal render.v1 text message, capped at 1000
    # characters.
    try:
        body = r.json()
    except ValueError:
        body = {"schema_version": "render.v1",
                "messages": [{"type": "text", "text": r.text[:1000]}]}
    return JsonResponse(body, status=r.status_code or 200, safe=False)
|
||||||
|
Loading…
x
Reference in New Issue
Block a user