Compare commits

...

3 Commits

Author SHA1 Message Date
Jamison Lahman
0f46e1e084 nit 2026-03-14 19:38:43 -07:00
Jamison Lahman
f4d379ceed fix(voice): plumb fatal errors to the frontend 2026-03-14 19:26:41 -07:00
Jamison Lahman
8f1076e69d chore(voice): support non-default FE ports for IS_DEV 2026-03-14 19:02:35 -07:00
6 changed files with 41 additions and 6 deletions

View File

@@ -118,6 +118,12 @@ async def handle_streaming_transcription(
if result is None: # End of stream
logger.info("Streaming transcription: transcript stream ended")
break
if result.error:
logger.warning(
f"Streaming transcription: provider error: {result.error}"
)
await websocket.send_json({"type": "error", "message": result.error})
continue
# Send if text changed OR if VAD detected end of speech (for auto-send trigger)
if result.text and (result.text != last_transcript or result.is_vad_end):
last_transcript = result.text

View File

@@ -15,6 +15,9 @@ class TranscriptResult(BaseModel):
is_vad_end: bool = False
"""True if VAD detected end of speech (silence). Use for auto-send."""
error: str | None = None
"""Provider error message to forward to the client, if any."""
class StreamingTranscriberProtocol(Protocol):
"""Protocol for streaming transcription sessions."""

View File

@@ -56,6 +56,17 @@ def _http_to_ws_url(http_url: str) -> str:
return http_url
# Map provider error codes (the "code" field of an OpenAI Realtime API
# `error` event — see the lookup via error.get("code") below) to messages
# that are safe to surface to end users; raw provider errors may leak
# internal detail.  NOTE(review): code strings assumed to match the
# provider's published error codes — confirm against the OpenAI Realtime
# API error reference.
_USER_FACING_ERROR_MESSAGES: dict[str, str] = {
    "input_audio_buffer_commit_empty": (
        "No audio was recorded. Please check your microphone and try again."
    ),
    "invalid_api_key": "Voice service authentication failed. Please contact support.",
    "rate_limit_exceeded": "Voice service is temporarily busy. Please try again shortly.",
}
# Fallback message used for any error code not present in the mapping above.
_DEFAULT_USER_ERROR = "A voice transcription error occurred. Please try again."
class OpenAIStreamingTranscriber(StreamingTranscriberProtocol):
"""Streaming transcription using OpenAI Realtime API."""
@@ -142,6 +153,17 @@ class OpenAIStreamingTranscriber(StreamingTranscriberProtocol):
if msg_type == OpenAIRealtimeMessageType.ERROR:
error = data.get("error", {})
self._logger.error(f"OpenAI error: {error}")
error_code = error.get("code", "")
user_message = _USER_FACING_ERROR_MESSAGES.get(
error_code, _DEFAULT_USER_ERROR
)
await self._transcript_queue.put(
TranscriptResult(
text="",
is_vad_end=False,
error=user_message,
)
)
continue
# Handle VAD events

View File

@@ -1,5 +1,7 @@
import { useState, useRef, useCallback, useEffect } from "react";
import { IS_DEV } from "@/lib/constants";
// Target format for OpenAI Realtime API
const TARGET_SAMPLE_RATE = 24000;
const CHUNK_INTERVAL_MS = 250;
@@ -245,9 +247,8 @@ class VoiceRecorderSession {
const { token } = await tokenResponse.json();
const protocol = window.location.protocol === "https:" ? "wss:" : "ws:";
const isDev = window.location.port === "3000";
const host = isDev ? "localhost:8080" : window.location.host;
const path = isDev
const host = IS_DEV ? "localhost:8080" : window.location.host;
const path = IS_DEV
? "/voice/transcribe/stream"
: "/api/voice/transcribe/stream";
return `${protocol}//${host}${path}?token=${encodeURIComponent(token)}`;

View File

@@ -1,3 +1,5 @@
export const IS_DEV = process.env.NODE_ENV === "development";
export enum AuthType {
BASIC = "basic",
GOOGLE_OAUTH = "google_oauth",

View File

@@ -3,6 +3,8 @@
* Plays audio chunks as they arrive for smooth, low-latency playback.
*/
import { IS_DEV } from "@/lib/constants";
/**
* HTTPStreamingTTSPlayer - Uses HTTP streaming with MediaSource Extensions
* for smooth, gapless audio playback. This is the recommended approach for
@@ -382,9 +384,8 @@ export class WebSocketStreamingTTSPlayer {
const { token } = await tokenResponse.json();
const protocol = window.location.protocol === "https:" ? "wss:" : "ws:";
const isDev = window.location.port === "3000";
const host = isDev ? "localhost:8080" : window.location.host;
const path = isDev
const host = IS_DEV ? "localhost:8080" : window.location.host;
const path = IS_DEV
? "/voice/synthesize/stream"
: "/api/voice/synthesize/stream";
return `${protocol}//${host}${path}?token=${encodeURIComponent(token)}`;