Compare commits

...

3 Commits

Author SHA1 Message Date
Jamison Lahman
0f46e1e084 nit 2026-03-14 19:38:43 -07:00
Jamison Lahman
f4d379ceed fix(voice): plumb fatal errors to the frontend 2026-03-14 19:26:41 -07:00
Jamison Lahman
8f1076e69d chore(voice): support non-default FE ports for IS_DEV 2026-03-14 19:02:35 -07:00
6 changed files with 41 additions and 6 deletions

View File

@@ -118,6 +118,12 @@ async def handle_streaming_transcription(
if result is None: # End of stream
logger.info("Streaming transcription: transcript stream ended")
break
if result.error:
logger.warning(
f"Streaming transcription: provider error: {result.error}"
)
await websocket.send_json({"type": "error", "message": result.error})
continue
# Send if text changed OR if VAD detected end of speech (for auto-send trigger)
if result.text and (result.text != last_transcript or result.is_vad_end):
last_transcript = result.text

View File

@@ -15,6 +15,9 @@ class TranscriptResult(BaseModel):
is_vad_end: bool = False
"""True if VAD detected end of speech (silence). Use for auto-send."""
error: str | None = None
"""Provider error message to forward to the client, if any."""
class StreamingTranscriberProtocol(Protocol):
"""Protocol for streaming transcription sessions."""

View File

@@ -56,6 +56,17 @@ def _http_to_ws_url(http_url: str) -> str:
return http_url
# Map provider error codes (the "code" field of an OpenAI Realtime API
# `error` event — see the lookup via error.get("code") below) to messages
# that are safe to surface to end users; raw provider errors may leak
# internal detail.  NOTE(review): code strings assumed to match the
# provider's published error codes — confirm against the OpenAI Realtime
# API error reference.
_USER_FACING_ERROR_MESSAGES: dict[str, str] = {
    "input_audio_buffer_commit_empty": (
        "No audio was recorded. Please check your microphone and try again."
    ),
    "invalid_api_key": "Voice service authentication failed. Please contact support.",
    "rate_limit_exceeded": "Voice service is temporarily busy. Please try again shortly.",
}
# Fallback message used for any error code not present in the mapping above.
_DEFAULT_USER_ERROR = "A voice transcription error occurred. Please try again."
class OpenAIStreamingTranscriber(StreamingTranscriberProtocol):
"""Streaming transcription using OpenAI Realtime API."""
@@ -142,6 +153,17 @@ class OpenAIStreamingTranscriber(StreamingTranscriberProtocol):
if msg_type == OpenAIRealtimeMessageType.ERROR:
error = data.get("error", {})
self._logger.error(f"OpenAI error: {error}")
error_code = error.get("code", "")
user_message = _USER_FACING_ERROR_MESSAGES.get(
error_code, _DEFAULT_USER_ERROR
)
await self._transcript_queue.put(
TranscriptResult(
text="",
is_vad_end=False,
error=user_message,
)
)
continue
# Handle VAD events

View File

@@ -1,5 +1,7 @@
import { useState, useRef, useCallback, useEffect } from "react";
import { IS_DEV } from "@/lib/constants";
// Target format for OpenAI Realtime API
const TARGET_SAMPLE_RATE = 24000;
const CHUNK_INTERVAL_MS = 250;
@@ -245,9 +247,8 @@ class VoiceRecorderSession {
const { token } = await tokenResponse.json();
const protocol = window.location.protocol === "https:" ? "wss:" : "ws:";
const isDev = window.location.port === "3000";
const host = isDev ? "localhost:8080" : window.location.host;
const path = isDev
const host = IS_DEV ? "localhost:8080" : window.location.host;
const path = IS_DEV
? "/voice/transcribe/stream"
: "/api/voice/transcribe/stream";
return `${protocol}//${host}${path}?token=${encodeURIComponent(token)}`;

View File

@@ -1,3 +1,5 @@
export const IS_DEV = process.env.NODE_ENV === "development";
export enum AuthType {
BASIC = "basic",
GOOGLE_OAUTH = "google_oauth",

View File

@@ -3,6 +3,8 @@
* Plays audio chunks as they arrive for smooth, low-latency playback.
*/
import { IS_DEV } from "@/lib/constants";
/**
* HTTPStreamingTTSPlayer - Uses HTTP streaming with MediaSource Extensions
* for smooth, gapless audio playback. This is the recommended approach for
@@ -382,9 +384,8 @@ export class WebSocketStreamingTTSPlayer {
const { token } = await tokenResponse.json();
const protocol = window.location.protocol === "https:" ? "wss:" : "ws:";
const isDev = window.location.port === "3000";
const host = isDev ? "localhost:8080" : window.location.host;
const path = isDev
const host = IS_DEV ? "localhost:8080" : window.location.host;
const path = IS_DEV
? "/voice/synthesize/stream"
: "/api/voice/synthesize/stream";
return `${protocol}//${host}${path}?token=${encodeURIComponent(token)}`;