mirror of
https://github.com/onyx-dot-app/onyx.git
synced 2026-03-12 19:22:40 +00:00
Compare commits
13 Commits
voice-mode
...
nikg/fix-s
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
3fb8dc1db0 | ||
|
|
fda1528174 | ||
|
|
5f34c83f0a | ||
|
|
3509e9c48c | ||
|
|
c57ea65d42 | ||
|
|
c1ce180b72 | ||
|
|
b5474dc127 | ||
|
|
e1df3f533a | ||
|
|
df5252db05 | ||
|
|
f01f210af8 | ||
|
|
781219cf18 | ||
|
|
ca39da7de9 | ||
|
|
abf76cd747 |
@@ -0,0 +1,43 @@
|
||||
"""add timestamps to user table
|
||||
|
||||
Revision ID: 27fb147a843f
|
||||
Revises: b5c4d7e8f9a1
|
||||
Create Date: 2026-03-08 17:18:40.828644
|
||||
|
||||
"""
|
||||
|
||||
from alembic import op
|
||||
import sqlalchemy as sa
|
||||
|
||||
|
||||
# revision identifiers, used by Alembic.
|
||||
revision = "27fb147a843f"
|
||||
down_revision = "b5c4d7e8f9a1"
|
||||
branch_labels = None
|
||||
depends_on = None
|
||||
|
||||
|
||||
def upgrade() -> None:
|
||||
op.add_column(
|
||||
"user",
|
||||
sa.Column(
|
||||
"created_at",
|
||||
sa.DateTime(timezone=True),
|
||||
server_default=sa.func.now(),
|
||||
nullable=False,
|
||||
),
|
||||
)
|
||||
op.add_column(
|
||||
"user",
|
||||
sa.Column(
|
||||
"updated_at",
|
||||
sa.DateTime(timezone=True),
|
||||
server_default=sa.func.now(),
|
||||
nullable=False,
|
||||
),
|
||||
)
|
||||
|
||||
|
||||
def downgrade() -> None:
|
||||
op.drop_column("user", "updated_at")
|
||||
op.drop_column("user", "created_at")
|
||||
@@ -1,117 +0,0 @@
|
||||
"""add_voice_provider_and_user_voice_prefs
|
||||
|
||||
Revision ID: 93a2e195e25c
|
||||
Revises: b5c4d7e8f9a1
|
||||
Create Date: 2026-02-23 15:16:39.507304
|
||||
|
||||
"""
|
||||
|
||||
from alembic import op
|
||||
import sqlalchemy as sa
|
||||
from sqlalchemy import column
|
||||
from sqlalchemy import true
|
||||
from sqlalchemy.dialects import postgresql
|
||||
|
||||
|
||||
# revision identifiers, used by Alembic.
|
||||
revision = "93a2e195e25c"
|
||||
down_revision = "b5c4d7e8f9a1"
|
||||
branch_labels = None
|
||||
depends_on = None
|
||||
|
||||
|
||||
def upgrade() -> None:
|
||||
# Create voice_provider table
|
||||
op.create_table(
|
||||
"voice_provider",
|
||||
sa.Column("id", sa.Integer(), primary_key=True),
|
||||
sa.Column("name", sa.String(), unique=True, nullable=False),
|
||||
sa.Column("provider_type", sa.String(), nullable=False),
|
||||
sa.Column("api_key", sa.LargeBinary(), nullable=True),
|
||||
sa.Column("api_base", sa.String(), nullable=True),
|
||||
sa.Column("custom_config", postgresql.JSONB(), nullable=True),
|
||||
sa.Column("stt_model", sa.String(), nullable=True),
|
||||
sa.Column("tts_model", sa.String(), nullable=True),
|
||||
sa.Column("default_voice", sa.String(), nullable=True),
|
||||
sa.Column(
|
||||
"is_default_stt", sa.Boolean(), nullable=False, server_default="false"
|
||||
),
|
||||
sa.Column(
|
||||
"is_default_tts", sa.Boolean(), nullable=False, server_default="false"
|
||||
),
|
||||
sa.Column("deleted", sa.Boolean(), nullable=False, server_default="false"),
|
||||
sa.Column(
|
||||
"time_created",
|
||||
sa.DateTime(timezone=True),
|
||||
server_default=sa.func.now(),
|
||||
nullable=False,
|
||||
),
|
||||
sa.Column(
|
||||
"time_updated",
|
||||
sa.DateTime(timezone=True),
|
||||
server_default=sa.func.now(),
|
||||
onupdate=sa.func.now(),
|
||||
nullable=False,
|
||||
),
|
||||
)
|
||||
|
||||
# Add partial unique indexes to enforce only one default STT/TTS provider
|
||||
op.create_index(
|
||||
"ix_voice_provider_one_default_stt",
|
||||
"voice_provider",
|
||||
["is_default_stt"],
|
||||
unique=True,
|
||||
postgresql_where=column("is_default_stt") == true(),
|
||||
)
|
||||
op.create_index(
|
||||
"ix_voice_provider_one_default_tts",
|
||||
"voice_provider",
|
||||
["is_default_tts"],
|
||||
unique=True,
|
||||
postgresql_where=column("is_default_tts") == true(),
|
||||
)
|
||||
|
||||
# Add voice preference columns to user table
|
||||
op.add_column(
|
||||
"user",
|
||||
sa.Column(
|
||||
"voice_auto_send",
|
||||
sa.Boolean(),
|
||||
default=False,
|
||||
nullable=False,
|
||||
server_default="false",
|
||||
),
|
||||
)
|
||||
op.add_column(
|
||||
"user",
|
||||
sa.Column(
|
||||
"voice_auto_playback",
|
||||
sa.Boolean(),
|
||||
default=False,
|
||||
nullable=False,
|
||||
server_default="false",
|
||||
),
|
||||
)
|
||||
op.add_column(
|
||||
"user",
|
||||
sa.Column(
|
||||
"voice_playback_speed",
|
||||
sa.Float(),
|
||||
default=1.0,
|
||||
nullable=False,
|
||||
server_default="1.0",
|
||||
),
|
||||
)
|
||||
|
||||
|
||||
def downgrade() -> None:
|
||||
# Remove user voice preference columns
|
||||
op.drop_column("user", "voice_playback_speed")
|
||||
op.drop_column("user", "voice_auto_playback")
|
||||
op.drop_column("user", "voice_auto_send")
|
||||
|
||||
op.drop_index("ix_voice_provider_one_default_tts", table_name="voice_provider")
|
||||
op.drop_index("ix_voice_provider_one_default_stt", table_name="voice_provider")
|
||||
|
||||
# Drop voice_provider table
|
||||
op.drop_table("voice_provider")
|
||||
@@ -29,7 +29,6 @@ from fastapi import Query
|
||||
from fastapi import Request
|
||||
from fastapi import Response
|
||||
from fastapi import status
|
||||
from fastapi import WebSocket
|
||||
from fastapi.responses import RedirectResponse
|
||||
from fastapi.security import OAuth2PasswordRequestForm
|
||||
from fastapi_users import BaseUserManager
|
||||
@@ -122,7 +121,6 @@ from onyx.db.models import User
|
||||
from onyx.db.pat import fetch_user_for_pat
|
||||
from onyx.db.users import get_user_by_email
|
||||
from onyx.redis.redis_pool import get_async_redis_connection
|
||||
from onyx.redis.redis_pool import retrieve_ws_token_data
|
||||
from onyx.server.settings.store import load_settings
|
||||
from onyx.server.utils import BasicAuthenticationError
|
||||
from onyx.utils.logger import setup_logger
|
||||
@@ -1614,102 +1612,6 @@ async def current_admin_user(user: User = Depends(current_user)) -> User:
|
||||
return user
|
||||
|
||||
|
||||
async def _get_user_from_token_data(token_data: dict) -> User | None:
|
||||
"""Shared logic: token data dict → User object.
|
||||
|
||||
Args:
|
||||
token_data: Decoded token data containing 'sub' (user ID).
|
||||
|
||||
Returns:
|
||||
User object if found and active, None otherwise.
|
||||
"""
|
||||
user_id = token_data.get("sub")
|
||||
if not user_id:
|
||||
return None
|
||||
|
||||
try:
|
||||
user_uuid = uuid.UUID(user_id)
|
||||
except ValueError:
|
||||
return None
|
||||
|
||||
async with get_async_session_context_manager() as async_db_session:
|
||||
user = await async_db_session.get(User, user_uuid)
|
||||
if user is None or not user.is_active:
|
||||
return None
|
||||
return user
|
||||
|
||||
|
||||
async def current_user_from_websocket(
|
||||
websocket: WebSocket,
|
||||
token: str = Query(..., description="WebSocket authentication token"),
|
||||
) -> User:
|
||||
"""
|
||||
WebSocket authentication dependency using query parameter.
|
||||
|
||||
Validates the WS token from query param and returns the User.
|
||||
Raises BasicAuthenticationError if authentication fails.
|
||||
|
||||
The token must be obtained from POST /voice/ws-token before connecting.
|
||||
Tokens are single-use and expire after 60 seconds.
|
||||
|
||||
Usage:
|
||||
1. POST /voice/ws-token -> {"token": "xxx"}
|
||||
2. Connect to ws://host/path?token=xxx
|
||||
|
||||
This applies the same auth checks as current_user() for HTTP endpoints.
|
||||
"""
|
||||
# Check Origin header to prevent Cross-Site WebSocket Hijacking (CSWSH)
|
||||
# Browsers always send Origin on WebSocket connections
|
||||
origin = websocket.headers.get("origin")
|
||||
expected_origin = WEB_DOMAIN.rstrip("/")
|
||||
if not origin:
|
||||
logger.warning("WS auth: missing Origin header")
|
||||
raise BasicAuthenticationError(detail="Access denied. Missing origin.")
|
||||
|
||||
actual_origin = origin.rstrip("/")
|
||||
if actual_origin != expected_origin:
|
||||
logger.warning(
|
||||
f"WS auth: origin mismatch. Expected {expected_origin}, got {actual_origin}"
|
||||
)
|
||||
raise BasicAuthenticationError(detail="Access denied. Invalid origin.")
|
||||
|
||||
# Validate WS token in Redis (single-use, deleted after retrieval)
|
||||
try:
|
||||
token_data = await retrieve_ws_token_data(token)
|
||||
if token_data is None:
|
||||
raise BasicAuthenticationError(
|
||||
detail="Access denied. Invalid or expired authentication token."
|
||||
)
|
||||
except BasicAuthenticationError:
|
||||
raise
|
||||
except Exception as e:
|
||||
logger.error(f"WS auth: error during token validation: {e}")
|
||||
raise BasicAuthenticationError(
|
||||
detail="Authentication verification failed."
|
||||
) from e
|
||||
|
||||
# Get user from token data
|
||||
user = await _get_user_from_token_data(token_data)
|
||||
if user is None:
|
||||
logger.warning(f"WS auth: user not found for id={token_data.get('sub')}")
|
||||
raise BasicAuthenticationError(
|
||||
detail="Access denied. User not found or inactive."
|
||||
)
|
||||
|
||||
# Apply same checks as HTTP auth (verification, OIDC expiry, role)
|
||||
user = await double_check_user(user)
|
||||
|
||||
# Block LIMITED users (same as current_user)
|
||||
if user.role == UserRole.LIMITED:
|
||||
logger.warning(f"WS auth: user {user.email} has LIMITED role")
|
||||
raise BasicAuthenticationError(
|
||||
detail="Access denied. User role is LIMITED. BASIC or higher permissions are required.",
|
||||
)
|
||||
|
||||
logger.debug(f"WS auth: authenticated {user.email}")
|
||||
return user
|
||||
|
||||
|
||||
def get_default_admin_user_emails_() -> list[str]:
|
||||
# No default seeding available for Onyx MIT
|
||||
return []
|
||||
|
||||
@@ -339,15 +339,20 @@ class User(SQLAlchemyBaseUserTableUUID, Base):
|
||||
TIMESTAMPAware(timezone=True), nullable=True
|
||||
)
|
||||
|
||||
created_at: Mapped[datetime.datetime] = mapped_column(
|
||||
DateTime(timezone=True), server_default=func.now(), nullable=False
|
||||
)
|
||||
updated_at: Mapped[datetime.datetime] = mapped_column(
|
||||
DateTime(timezone=True),
|
||||
server_default=func.now(),
|
||||
onupdate=func.now(),
|
||||
nullable=False,
|
||||
)
|
||||
|
||||
default_model: Mapped[str] = mapped_column(Text, nullable=True)
|
||||
# organized in typical structured fashion
|
||||
# formatted as `displayName__provider__modelName`
|
||||
|
||||
# Voice preferences
|
||||
voice_auto_send: Mapped[bool] = mapped_column(Boolean, default=False)
|
||||
voice_auto_playback: Mapped[bool] = mapped_column(Boolean, default=False)
|
||||
voice_playback_speed: Mapped[float] = mapped_column(Float, default=1.0)
|
||||
|
||||
# relationships
|
||||
credentials: Mapped[list["Credential"]] = relationship(
|
||||
"Credential", back_populates="user"
|
||||
@@ -3060,65 +3065,6 @@ class ImageGenerationConfig(Base):
|
||||
)
|
||||
|
||||
|
||||
class VoiceProvider(Base):
|
||||
"""Configuration for voice services (STT and TTS)."""
|
||||
|
||||
__tablename__ = "voice_provider"
|
||||
|
||||
id: Mapped[int] = mapped_column(Integer, primary_key=True)
|
||||
name: Mapped[str] = mapped_column(String, unique=True)
|
||||
provider_type: Mapped[str] = mapped_column(
|
||||
String
|
||||
) # "openai", "azure", "elevenlabs"
|
||||
api_key: Mapped[SensitiveValue[str] | None] = mapped_column(
|
||||
EncryptedString(), nullable=True
|
||||
)
|
||||
api_base: Mapped[str | None] = mapped_column(String, nullable=True)
|
||||
custom_config: Mapped[dict[str, Any] | None] = mapped_column(
|
||||
postgresql.JSONB(), nullable=True
|
||||
)
|
||||
|
||||
# Model/voice configuration
|
||||
stt_model: Mapped[str | None] = mapped_column(
|
||||
String, nullable=True
|
||||
) # e.g., "whisper-1"
|
||||
tts_model: Mapped[str | None] = mapped_column(
|
||||
String, nullable=True
|
||||
) # e.g., "tts-1", "tts-1-hd"
|
||||
default_voice: Mapped[str | None] = mapped_column(
|
||||
String, nullable=True
|
||||
) # e.g., "alloy", "echo"
|
||||
|
||||
# STT and TTS can use different providers - only one provider per type
|
||||
is_default_stt: Mapped[bool] = mapped_column(Boolean, nullable=False, default=False)
|
||||
is_default_tts: Mapped[bool] = mapped_column(Boolean, nullable=False, default=False)
|
||||
|
||||
deleted: Mapped[bool] = mapped_column(Boolean, default=False)
|
||||
|
||||
time_created: Mapped[datetime.datetime] = mapped_column(
|
||||
DateTime(timezone=True), server_default=func.now()
|
||||
)
|
||||
time_updated: Mapped[datetime.datetime] = mapped_column(
|
||||
DateTime(timezone=True), server_default=func.now(), onupdate=func.now()
|
||||
)
|
||||
|
||||
# Enforce only one default STT provider and one default TTS provider at DB level
|
||||
__table_args__ = (
|
||||
Index(
|
||||
"ix_voice_provider_one_default_stt",
|
||||
"is_default_stt",
|
||||
unique=True,
|
||||
postgresql_where=(is_default_stt == True), # noqa: E712
|
||||
),
|
||||
Index(
|
||||
"ix_voice_provider_one_default_tts",
|
||||
"is_default_tts",
|
||||
unique=True,
|
||||
postgresql_where=(is_default_tts == True), # noqa: E712
|
||||
),
|
||||
)
|
||||
|
||||
|
||||
class CloudEmbeddingProvider(Base):
|
||||
__tablename__ = "embedding_provider"
|
||||
|
||||
|
||||
@@ -4,6 +4,7 @@ from uuid import UUID
|
||||
|
||||
from fastapi import HTTPException
|
||||
from fastapi_users.password import PasswordHelper
|
||||
from sqlalchemy import case
|
||||
from sqlalchemy import func
|
||||
from sqlalchemy import select
|
||||
from sqlalchemy.exc import IntegrityError
|
||||
@@ -11,6 +12,7 @@ from sqlalchemy.orm import Session
|
||||
from sqlalchemy.sql import expression
|
||||
from sqlalchemy.sql.elements import ColumnElement
|
||||
from sqlalchemy.sql.elements import KeyedColumnElement
|
||||
from sqlalchemy.sql.expression import or_
|
||||
|
||||
from onyx.auth.invited_users import remove_user_from_invited_users
|
||||
from onyx.auth.schemas import UserRole
|
||||
@@ -24,6 +26,7 @@ from onyx.db.models import Persona__User
|
||||
from onyx.db.models import SamlAccount
|
||||
from onyx.db.models import User
|
||||
from onyx.db.models import User__UserGroup
|
||||
from onyx.db.models import UserGroup
|
||||
from onyx.utils.variable_functionality import fetch_ee_implementation_or_noop
|
||||
|
||||
|
||||
@@ -162,7 +165,13 @@ def _get_accepted_user_where_clause(
|
||||
where_clause.append(User.role != UserRole.EXT_PERM_USER)
|
||||
|
||||
if email_filter_string is not None:
|
||||
where_clause.append(email_col.ilike(f"%{email_filter_string}%"))
|
||||
personal_name_col: KeyedColumnElement[Any] = User.__table__.c.personal_name
|
||||
where_clause.append(
|
||||
or_(
|
||||
email_col.ilike(f"%{email_filter_string}%"),
|
||||
personal_name_col.ilike(f"%{email_filter_string}%"),
|
||||
)
|
||||
)
|
||||
|
||||
if roles_filter:
|
||||
where_clause.append(User.role.in_(roles_filter))
|
||||
@@ -173,6 +182,21 @@ def _get_accepted_user_where_clause(
|
||||
return where_clause
|
||||
|
||||
|
||||
def get_all_accepted_users(
|
||||
db_session: Session,
|
||||
include_external: bool = False,
|
||||
) -> Sequence[User]:
|
||||
"""Returns all accepted users without pagination.
|
||||
Uses the same filtering as the paginated endpoint but without
|
||||
search, role, or active filters."""
|
||||
stmt = select(User)
|
||||
where_clause = _get_accepted_user_where_clause(
|
||||
include_external=include_external,
|
||||
)
|
||||
stmt = stmt.where(*where_clause).order_by(User.email)
|
||||
return db_session.scalars(stmt).unique().all()
|
||||
|
||||
|
||||
def get_page_of_filtered_users(
|
||||
db_session: Session,
|
||||
page_size: int,
|
||||
@@ -218,6 +242,41 @@ def get_total_filtered_users_count(
|
||||
return db_session.scalar(total_count_stmt) or 0
|
||||
|
||||
|
||||
def get_user_counts_by_role_and_status(
|
||||
db_session: Session,
|
||||
) -> dict[str, dict[str, int]]:
|
||||
"""Returns user counts grouped by role and by active/inactive status.
|
||||
|
||||
Excludes API key users, anonymous users, and no-auth placeholder users.
|
||||
Uses a single query with conditional aggregation.
|
||||
"""
|
||||
base_where = _get_accepted_user_where_clause()
|
||||
role_col = User.__table__.c.role
|
||||
is_active_col = User.__table__.c.is_active
|
||||
|
||||
stmt = (
|
||||
select(
|
||||
role_col,
|
||||
func.count().label("total"),
|
||||
func.sum(case((is_active_col.is_(True), 1), else_=0)).label("active"),
|
||||
func.sum(case((is_active_col.is_(False), 1), else_=0)).label("inactive"),
|
||||
)
|
||||
.where(*base_where)
|
||||
.group_by(role_col)
|
||||
)
|
||||
|
||||
role_counts: dict[str, int] = {}
|
||||
status_counts: dict[str, int] = {"active": 0, "inactive": 0}
|
||||
|
||||
for role_val, total, active, inactive in db_session.execute(stmt).all():
|
||||
key = role_val.value if hasattr(role_val, "value") else str(role_val)
|
||||
role_counts[key] = total
|
||||
status_counts["active"] += active or 0
|
||||
status_counts["inactive"] += inactive or 0
|
||||
|
||||
return {"role_counts": role_counts, "status_counts": status_counts}
|
||||
|
||||
|
||||
def get_user_by_email(email: str, db_session: Session) -> User | None:
|
||||
user = (
|
||||
db_session.query(User)
|
||||
@@ -294,24 +353,23 @@ def batch_add_ext_perm_user_if_not_exists(
|
||||
lower_emails = [email.lower() for email in emails]
|
||||
found_users, missing_lower_emails = _get_users_by_emails(db_session, lower_emails)
|
||||
|
||||
new_users: list[User] = []
|
||||
# Use savepoints (begin_nested) so that a failed insert only rolls back
|
||||
# that single user, not the entire transaction. A plain rollback() would
|
||||
# discard all previously flushed users in the same transaction.
|
||||
# We also avoid add_all() because SQLAlchemy 2.0's insertmanyvalues
|
||||
# batch path hits a UUID sentinel mismatch with server_default columns.
|
||||
for email in missing_lower_emails:
|
||||
new_users.append(_generate_ext_permissioned_user(email=email))
|
||||
user = _generate_ext_permissioned_user(email=email)
|
||||
savepoint = db_session.begin_nested()
|
||||
try:
|
||||
db_session.add(user)
|
||||
savepoint.commit()
|
||||
except IntegrityError:
|
||||
savepoint.rollback()
|
||||
if not continue_on_error:
|
||||
raise
|
||||
|
||||
try:
|
||||
db_session.add_all(new_users)
|
||||
db_session.commit()
|
||||
except IntegrityError:
|
||||
db_session.rollback()
|
||||
if not continue_on_error:
|
||||
raise
|
||||
for user in new_users:
|
||||
try:
|
||||
db_session.add(user)
|
||||
db_session.commit()
|
||||
except IntegrityError:
|
||||
db_session.rollback()
|
||||
continue
|
||||
db_session.commit()
|
||||
# Fetch all users again to ensure we have the most up-to-date list
|
||||
all_users, _ = _get_users_by_emails(db_session, lower_emails)
|
||||
return all_users
|
||||
@@ -358,3 +416,28 @@ def delete_user_from_db(
|
||||
# NOTE: edge case may exist with race conditions
|
||||
# with this `invited user` scheme generally.
|
||||
remove_user_from_invited_users(user_to_delete.email)
|
||||
|
||||
|
||||
def batch_get_user_groups(
|
||||
db_session: Session,
|
||||
user_ids: list[UUID],
|
||||
) -> dict[UUID, list[tuple[int, str]]]:
|
||||
"""Fetch group memberships for a batch of users in a single query.
|
||||
Returns a mapping of user_id -> list of (group_id, group_name) tuples."""
|
||||
if not user_ids:
|
||||
return {}
|
||||
|
||||
rows = db_session.execute(
|
||||
select(
|
||||
User__UserGroup.user_id,
|
||||
UserGroup.id,
|
||||
UserGroup.name,
|
||||
)
|
||||
.join(UserGroup, UserGroup.id == User__UserGroup.user_group_id)
|
||||
.where(User__UserGroup.user_id.in_(user_ids))
|
||||
).all()
|
||||
|
||||
result: dict[UUID, list[tuple[int, str]]] = {uid: [] for uid in user_ids}
|
||||
for user_id, group_id, group_name in rows:
|
||||
result[user_id].append((group_id, group_name))
|
||||
return result
|
||||
|
||||
@@ -1,248 +0,0 @@
|
||||
from typing import Any
|
||||
from uuid import UUID
|
||||
|
||||
from sqlalchemy import select
|
||||
from sqlalchemy import update
|
||||
from sqlalchemy.orm import Session
|
||||
|
||||
from onyx.db.models import User
|
||||
from onyx.db.models import VoiceProvider
|
||||
from onyx.error_handling.error_codes import OnyxErrorCode
|
||||
from onyx.error_handling.exceptions import OnyxError
|
||||
|
||||
MIN_VOICE_PLAYBACK_SPEED = 0.5
|
||||
MAX_VOICE_PLAYBACK_SPEED = 2.0
|
||||
|
||||
|
||||
def fetch_voice_providers(db_session: Session) -> list[VoiceProvider]:
|
||||
"""Fetch all voice providers."""
|
||||
return list(
|
||||
db_session.scalars(
|
||||
select(VoiceProvider)
|
||||
.where(VoiceProvider.deleted.is_(False))
|
||||
.order_by(VoiceProvider.name)
|
||||
).all()
|
||||
)
|
||||
|
||||
|
||||
def fetch_voice_provider_by_id(
|
||||
db_session: Session, provider_id: int, include_deleted: bool = False
|
||||
) -> VoiceProvider | None:
|
||||
"""Fetch a voice provider by ID."""
|
||||
stmt = select(VoiceProvider).where(VoiceProvider.id == provider_id)
|
||||
if not include_deleted:
|
||||
stmt = stmt.where(VoiceProvider.deleted.is_(False))
|
||||
return db_session.scalar(stmt)
|
||||
|
||||
|
||||
def fetch_default_stt_provider(db_session: Session) -> VoiceProvider | None:
|
||||
"""Fetch the default STT provider."""
|
||||
return db_session.scalar(
|
||||
select(VoiceProvider)
|
||||
.where(VoiceProvider.is_default_stt.is_(True))
|
||||
.where(VoiceProvider.deleted.is_(False))
|
||||
)
|
||||
|
||||
|
||||
def fetch_default_tts_provider(db_session: Session) -> VoiceProvider | None:
|
||||
"""Fetch the default TTS provider."""
|
||||
return db_session.scalar(
|
||||
select(VoiceProvider)
|
||||
.where(VoiceProvider.is_default_tts.is_(True))
|
||||
.where(VoiceProvider.deleted.is_(False))
|
||||
)
|
||||
|
||||
|
||||
def fetch_voice_provider_by_type(
|
||||
db_session: Session, provider_type: str
|
||||
) -> VoiceProvider | None:
|
||||
"""Fetch a voice provider by type."""
|
||||
return db_session.scalar(
|
||||
select(VoiceProvider)
|
||||
.where(VoiceProvider.provider_type == provider_type)
|
||||
.where(VoiceProvider.deleted.is_(False))
|
||||
)
|
||||
|
||||
|
||||
def upsert_voice_provider(
|
||||
*,
|
||||
db_session: Session,
|
||||
provider_id: int | None,
|
||||
name: str,
|
||||
provider_type: str,
|
||||
api_key: str | None,
|
||||
api_key_changed: bool,
|
||||
api_base: str | None = None,
|
||||
custom_config: dict[str, Any] | None = None,
|
||||
stt_model: str | None = None,
|
||||
tts_model: str | None = None,
|
||||
default_voice: str | None = None,
|
||||
activate_stt: bool = False,
|
||||
activate_tts: bool = False,
|
||||
) -> VoiceProvider:
|
||||
"""Create or update a voice provider."""
|
||||
provider: VoiceProvider | None = None
|
||||
|
||||
if provider_id is not None:
|
||||
provider = fetch_voice_provider_by_id(db_session, provider_id)
|
||||
if provider is None:
|
||||
raise OnyxError(
|
||||
OnyxErrorCode.NOT_FOUND,
|
||||
f"No voice provider with id {provider_id} exists.",
|
||||
)
|
||||
else:
|
||||
provider = VoiceProvider()
|
||||
db_session.add(provider)
|
||||
|
||||
# Apply updates
|
||||
provider.name = name
|
||||
provider.provider_type = provider_type
|
||||
provider.api_base = api_base
|
||||
provider.custom_config = custom_config
|
||||
provider.stt_model = stt_model
|
||||
provider.tts_model = tts_model
|
||||
provider.default_voice = default_voice
|
||||
|
||||
# Only update API key if explicitly changed or if provider has no key
|
||||
if api_key_changed or provider.api_key is None:
|
||||
provider.api_key = api_key # type: ignore[assignment]
|
||||
|
||||
db_session.flush()
|
||||
|
||||
if activate_stt:
|
||||
set_default_stt_provider(db_session=db_session, provider_id=provider.id)
|
||||
if activate_tts:
|
||||
set_default_tts_provider(db_session=db_session, provider_id=provider.id)
|
||||
|
||||
db_session.refresh(provider)
|
||||
return provider
|
||||
|
||||
|
||||
def delete_voice_provider(db_session: Session, provider_id: int) -> None:
|
||||
"""Soft-delete a voice provider by ID."""
|
||||
provider = fetch_voice_provider_by_id(db_session, provider_id)
|
||||
if provider:
|
||||
provider.deleted = True
|
||||
db_session.flush()
|
||||
|
||||
|
||||
def set_default_stt_provider(*, db_session: Session, provider_id: int) -> VoiceProvider:
|
||||
"""Set a voice provider as the default STT provider."""
|
||||
provider = fetch_voice_provider_by_id(db_session, provider_id)
|
||||
if provider is None:
|
||||
raise OnyxError(
|
||||
OnyxErrorCode.NOT_FOUND,
|
||||
f"No voice provider with id {provider_id} exists.",
|
||||
)
|
||||
|
||||
# Deactivate all other STT providers
|
||||
db_session.execute(
|
||||
update(VoiceProvider)
|
||||
.where(
|
||||
VoiceProvider.is_default_stt.is_(True),
|
||||
VoiceProvider.id != provider_id,
|
||||
)
|
||||
.values(is_default_stt=False)
|
||||
)
|
||||
|
||||
# Activate this provider
|
||||
provider.is_default_stt = True
|
||||
|
||||
db_session.flush()
|
||||
db_session.refresh(provider)
|
||||
return provider
|
||||
|
||||
|
||||
def set_default_tts_provider(
|
||||
*, db_session: Session, provider_id: int, tts_model: str | None = None
|
||||
) -> VoiceProvider:
|
||||
"""Set a voice provider as the default TTS provider."""
|
||||
provider = fetch_voice_provider_by_id(db_session, provider_id)
|
||||
if provider is None:
|
||||
raise OnyxError(
|
||||
OnyxErrorCode.NOT_FOUND,
|
||||
f"No voice provider with id {provider_id} exists.",
|
||||
)
|
||||
|
||||
# Deactivate all other TTS providers
|
||||
db_session.execute(
|
||||
update(VoiceProvider)
|
||||
.where(
|
||||
VoiceProvider.is_default_tts.is_(True),
|
||||
VoiceProvider.id != provider_id,
|
||||
)
|
||||
.values(is_default_tts=False)
|
||||
)
|
||||
|
||||
# Activate this provider
|
||||
provider.is_default_tts = True
|
||||
|
||||
# Update the TTS model if specified
|
||||
if tts_model is not None:
|
||||
provider.tts_model = tts_model
|
||||
|
||||
db_session.flush()
|
||||
db_session.refresh(provider)
|
||||
return provider
|
||||
|
||||
|
||||
def deactivate_stt_provider(*, db_session: Session, provider_id: int) -> VoiceProvider:
|
||||
"""Remove the default STT status from a voice provider."""
|
||||
provider = fetch_voice_provider_by_id(db_session, provider_id)
|
||||
if provider is None:
|
||||
raise OnyxError(
|
||||
OnyxErrorCode.NOT_FOUND,
|
||||
f"No voice provider with id {provider_id} exists.",
|
||||
)
|
||||
|
||||
provider.is_default_stt = False
|
||||
|
||||
db_session.flush()
|
||||
db_session.refresh(provider)
|
||||
return provider
|
||||
|
||||
|
||||
def deactivate_tts_provider(*, db_session: Session, provider_id: int) -> VoiceProvider:
|
||||
"""Remove the default TTS status from a voice provider."""
|
||||
provider = fetch_voice_provider_by_id(db_session, provider_id)
|
||||
if provider is None:
|
||||
raise OnyxError(
|
||||
OnyxErrorCode.NOT_FOUND,
|
||||
f"No voice provider with id {provider_id} exists.",
|
||||
)
|
||||
|
||||
provider.is_default_tts = False
|
||||
|
||||
db_session.flush()
|
||||
db_session.refresh(provider)
|
||||
return provider
|
||||
|
||||
|
||||
# User voice preferences
|
||||
|
||||
|
||||
def update_user_voice_settings(
|
||||
db_session: Session,
|
||||
user_id: UUID,
|
||||
auto_send: bool | None = None,
|
||||
auto_playback: bool | None = None,
|
||||
playback_speed: float | None = None,
|
||||
) -> None:
|
||||
"""Update user's voice settings.
|
||||
|
||||
For all fields, None means "don't update this field".
|
||||
"""
|
||||
values: dict[str, bool | float] = {}
|
||||
|
||||
if auto_send is not None:
|
||||
values["voice_auto_send"] = auto_send
|
||||
if auto_playback is not None:
|
||||
values["voice_auto_playback"] = auto_playback
|
||||
if playback_speed is not None:
|
||||
values["voice_playback_speed"] = max(
|
||||
MIN_VOICE_PLAYBACK_SPEED, min(MAX_VOICE_PLAYBACK_SPEED, playback_speed)
|
||||
)
|
||||
|
||||
if values:
|
||||
db_session.execute(update(User).where(User.id == user_id).values(**values)) # type: ignore[arg-type]
|
||||
db_session.flush()
|
||||
@@ -66,11 +66,6 @@ class OnyxErrorCode(Enum):
|
||||
RATE_LIMITED = ("RATE_LIMITED", 429)
|
||||
SEAT_LIMIT_EXCEEDED = ("SEAT_LIMIT_EXCEEDED", 402)
|
||||
|
||||
# ------------------------------------------------------------------
|
||||
# Payload (413)
|
||||
# ------------------------------------------------------------------
|
||||
PAYLOAD_TOO_LARGE = ("PAYLOAD_TOO_LARGE", 413)
|
||||
|
||||
# ------------------------------------------------------------------
|
||||
# Connector / Credential Errors (400-range)
|
||||
# ------------------------------------------------------------------
|
||||
|
||||
@@ -3782,16 +3782,6 @@
|
||||
"display_name": "Claude Sonnet 3.5",
|
||||
"model_vendor": "anthropic"
|
||||
},
|
||||
"vertex_ai/claude-3-5-sonnet-v2": {
|
||||
"display_name": "Claude Sonnet 3.5",
|
||||
"model_vendor": "anthropic",
|
||||
"model_version": "v2"
|
||||
},
|
||||
"vertex_ai/claude-3-5-sonnet-v2@20241022": {
|
||||
"display_name": "Claude Sonnet 3.5 v2",
|
||||
"model_vendor": "anthropic",
|
||||
"model_version": "20241022"
|
||||
},
|
||||
"vertex_ai/claude-3-5-sonnet@20240620": {
|
||||
"display_name": "Claude Sonnet 3.5",
|
||||
"model_vendor": "anthropic",
|
||||
|
||||
@@ -119,9 +119,6 @@ from onyx.server.manage.opensearch_migration.api import (
|
||||
from onyx.server.manage.search_settings import router as search_settings_router
|
||||
from onyx.server.manage.slack_bot import router as slack_bot_management_router
|
||||
from onyx.server.manage.users import router as user_router
|
||||
from onyx.server.manage.voice.api import admin_router as voice_admin_router
|
||||
from onyx.server.manage.voice.user_api import router as voice_router
|
||||
from onyx.server.manage.voice.websocket_api import router as voice_websocket_router
|
||||
from onyx.server.manage.web_search.api import (
|
||||
admin_router as web_search_admin_router,
|
||||
)
|
||||
@@ -500,9 +497,6 @@ def get_application(lifespan_override: Lifespan | None = None) -> FastAPI:
|
||||
include_router_with_global_prefix_prepended(application, embedding_router)
|
||||
include_router_with_global_prefix_prepended(application, web_search_router)
|
||||
include_router_with_global_prefix_prepended(application, web_search_admin_router)
|
||||
include_router_with_global_prefix_prepended(application, voice_admin_router)
|
||||
include_router_with_global_prefix_prepended(application, voice_router)
|
||||
include_router_with_global_prefix_prepended(application, voice_websocket_router)
|
||||
include_router_with_global_prefix_prepended(
|
||||
application, opensearch_migration_admin_router
|
||||
)
|
||||
|
||||
@@ -76,11 +76,39 @@ def get_feedback_reminder_blocks(thread_link: str, include_followup: bool) -> Bl
|
||||
return SectionBlock(text=text)
|
||||
|
||||
|
||||
def _find_unclosed_fence(text: str) -> tuple[bool, int, str]:
|
||||
"""Scan *text* line-by-line to determine code-fence state.
|
||||
|
||||
Returns (is_open, fence_line_start, lang) where:
|
||||
- *is_open* is True when the text ends inside an unclosed code fence
|
||||
- *fence_line_start* is the char offset of the opening fence line
|
||||
(only meaningful when *is_open* is True)
|
||||
- *lang* is the language specifier on the opening fence (e.g. "python")
|
||||
"""
|
||||
in_fence = False
|
||||
fence_start = 0
|
||||
lang = ""
|
||||
offset = 0
|
||||
for line in text.splitlines(True): # keep line endings
|
||||
# Slack only treats ``` as a fence when it starts at column 0.
|
||||
# Indented backticks (e.g. inside a heredoc) are content, not fences.
|
||||
if line.startswith("```"):
|
||||
if not in_fence:
|
||||
in_fence = True
|
||||
fence_start = offset
|
||||
lang = line[3:].strip()
|
||||
else:
|
||||
in_fence = False
|
||||
lang = ""
|
||||
offset += len(line)
|
||||
return in_fence, fence_start, lang
|
||||
|
||||
|
||||
def _split_text(text: str, limit: int = 3000) -> list[str]:
|
||||
if len(text) <= limit:
|
||||
return [text]
|
||||
|
||||
chunks = []
|
||||
chunks: list[str] = []
|
||||
while text:
|
||||
if len(text) <= limit:
|
||||
chunks.append(text)
|
||||
@@ -92,8 +120,37 @@ def _split_text(text: str, limit: int = 3000) -> list[str]:
|
||||
split_at = limit
|
||||
|
||||
chunk = text[:split_at]
|
||||
|
||||
# Check whether the chunk ends inside an unclosed code fence.
|
||||
is_open, fence_start, lang = _find_unclosed_fence(chunk)
|
||||
if is_open:
|
||||
# Tier 1: try to back up to before the opening fence so the
|
||||
# entire code block stays in the next chunk.
|
||||
split_before = text.rfind("\n", 0, fence_start)
|
||||
if split_before > 0 and text[:split_before].strip():
|
||||
chunk = text[:split_before]
|
||||
remainder = text[split_before:]
|
||||
# Strip only the leading newline to preserve blank lines
|
||||
# and formatting before the code fence.
|
||||
if remainder and remainder[0] == "\n":
|
||||
remainder = remainder[1:]
|
||||
text = remainder
|
||||
else:
|
||||
# Tier 2: the code block itself exceeds the limit — split
|
||||
# inside it. Close the fence here, reopen in the next.
|
||||
chunk += "\n```"
|
||||
remainder = text[split_at:]
|
||||
# Strip only the single boundary character (space/newline)
|
||||
# to avoid eating meaningful indentation inside code.
|
||||
if remainder and remainder[0] in " \n":
|
||||
remainder = remainder[1:]
|
||||
text = f"```{lang}\n" + remainder
|
||||
else:
|
||||
# No unclosed fence — plain prose split. Leading whitespace
|
||||
# is cosmetic in Slack mrkdwn, so lstrip() is safe here.
|
||||
text = text[split_at:].lstrip()
|
||||
|
||||
chunks.append(chunk)
|
||||
text = text[split_at:].lstrip() # Remove leading spaces from the next chunk
|
||||
|
||||
return chunks
|
||||
|
||||
|
||||
@@ -1,5 +1,9 @@
|
||||
import re
|
||||
from enum import Enum
|
||||
|
||||
# Matches Slack channel references like <#C097NBWMY8Y> or <#C097NBWMY8Y|channel-name>
|
||||
SLACK_CHANNEL_REF_PATTERN = re.compile(r"<#([A-Z0-9]+)(?:\|([^>]+))?>")
|
||||
|
||||
LIKE_BLOCK_ACTION_ID = "feedback-like"
|
||||
DISLIKE_BLOCK_ACTION_ID = "feedback-dislike"
|
||||
SHOW_EVERYONE_ACTION_ID = "show-everyone"
|
||||
|
||||
@@ -18,15 +18,18 @@ from onyx.configs.onyxbot_configs import ONYX_BOT_DISPLAY_ERROR_MSGS
|
||||
from onyx.configs.onyxbot_configs import ONYX_BOT_NUM_RETRIES
|
||||
from onyx.configs.onyxbot_configs import ONYX_BOT_REACT_EMOJI
|
||||
from onyx.context.search.models import BaseFilters
|
||||
from onyx.context.search.models import Tag
|
||||
from onyx.db.engine.sql_engine import get_session_with_current_tenant
|
||||
from onyx.db.models import SlackChannelConfig
|
||||
from onyx.db.models import User
|
||||
from onyx.db.persona import get_persona_by_id
|
||||
from onyx.db.users import get_user_by_email
|
||||
from onyx.onyxbot.slack.blocks import build_slack_response_blocks
|
||||
from onyx.onyxbot.slack.constants import SLACK_CHANNEL_REF_PATTERN
|
||||
from onyx.onyxbot.slack.handlers.utils import send_team_member_message
|
||||
from onyx.onyxbot.slack.models import SlackMessageInfo
|
||||
from onyx.onyxbot.slack.models import ThreadMessage
|
||||
from onyx.onyxbot.slack.utils import get_channel_from_id
|
||||
from onyx.onyxbot.slack.utils import get_channel_name_from_id
|
||||
from onyx.onyxbot.slack.utils import respond_in_thread_or_channel
|
||||
from onyx.onyxbot.slack.utils import SlackRateLimiter
|
||||
@@ -41,6 +44,51 @@ srl = SlackRateLimiter()
|
||||
RT = TypeVar("RT") # return type
|
||||
|
||||
|
||||
def resolve_channel_references(
|
||||
message: str,
|
||||
client: WebClient,
|
||||
logger: OnyxLoggingAdapter,
|
||||
) -> tuple[str, list[Tag]]:
|
||||
"""Parse Slack channel references from a message, resolve IDs to names,
|
||||
replace the raw markup with readable #channel-name, and return channel tags
|
||||
for search filtering."""
|
||||
tags: list[Tag] = []
|
||||
channel_matches = SLACK_CHANNEL_REF_PATTERN.findall(message)
|
||||
seen_channel_ids: set[str] = set()
|
||||
|
||||
for channel_id, channel_name_from_markup in channel_matches:
|
||||
if channel_id in seen_channel_ids:
|
||||
continue
|
||||
seen_channel_ids.add(channel_id)
|
||||
|
||||
channel_name = channel_name_from_markup or None
|
||||
|
||||
if not channel_name:
|
||||
try:
|
||||
channel_info = get_channel_from_id(client=client, channel_id=channel_id)
|
||||
channel_name = channel_info.get("name") or None
|
||||
except Exception:
|
||||
logger.warning(f"Failed to resolve channel name for ID: {channel_id}")
|
||||
|
||||
if not channel_name:
|
||||
continue
|
||||
|
||||
# Replace raw Slack markup with readable channel name
|
||||
if channel_name_from_markup:
|
||||
message = message.replace(
|
||||
f"<#{channel_id}|{channel_name_from_markup}>",
|
||||
f"#{channel_name}",
|
||||
)
|
||||
else:
|
||||
message = message.replace(
|
||||
f"<#{channel_id}>",
|
||||
f"#{channel_name}",
|
||||
)
|
||||
tags.append(Tag(tag_key="Channel", tag_value=channel_name))
|
||||
|
||||
return message, tags
|
||||
|
||||
|
||||
def rate_limits(
|
||||
client: WebClient, channel: str, thread_ts: Optional[str]
|
||||
) -> Callable[[Callable[..., RT]], Callable[..., RT]]:
|
||||
@@ -157,6 +205,20 @@ def handle_regular_answer(
|
||||
user_message = messages[-1]
|
||||
history_messages = messages[:-1]
|
||||
|
||||
# Resolve any <#CHANNEL_ID> references in the user message to readable
|
||||
# channel names and extract channel tags for search filtering
|
||||
resolved_message, channel_tags = resolve_channel_references(
|
||||
message=user_message.message,
|
||||
client=client,
|
||||
logger=logger,
|
||||
)
|
||||
|
||||
user_message = ThreadMessage(
|
||||
message=resolved_message,
|
||||
sender=user_message.sender,
|
||||
role=user_message.role,
|
||||
)
|
||||
|
||||
channel_name, _ = get_channel_name_from_id(
|
||||
client=client,
|
||||
channel_id=channel,
|
||||
@@ -207,6 +269,7 @@ def handle_regular_answer(
|
||||
source_type=None,
|
||||
document_set=document_set_names,
|
||||
time_cutoff=None,
|
||||
tags=channel_tags if channel_tags else None,
|
||||
)
|
||||
|
||||
new_message_request = SendMessageRequest(
|
||||
@@ -231,6 +294,16 @@ def handle_regular_answer(
|
||||
slack_context_str=slack_context_str,
|
||||
)
|
||||
|
||||
# If a channel filter was applied but no results were found, override
|
||||
# the LLM response to avoid hallucinated answers about unindexed channels
|
||||
if channel_tags and not answer.citation_info and not answer.top_documents:
|
||||
channel_names = ", ".join(f"#{tag.tag_value}" for tag in channel_tags)
|
||||
answer.answer = (
|
||||
f"No indexed data found for {channel_names}. "
|
||||
"This channel may not be indexed, or there may be no messages "
|
||||
"matching your query within it."
|
||||
)
|
||||
|
||||
except Exception as e:
|
||||
logger.exception(
|
||||
f"Unable to process message - did not successfully answer "
|
||||
@@ -285,6 +358,7 @@ def handle_regular_answer(
|
||||
only_respond_if_citations
|
||||
and not answer.citation_info
|
||||
and not message_info.bypass_filters
|
||||
and not channel_tags
|
||||
):
|
||||
logger.error(
|
||||
f"Unable to find citations to answer: '{answer.answer}' - not answering!"
|
||||
|
||||
@@ -419,15 +419,12 @@ async def get_async_redis_connection() -> aioredis.Redis:
|
||||
return _async_redis_connection
|
||||
|
||||
|
||||
async def retrieve_auth_token_data(token: str) -> dict | None:
|
||||
"""Validate auth token against Redis and return token data.
|
||||
async def retrieve_auth_token_data_from_redis(request: Request) -> dict | None:
|
||||
token = request.cookies.get(FASTAPI_USERS_AUTH_COOKIE_NAME)
|
||||
if not token:
|
||||
logger.debug("No auth token cookie found")
|
||||
return None
|
||||
|
||||
Args:
|
||||
token: The raw authentication token string.
|
||||
|
||||
Returns:
|
||||
Token data dict if valid, None if invalid/expired.
|
||||
"""
|
||||
try:
|
||||
redis = await get_async_redis_connection()
|
||||
redis_key = REDIS_AUTH_KEY_PREFIX + token
|
||||
@@ -442,96 +439,12 @@ async def retrieve_auth_token_data(token: str) -> dict | None:
|
||||
logger.error("Error decoding token data from Redis")
|
||||
return None
|
||||
except Exception as e:
|
||||
logger.error(f"Unexpected error in retrieve_auth_token_data: {str(e)}")
|
||||
raise ValueError(f"Unexpected error in retrieve_auth_token_data: {str(e)}")
|
||||
|
||||
|
||||
async def retrieve_auth_token_data_from_redis(request: Request) -> dict | None:
|
||||
"""Validate auth token from request cookie. Wrapper for backwards compatibility."""
|
||||
token = request.cookies.get(FASTAPI_USERS_AUTH_COOKIE_NAME)
|
||||
if not token:
|
||||
logger.debug("No auth token cookie found")
|
||||
return None
|
||||
return await retrieve_auth_token_data(token)
|
||||
|
||||
|
||||
# WebSocket token prefix (separate from regular auth tokens)
|
||||
REDIS_WS_TOKEN_PREFIX = "ws_token:"
|
||||
# WebSocket tokens expire after 60 seconds
|
||||
WS_TOKEN_TTL_SECONDS = 60
|
||||
# Rate limit: max tokens per user per window
|
||||
WS_TOKEN_RATE_LIMIT_MAX = 10
|
||||
WS_TOKEN_RATE_LIMIT_WINDOW_SECONDS = 60
|
||||
REDIS_WS_TOKEN_RATE_LIMIT_PREFIX = "ws_token_rate:"
|
||||
|
||||
|
||||
class WsTokenRateLimitExceeded(Exception):
|
||||
"""Raised when a user exceeds the WS token generation rate limit."""
|
||||
|
||||
|
||||
async def store_ws_token(token: str, user_id: str) -> None:
|
||||
"""Store a short-lived WebSocket authentication token in Redis.
|
||||
|
||||
Args:
|
||||
token: The generated WS token.
|
||||
user_id: The user ID to associate with this token.
|
||||
|
||||
Raises:
|
||||
WsTokenRateLimitExceeded: If the user has exceeded the rate limit.
|
||||
"""
|
||||
redis = await get_async_redis_connection()
|
||||
|
||||
# Atomically increment and check rate limit to avoid TOCTOU races
|
||||
rate_limit_key = REDIS_WS_TOKEN_RATE_LIMIT_PREFIX + user_id
|
||||
pipe = redis.pipeline()
|
||||
pipe.incr(rate_limit_key)
|
||||
pipe.expire(rate_limit_key, WS_TOKEN_RATE_LIMIT_WINDOW_SECONDS)
|
||||
results = await pipe.execute()
|
||||
new_count = results[0]
|
||||
|
||||
if new_count > WS_TOKEN_RATE_LIMIT_MAX:
|
||||
# Over limit — decrement back since we won't use this slot
|
||||
await redis.decr(rate_limit_key)
|
||||
logger.warning(f"WS token rate limit exceeded for user {user_id}")
|
||||
raise WsTokenRateLimitExceeded(
|
||||
f"Rate limit exceeded. Maximum {WS_TOKEN_RATE_LIMIT_MAX} tokens per minute."
|
||||
logger.error(
|
||||
f"Unexpected error in retrieve_auth_token_data_from_redis: {str(e)}"
|
||||
)
|
||||
raise ValueError(
|
||||
f"Unexpected error in retrieve_auth_token_data_from_redis: {str(e)}"
|
||||
)
|
||||
|
||||
# Store the actual token
|
||||
redis_key = REDIS_WS_TOKEN_PREFIX + token
|
||||
token_data = json.dumps({"sub": user_id})
|
||||
await redis.set(redis_key, token_data, ex=WS_TOKEN_TTL_SECONDS)
|
||||
|
||||
|
||||
async def retrieve_ws_token_data(token: str) -> dict | None:
|
||||
"""Validate a WebSocket token and return the token data.
|
||||
|
||||
This uses GETDEL for atomic get-and-delete to prevent race conditions
|
||||
where the same token could be used twice.
|
||||
|
||||
Args:
|
||||
token: The WS token to validate.
|
||||
|
||||
Returns:
|
||||
Token data dict with 'sub' (user ID) if valid, None if invalid/expired.
|
||||
"""
|
||||
try:
|
||||
redis = await get_async_redis_connection()
|
||||
redis_key = REDIS_WS_TOKEN_PREFIX + token
|
||||
|
||||
# Atomic get-and-delete to prevent race conditions (Redis 6.2+)
|
||||
token_data_str = await redis.getdel(redis_key)
|
||||
|
||||
if not token_data_str:
|
||||
return None
|
||||
|
||||
return json.loads(token_data_str)
|
||||
except json.JSONDecodeError:
|
||||
logger.error("Error decoding WS token data from Redis")
|
||||
return None
|
||||
except Exception as e:
|
||||
logger.error(f"Unexpected error in retrieve_ws_token_data: {str(e)}")
|
||||
return None
|
||||
|
||||
|
||||
def redis_lock_dump(lock: RedisLock, r: Redis) -> None:
|
||||
|
||||
@@ -9,7 +9,6 @@ from onyx.auth.users import current_chat_accessible_user
|
||||
from onyx.auth.users import current_curator_or_admin_user
|
||||
from onyx.auth.users import current_limited_user
|
||||
from onyx.auth.users import current_user
|
||||
from onyx.auth.users import current_user_from_websocket
|
||||
from onyx.auth.users import current_user_with_expired_token
|
||||
from onyx.configs.app_configs import APP_API_PREFIX
|
||||
from onyx.utils.variable_functionality import fetch_ee_implementation_or_noop
|
||||
@@ -130,7 +129,6 @@ def check_router_auth(
|
||||
or depends_fn == current_curator_or_admin_user
|
||||
or depends_fn == current_user_with_expired_token
|
||||
or depends_fn == current_chat_accessible_user
|
||||
or depends_fn == current_user_from_websocket
|
||||
or depends_fn == control_plane_dep
|
||||
or depends_fn == current_cloud_superuser
|
||||
or depends_fn == verify_scim_token
|
||||
|
||||
@@ -85,11 +85,6 @@ class UserPreferences(BaseModel):
|
||||
chat_background: str | None = None
|
||||
default_app_mode: DefaultAppMode = DefaultAppMode.CHAT
|
||||
|
||||
# Voice preferences
|
||||
voice_auto_send: bool | None = None
|
||||
voice_auto_playback: bool | None = None
|
||||
voice_playback_speed: float | None = None
|
||||
|
||||
# controls which tools are enabled for the user for a specific assistant
|
||||
assistant_specific_configs: UserSpecificAssistantPreferences | None = None
|
||||
|
||||
@@ -169,9 +164,6 @@ class UserInfo(BaseModel):
|
||||
theme_preference=user.theme_preference,
|
||||
chat_background=user.chat_background,
|
||||
default_app_mode=user.default_app_mode,
|
||||
voice_auto_send=user.voice_auto_send,
|
||||
voice_auto_playback=user.voice_auto_playback,
|
||||
voice_playback_speed=user.voice_playback_speed,
|
||||
assistant_specific_configs=assistant_specific_configs,
|
||||
)
|
||||
),
|
||||
@@ -248,12 +240,6 @@ class ChatBackgroundRequest(BaseModel):
|
||||
chat_background: str | None
|
||||
|
||||
|
||||
class VoiceSettingsUpdateRequest(BaseModel):
|
||||
auto_send: bool | None = None
|
||||
auto_playback: bool | None = None
|
||||
playback_speed: float | None = Field(default=None, ge=0.5, le=2.0)
|
||||
|
||||
|
||||
class PersonalizationUpdateRequest(BaseModel):
|
||||
name: str | None = None
|
||||
role: str | None = None
|
||||
|
||||
@@ -5,6 +5,7 @@ from datetime import datetime
|
||||
from datetime import timedelta
|
||||
from datetime import timezone
|
||||
from typing import cast
|
||||
from uuid import UUID
|
||||
|
||||
import jwt
|
||||
from email_validator import EmailNotValidError
|
||||
@@ -18,6 +19,7 @@ from fastapi import Query
|
||||
from fastapi import Request
|
||||
from fastapi.responses import StreamingResponse
|
||||
from pydantic import BaseModel
|
||||
from sqlalchemy import select
|
||||
from sqlalchemy.orm import Session
|
||||
|
||||
from onyx.auth.anonymous_user import fetch_anonymous_user_info
|
||||
@@ -67,11 +69,14 @@ from onyx.db.user_preferences import update_user_role
|
||||
from onyx.db.user_preferences import update_user_shortcut_enabled
|
||||
from onyx.db.user_preferences import update_user_temperature_override_enabled
|
||||
from onyx.db.user_preferences import update_user_theme_preference
|
||||
from onyx.db.users import batch_get_user_groups
|
||||
from onyx.db.users import delete_user_from_db
|
||||
from onyx.db.users import get_all_accepted_users
|
||||
from onyx.db.users import get_all_users
|
||||
from onyx.db.users import get_page_of_filtered_users
|
||||
from onyx.db.users import get_total_filtered_users_count
|
||||
from onyx.db.users import get_user_by_email
|
||||
from onyx.db.users import get_user_counts_by_role_and_status
|
||||
from onyx.db.users import validate_user_role_update
|
||||
from onyx.key_value_store.factory import get_kv_store
|
||||
from onyx.redis.redis_pool import get_raw_redis_client
|
||||
@@ -98,6 +103,7 @@ from onyx.server.manage.models import UserSpecificAssistantPreferences
|
||||
from onyx.server.models import FullUserSnapshot
|
||||
from onyx.server.models import InvitedUserSnapshot
|
||||
from onyx.server.models import MinimalUserSnapshot
|
||||
from onyx.server.models import UserGroupInfo
|
||||
from onyx.server.usage_limits import is_tenant_on_trial_fn
|
||||
from onyx.server.utils import BasicAuthenticationError
|
||||
from onyx.utils.logger import setup_logger
|
||||
@@ -203,14 +209,91 @@ def list_accepted_users(
|
||||
total_items=0,
|
||||
)
|
||||
|
||||
user_ids = [user.id for user in filtered_accepted_users]
|
||||
groups_by_user = batch_get_user_groups(db_session, user_ids)
|
||||
|
||||
# Batch-fetch SCIM mappings to mark synced users
|
||||
scim_synced_ids: set[UUID] = set()
|
||||
try:
|
||||
from onyx.db.models import ScimUserMapping
|
||||
|
||||
scim_mappings = db_session.scalars(
|
||||
select(ScimUserMapping.user_id).where(ScimUserMapping.user_id.in_(user_ids))
|
||||
).all()
|
||||
scim_synced_ids = set(scim_mappings)
|
||||
except Exception:
|
||||
logger.warning(
|
||||
"Failed to fetch SCIM mappings; marking all users as non-synced",
|
||||
exc_info=True,
|
||||
)
|
||||
|
||||
return PaginatedReturn(
|
||||
items=[
|
||||
FullUserSnapshot.from_user_model(user) for user in filtered_accepted_users
|
||||
FullUserSnapshot.from_user_model(
|
||||
user,
|
||||
groups=[
|
||||
UserGroupInfo(id=gid, name=gname)
|
||||
for gid, gname in groups_by_user.get(user.id, [])
|
||||
],
|
||||
is_scim_synced=user.id in scim_synced_ids,
|
||||
)
|
||||
for user in filtered_accepted_users
|
||||
],
|
||||
total_items=total_accepted_users_count,
|
||||
)
|
||||
|
||||
|
||||
@router.get("/manage/users/accepted/all", tags=PUBLIC_API_TAGS)
|
||||
def list_all_accepted_users(
|
||||
_: User = Depends(current_admin_user),
|
||||
db_session: Session = Depends(get_session),
|
||||
) -> list[FullUserSnapshot]:
|
||||
"""Returns all accepted users without pagination.
|
||||
Used by the admin Users page for client-side filtering/sorting."""
|
||||
users = get_all_accepted_users(db_session=db_session)
|
||||
|
||||
if not users:
|
||||
return []
|
||||
|
||||
user_ids = [user.id for user in users]
|
||||
groups_by_user = batch_get_user_groups(db_session, user_ids)
|
||||
|
||||
# Batch-fetch SCIM mappings to mark synced users
|
||||
scim_synced_ids: set[UUID] = set()
|
||||
try:
|
||||
from onyx.db.models import ScimUserMapping
|
||||
|
||||
scim_mappings = db_session.scalars(
|
||||
select(ScimUserMapping.user_id).where(ScimUserMapping.user_id.in_(user_ids))
|
||||
).all()
|
||||
scim_synced_ids = set(scim_mappings)
|
||||
except Exception:
|
||||
logger.warning(
|
||||
"Failed to fetch SCIM mappings; marking all users as non-synced",
|
||||
exc_info=True,
|
||||
)
|
||||
|
||||
return [
|
||||
FullUserSnapshot.from_user_model(
|
||||
user,
|
||||
groups=[
|
||||
UserGroupInfo(id=gid, name=gname)
|
||||
for gid, gname in groups_by_user.get(user.id, [])
|
||||
],
|
||||
is_scim_synced=user.id in scim_synced_ids,
|
||||
)
|
||||
for user in users
|
||||
]
|
||||
|
||||
|
||||
@router.get("/manage/users/counts")
|
||||
def get_user_counts(
|
||||
_: User = Depends(current_admin_user),
|
||||
db_session: Session = Depends(get_session),
|
||||
) -> dict[str, dict[str, int]]:
|
||||
return get_user_counts_by_role_and_status(db_session)
|
||||
|
||||
|
||||
@router.get("/manage/users/invited", tags=PUBLIC_API_TAGS)
|
||||
def list_invited_users(
|
||||
_: User = Depends(current_admin_user),
|
||||
@@ -269,24 +352,10 @@ def list_all_users(
|
||||
if accepted_page is None or invited_page is None or slack_users_page is None:
|
||||
return AllUsersResponse(
|
||||
accepted=[
|
||||
FullUserSnapshot(
|
||||
id=user.id,
|
||||
email=user.email,
|
||||
role=user.role,
|
||||
is_active=user.is_active,
|
||||
password_configured=user.password_configured,
|
||||
)
|
||||
for user in accepted_users
|
||||
FullUserSnapshot.from_user_model(user) for user in accepted_users
|
||||
],
|
||||
slack_users=[
|
||||
FullUserSnapshot(
|
||||
id=user.id,
|
||||
email=user.email,
|
||||
role=user.role,
|
||||
is_active=user.is_active,
|
||||
password_configured=user.password_configured,
|
||||
)
|
||||
for user in slack_users
|
||||
FullUserSnapshot.from_user_model(user) for user in slack_users
|
||||
],
|
||||
invited=[InvitedUserSnapshot(email=email) for email in invited_emails],
|
||||
accepted_pages=1,
|
||||
@@ -296,26 +365,10 @@ def list_all_users(
|
||||
|
||||
# Otherwise, return paginated results
|
||||
return AllUsersResponse(
|
||||
accepted=[
|
||||
FullUserSnapshot(
|
||||
id=user.id,
|
||||
email=user.email,
|
||||
role=user.role,
|
||||
is_active=user.is_active,
|
||||
password_configured=user.password_configured,
|
||||
)
|
||||
for user in accepted_users
|
||||
][accepted_page * USERS_PAGE_SIZE : (accepted_page + 1) * USERS_PAGE_SIZE],
|
||||
slack_users=[
|
||||
FullUserSnapshot(
|
||||
id=user.id,
|
||||
email=user.email,
|
||||
role=user.role,
|
||||
is_active=user.is_active,
|
||||
password_configured=user.password_configured,
|
||||
)
|
||||
for user in slack_users
|
||||
][
|
||||
accepted=[FullUserSnapshot.from_user_model(user) for user in accepted_users][
|
||||
accepted_page * USERS_PAGE_SIZE : (accepted_page + 1) * USERS_PAGE_SIZE
|
||||
],
|
||||
slack_users=[FullUserSnapshot.from_user_model(user) for user in slack_users][
|
||||
slack_users_page
|
||||
* USERS_PAGE_SIZE : (slack_users_page + 1)
|
||||
* USERS_PAGE_SIZE
|
||||
|
||||
@@ -1,315 +0,0 @@
|
||||
from fastapi import APIRouter
|
||||
from fastapi import Depends
|
||||
from fastapi import Response
|
||||
from sqlalchemy.orm import Session
|
||||
|
||||
from onyx.auth.users import current_admin_user
|
||||
from onyx.db.engine.sql_engine import get_session
|
||||
from onyx.db.models import LLMProvider as LLMProviderModel
|
||||
from onyx.db.models import User
|
||||
from onyx.db.models import VoiceProvider
|
||||
from onyx.db.voice import deactivate_stt_provider
|
||||
from onyx.db.voice import deactivate_tts_provider
|
||||
from onyx.db.voice import delete_voice_provider
|
||||
from onyx.db.voice import fetch_voice_provider_by_id
|
||||
from onyx.db.voice import fetch_voice_provider_by_type
|
||||
from onyx.db.voice import fetch_voice_providers
|
||||
from onyx.db.voice import set_default_stt_provider
|
||||
from onyx.db.voice import set_default_tts_provider
|
||||
from onyx.db.voice import upsert_voice_provider
|
||||
from onyx.error_handling.error_codes import OnyxErrorCode
|
||||
from onyx.error_handling.exceptions import OnyxError
|
||||
from onyx.server.manage.voice.models import VoiceOption
|
||||
from onyx.server.manage.voice.models import VoiceProviderTestRequest
|
||||
from onyx.server.manage.voice.models import VoiceProviderUpdateSuccess
|
||||
from onyx.server.manage.voice.models import VoiceProviderUpsertRequest
|
||||
from onyx.server.manage.voice.models import VoiceProviderView
|
||||
from onyx.utils.logger import setup_logger
|
||||
from onyx.utils.url import SSRFException
|
||||
from onyx.utils.url import validate_outbound_http_url
|
||||
from onyx.voice.factory import get_voice_provider
|
||||
|
||||
logger = setup_logger()
|
||||
|
||||
admin_router = APIRouter(prefix="/admin/voice")
|
||||
|
||||
|
||||
def _validate_voice_api_base(provider_type: str, api_base: str | None) -> str | None:
|
||||
"""Validate and normalize provider api_base / target URI."""
|
||||
if api_base is None:
|
||||
return None
|
||||
|
||||
allow_private_network = provider_type.lower() == "azure"
|
||||
try:
|
||||
return validate_outbound_http_url(
|
||||
api_base, allow_private_network=allow_private_network
|
||||
)
|
||||
except (ValueError, SSRFException) as e:
|
||||
raise OnyxError(
|
||||
OnyxErrorCode.VALIDATION_ERROR,
|
||||
f"Invalid target URI: {str(e)}",
|
||||
) from e
|
||||
|
||||
|
||||
def _provider_to_view(provider: VoiceProvider) -> VoiceProviderView:
|
||||
"""Convert a VoiceProvider model to a VoiceProviderView."""
|
||||
return VoiceProviderView(
|
||||
id=provider.id,
|
||||
name=provider.name,
|
||||
provider_type=provider.provider_type,
|
||||
is_default_stt=provider.is_default_stt,
|
||||
is_default_tts=provider.is_default_tts,
|
||||
stt_model=provider.stt_model,
|
||||
tts_model=provider.tts_model,
|
||||
default_voice=provider.default_voice,
|
||||
has_api_key=bool(provider.api_key),
|
||||
target_uri=provider.api_base, # api_base stores the target URI for Azure
|
||||
)
|
||||
|
||||
|
||||
@admin_router.get("/providers")
|
||||
def list_voice_providers(
|
||||
_: User = Depends(current_admin_user),
|
||||
db_session: Session = Depends(get_session),
|
||||
) -> list[VoiceProviderView]:
|
||||
"""List all configured voice providers."""
|
||||
providers = fetch_voice_providers(db_session)
|
||||
return [_provider_to_view(provider) for provider in providers]
|
||||
|
||||
|
||||
@admin_router.post("/providers")
|
||||
async def upsert_voice_provider_endpoint(
|
||||
request: VoiceProviderUpsertRequest,
|
||||
_: User = Depends(current_admin_user),
|
||||
db_session: Session = Depends(get_session),
|
||||
) -> VoiceProviderView:
|
||||
"""Create or update a voice provider."""
|
||||
api_key = request.api_key
|
||||
api_key_changed = request.api_key_changed
|
||||
|
||||
# If llm_provider_id is specified, copy the API key from that LLM provider
|
||||
if request.llm_provider_id is not None:
|
||||
llm_provider = db_session.get(LLMProviderModel, request.llm_provider_id)
|
||||
if llm_provider is None:
|
||||
raise OnyxError(
|
||||
OnyxErrorCode.NOT_FOUND,
|
||||
f"LLM provider with id {request.llm_provider_id} not found.",
|
||||
)
|
||||
if llm_provider.api_key is None:
|
||||
raise OnyxError(
|
||||
OnyxErrorCode.VALIDATION_ERROR,
|
||||
"Selected LLM provider has no API key configured.",
|
||||
)
|
||||
api_key = llm_provider.api_key.get_value(apply_mask=False)
|
||||
api_key_changed = True
|
||||
|
||||
# Use target_uri if provided, otherwise fall back to api_base
|
||||
api_base = _validate_voice_api_base(
|
||||
request.provider_type, request.target_uri or request.api_base
|
||||
)
|
||||
|
||||
provider = upsert_voice_provider(
|
||||
db_session=db_session,
|
||||
provider_id=request.id,
|
||||
name=request.name,
|
||||
provider_type=request.provider_type,
|
||||
api_key=api_key,
|
||||
api_key_changed=api_key_changed,
|
||||
api_base=api_base,
|
||||
custom_config=request.custom_config,
|
||||
stt_model=request.stt_model,
|
||||
tts_model=request.tts_model,
|
||||
default_voice=request.default_voice,
|
||||
activate_stt=request.activate_stt,
|
||||
activate_tts=request.activate_tts,
|
||||
)
|
||||
|
||||
# Validate credentials before committing - rollback on failure
|
||||
try:
|
||||
voice_provider = get_voice_provider(provider)
|
||||
await voice_provider.validate_credentials()
|
||||
except Exception as e:
|
||||
db_session.rollback()
|
||||
logger.error(f"Voice provider credential validation failed on save: {e}")
|
||||
raise OnyxError(
|
||||
OnyxErrorCode.VALIDATION_ERROR,
|
||||
"Connection test failed. Please verify your API key and settings.",
|
||||
) from e
|
||||
|
||||
db_session.commit()
|
||||
|
||||
return _provider_to_view(provider)
|
||||
|
||||
|
||||
@admin_router.delete(
|
||||
"/providers/{provider_id}", status_code=204, response_class=Response
|
||||
)
|
||||
def delete_voice_provider_endpoint(
|
||||
provider_id: int,
|
||||
_: User = Depends(current_admin_user),
|
||||
db_session: Session = Depends(get_session),
|
||||
) -> Response:
|
||||
"""Delete a voice provider."""
|
||||
delete_voice_provider(db_session, provider_id)
|
||||
db_session.commit()
|
||||
return Response(status_code=204)
|
||||
|
||||
|
||||
@admin_router.post("/providers/{provider_id}/activate-stt")
|
||||
def activate_stt_provider_endpoint(
|
||||
provider_id: int,
|
||||
_: User = Depends(current_admin_user),
|
||||
db_session: Session = Depends(get_session),
|
||||
) -> VoiceProviderView:
|
||||
"""Set a voice provider as the default STT provider."""
|
||||
provider = set_default_stt_provider(db_session=db_session, provider_id=provider_id)
|
||||
db_session.commit()
|
||||
return _provider_to_view(provider)
|
||||
|
||||
|
||||
@admin_router.post("/providers/{provider_id}/deactivate-stt")
|
||||
def deactivate_stt_provider_endpoint(
|
||||
provider_id: int,
|
||||
_: User = Depends(current_admin_user),
|
||||
db_session: Session = Depends(get_session),
|
||||
) -> VoiceProviderUpdateSuccess:
|
||||
"""Remove the default STT status from a voice provider."""
|
||||
deactivate_stt_provider(db_session=db_session, provider_id=provider_id)
|
||||
db_session.commit()
|
||||
return VoiceProviderUpdateSuccess()
|
||||
|
||||
|
||||
@admin_router.post("/providers/{provider_id}/activate-tts")
|
||||
def activate_tts_provider_endpoint(
|
||||
provider_id: int,
|
||||
tts_model: str | None = None,
|
||||
_: User = Depends(current_admin_user),
|
||||
db_session: Session = Depends(get_session),
|
||||
) -> VoiceProviderView:
|
||||
"""Set a voice provider as the default TTS provider."""
|
||||
provider = set_default_tts_provider(
|
||||
db_session=db_session, provider_id=provider_id, tts_model=tts_model
|
||||
)
|
||||
db_session.commit()
|
||||
return _provider_to_view(provider)
|
||||
|
||||
|
||||
@admin_router.post("/providers/{provider_id}/deactivate-tts")
|
||||
def deactivate_tts_provider_endpoint(
|
||||
provider_id: int,
|
||||
_: User = Depends(current_admin_user),
|
||||
db_session: Session = Depends(get_session),
|
||||
) -> VoiceProviderUpdateSuccess:
|
||||
"""Remove the default TTS status from a voice provider."""
|
||||
deactivate_tts_provider(db_session=db_session, provider_id=provider_id)
|
||||
db_session.commit()
|
||||
return VoiceProviderUpdateSuccess()
|
||||
|
||||
|
||||
@admin_router.post("/providers/test")
|
||||
async def test_voice_provider(
|
||||
request: VoiceProviderTestRequest,
|
||||
_: User = Depends(current_admin_user),
|
||||
db_session: Session = Depends(get_session),
|
||||
) -> VoiceProviderUpdateSuccess:
|
||||
"""Test a voice provider connection by making a real API call."""
|
||||
api_key = request.api_key
|
||||
|
||||
if request.use_stored_key:
|
||||
existing_provider = fetch_voice_provider_by_type(
|
||||
db_session, request.provider_type
|
||||
)
|
||||
if existing_provider is None or not existing_provider.api_key:
|
||||
raise OnyxError(
|
||||
OnyxErrorCode.VALIDATION_ERROR,
|
||||
"No stored API key found for this provider type.",
|
||||
)
|
||||
api_key = existing_provider.api_key.get_value(apply_mask=False)
|
||||
|
||||
if not api_key:
|
||||
raise OnyxError(
|
||||
OnyxErrorCode.VALIDATION_ERROR,
|
||||
"API key is required. Either provide api_key or set use_stored_key to true.",
|
||||
)
|
||||
|
||||
# Use target_uri if provided, otherwise fall back to api_base
|
||||
api_base = _validate_voice_api_base(
|
||||
request.provider_type, request.target_uri or request.api_base
|
||||
)
|
||||
|
||||
# Create a temporary VoiceProvider for testing (not saved to DB)
|
||||
temp_provider = VoiceProvider(
|
||||
name="__test__",
|
||||
provider_type=request.provider_type,
|
||||
api_base=api_base,
|
||||
custom_config=request.custom_config or {},
|
||||
)
|
||||
temp_provider.api_key = api_key # type: ignore[assignment]
|
||||
|
||||
try:
|
||||
provider = get_voice_provider(temp_provider)
|
||||
except ValueError as exc:
|
||||
raise OnyxError(OnyxErrorCode.VALIDATION_ERROR, str(exc)) from exc
|
||||
|
||||
# Validate credentials with a real API call
|
||||
try:
|
||||
await provider.validate_credentials()
|
||||
except OnyxError:
|
||||
raise
|
||||
except Exception as e:
|
||||
logger.error(f"Voice provider connection test failed: {e}")
|
||||
raise OnyxError(
|
||||
OnyxErrorCode.VALIDATION_ERROR,
|
||||
"Connection test failed. Please verify your API key and settings.",
|
||||
) from e
|
||||
|
||||
logger.info(f"Voice provider test succeeded for {request.provider_type}.")
|
||||
return VoiceProviderUpdateSuccess()
|
||||
|
||||
|
||||
@admin_router.get("/providers/{provider_id}/voices")
|
||||
def get_provider_voices(
|
||||
provider_id: int,
|
||||
_: User = Depends(current_admin_user),
|
||||
db_session: Session = Depends(get_session),
|
||||
) -> list[VoiceOption]:
|
||||
"""Get available voices for a provider."""
|
||||
provider_db = fetch_voice_provider_by_id(db_session, provider_id)
|
||||
if provider_db is None:
|
||||
raise OnyxError(OnyxErrorCode.NOT_FOUND, "Voice provider not found.")
|
||||
|
||||
if not provider_db.api_key:
|
||||
raise OnyxError(
|
||||
OnyxErrorCode.VALIDATION_ERROR, "Provider has no API key configured."
|
||||
)
|
||||
|
||||
try:
|
||||
provider = get_voice_provider(provider_db)
|
||||
except ValueError as exc:
|
||||
raise OnyxError(OnyxErrorCode.VALIDATION_ERROR, str(exc)) from exc
|
||||
|
||||
return [VoiceOption(**voice) for voice in provider.get_available_voices()]
|
||||
|
||||
|
||||
@admin_router.get("/voices")
|
||||
def get_voices_by_type(
|
||||
provider_type: str,
|
||||
_: User = Depends(current_admin_user),
|
||||
) -> list[VoiceOption]:
|
||||
"""Get available voices for a provider type.
|
||||
|
||||
For providers like ElevenLabs and OpenAI, this fetches voices
|
||||
without requiring an existing provider configuration.
|
||||
"""
|
||||
# Create a temporary VoiceProvider to get static voice list
|
||||
temp_provider = VoiceProvider(
|
||||
name="__temp__",
|
||||
provider_type=provider_type,
|
||||
)
|
||||
|
||||
try:
|
||||
provider = get_voice_provider(temp_provider)
|
||||
except ValueError as exc:
|
||||
raise OnyxError(OnyxErrorCode.VALIDATION_ERROR, str(exc)) from exc
|
||||
|
||||
return [VoiceOption(**voice) for voice in provider.get_available_voices()]
|
||||
@@ -1,95 +0,0 @@
|
||||
from typing import Any
|
||||
|
||||
from pydantic import BaseModel
|
||||
from pydantic import Field
|
||||
|
||||
|
||||
class VoiceProviderView(BaseModel):
|
||||
"""Response model for voice provider listing."""
|
||||
|
||||
id: int
|
||||
name: str
|
||||
provider_type: str # "openai", "azure", "elevenlabs"
|
||||
is_default_stt: bool
|
||||
is_default_tts: bool
|
||||
stt_model: str | None
|
||||
tts_model: str | None
|
||||
default_voice: str | None
|
||||
has_api_key: bool = Field(
|
||||
default=False,
|
||||
description="Indicates whether an API key is stored for this provider.",
|
||||
)
|
||||
target_uri: str | None = Field(
|
||||
default=None,
|
||||
description="Target URI for Azure Speech Services.",
|
||||
)
|
||||
|
||||
|
||||
class VoiceProviderUpdateSuccess(BaseModel):
|
||||
"""Simple status response for voice provider actions."""
|
||||
|
||||
status: str = "ok"
|
||||
|
||||
|
||||
class VoiceOption(BaseModel):
|
||||
"""Voice option returned by voice providers."""
|
||||
|
||||
id: str
|
||||
name: str
|
||||
|
||||
|
||||
class VoiceProviderUpsertRequest(BaseModel):
|
||||
"""Request model for creating or updating a voice provider."""
|
||||
|
||||
id: int | None = Field(default=None, description="Existing provider ID to update.")
|
||||
name: str
|
||||
provider_type: str # "openai", "azure", "elevenlabs"
|
||||
api_key: str | None = Field(
|
||||
default=None,
|
||||
description="API key for the provider.",
|
||||
)
|
||||
api_key_changed: bool = Field(
|
||||
default=False,
|
||||
description="Set to true when providing a new API key for an existing provider.",
|
||||
)
|
||||
llm_provider_id: int | None = Field(
|
||||
default=None,
|
||||
description="If set, copies the API key from the specified LLM provider.",
|
||||
)
|
||||
api_base: str | None = None
|
||||
target_uri: str | None = Field(
|
||||
default=None,
|
||||
description="Target URI for Azure Speech Services (maps to api_base).",
|
||||
)
|
||||
custom_config: dict[str, Any] | None = None
|
||||
stt_model: str | None = None
|
||||
tts_model: str | None = None
|
||||
default_voice: str | None = None
|
||||
activate_stt: bool = Field(
|
||||
default=False,
|
||||
description="If true, sets this provider as the default STT provider after upsert.",
|
||||
)
|
||||
activate_tts: bool = Field(
|
||||
default=False,
|
||||
description="If true, sets this provider as the default TTS provider after upsert.",
|
||||
)
|
||||
|
||||
|
||||
class VoiceProviderTestRequest(BaseModel):
|
||||
"""Request model for testing a voice provider connection."""
|
||||
|
||||
provider_type: str
|
||||
api_key: str | None = Field(
|
||||
default=None,
|
||||
description="API key for testing. If not provided, use_stored_key must be true.",
|
||||
)
|
||||
use_stored_key: bool = Field(
|
||||
default=False,
|
||||
description="If true, use the stored API key for this provider type.",
|
||||
)
|
||||
api_base: str | None = None
|
||||
target_uri: str | None = Field(
|
||||
default=None,
|
||||
description="Target URI for Azure Speech Services (maps to api_base).",
|
||||
)
|
||||
custom_config: dict[str, Any] | None = None
|
||||
@@ -1,250 +0,0 @@
|
||||
import secrets
|
||||
from collections.abc import AsyncIterator
|
||||
|
||||
from fastapi import APIRouter
|
||||
from fastapi import Depends
|
||||
from fastapi import File
|
||||
from fastapi import Query
|
||||
from fastapi import UploadFile
|
||||
from fastapi.responses import StreamingResponse
|
||||
from pydantic import BaseModel
|
||||
from sqlalchemy.orm import Session
|
||||
|
||||
from onyx.auth.users import current_user
|
||||
from onyx.db.engine.sql_engine import get_session
|
||||
from onyx.db.engine.sql_engine import get_session_with_current_tenant
|
||||
from onyx.db.models import User
|
||||
from onyx.db.voice import fetch_default_stt_provider
|
||||
from onyx.db.voice import fetch_default_tts_provider
|
||||
from onyx.db.voice import update_user_voice_settings
|
||||
from onyx.error_handling.error_codes import OnyxErrorCode
|
||||
from onyx.error_handling.exceptions import OnyxError
|
||||
from onyx.redis.redis_pool import store_ws_token
|
||||
from onyx.redis.redis_pool import WsTokenRateLimitExceeded
|
||||
from onyx.server.manage.models import VoiceSettingsUpdateRequest
|
||||
from onyx.utils.logger import setup_logger
|
||||
from onyx.voice.factory import get_voice_provider
|
||||
|
||||
logger = setup_logger()
|
||||
|
||||
router = APIRouter(prefix="/voice")
|
||||
|
||||
# Max audio file size: 25MB (Whisper limit)
|
||||
MAX_AUDIO_SIZE = 25 * 1024 * 1024
|
||||
# Chunk size for streaming uploads (8KB)
|
||||
UPLOAD_READ_CHUNK_SIZE = 8192
|
||||
|
||||
|
||||
class VoiceStatusResponse(BaseModel):
|
||||
stt_enabled: bool
|
||||
tts_enabled: bool
|
||||
|
||||
|
||||
@router.get("/status")
|
||||
def get_voice_status(
|
||||
_: User = Depends(current_user),
|
||||
db_session: Session = Depends(get_session),
|
||||
) -> VoiceStatusResponse:
|
||||
"""Check whether STT and TTS providers are configured and ready."""
|
||||
stt_provider = fetch_default_stt_provider(db_session)
|
||||
tts_provider = fetch_default_tts_provider(db_session)
|
||||
return VoiceStatusResponse(
|
||||
stt_enabled=stt_provider is not None and stt_provider.api_key is not None,
|
||||
tts_enabled=tts_provider is not None and tts_provider.api_key is not None,
|
||||
)
|
||||
|
||||
|
||||
@router.post("/transcribe")
|
||||
async def transcribe_audio(
|
||||
audio: UploadFile = File(...),
|
||||
_: User = Depends(current_user),
|
||||
db_session: Session = Depends(get_session),
|
||||
) -> dict[str, str]:
|
||||
"""Transcribe audio to text using the default STT provider."""
|
||||
provider_db = fetch_default_stt_provider(db_session)
|
||||
if provider_db is None:
|
||||
raise OnyxError(
|
||||
OnyxErrorCode.VALIDATION_ERROR,
|
||||
"No speech-to-text provider configured. Please contact your administrator.",
|
||||
)
|
||||
|
||||
if not provider_db.api_key:
|
||||
raise OnyxError(
|
||||
OnyxErrorCode.VALIDATION_ERROR,
|
||||
"Voice provider API key not configured.",
|
||||
)
|
||||
|
||||
# Read in chunks to enforce size limit during streaming (prevents OOM attacks)
|
||||
chunks: list[bytes] = []
|
||||
total = 0
|
||||
while chunk := await audio.read(UPLOAD_READ_CHUNK_SIZE):
|
||||
total += len(chunk)
|
||||
if total > MAX_AUDIO_SIZE:
|
||||
raise OnyxError(
|
||||
OnyxErrorCode.PAYLOAD_TOO_LARGE,
|
||||
f"Audio file too large. Maximum size is {MAX_AUDIO_SIZE // (1024 * 1024)}MB.",
|
||||
)
|
||||
chunks.append(chunk)
|
||||
audio_data = b"".join(chunks)
|
||||
|
||||
# Extract format from filename
|
||||
filename = audio.filename or "audio.webm"
|
||||
audio_format = filename.rsplit(".", 1)[-1] if "." in filename else "webm"
|
||||
|
||||
try:
|
||||
provider = get_voice_provider(provider_db)
|
||||
except ValueError as exc:
|
||||
raise OnyxError(OnyxErrorCode.INTERNAL_ERROR, str(exc)) from exc
|
||||
|
||||
try:
|
||||
text = await provider.transcribe(audio_data, audio_format)
|
||||
return {"text": text}
|
||||
except NotImplementedError as exc:
|
||||
raise OnyxError(
|
||||
OnyxErrorCode.NOT_IMPLEMENTED,
|
||||
f"Speech-to-text not implemented for {provider_db.provider_type}.",
|
||||
) from exc
|
||||
except Exception as exc:
|
||||
logger.error(f"Transcription failed: {exc}")
|
||||
raise OnyxError(
|
||||
OnyxErrorCode.INTERNAL_ERROR,
|
||||
"Transcription failed. Please try again.",
|
||||
) from exc
|
||||
|
||||
|
||||
@router.post("/synthesize")
|
||||
async def synthesize_speech(
|
||||
text: str | None = Query(
|
||||
default=None, description="Text to synthesize", max_length=4096
|
||||
),
|
||||
voice: str | None = Query(default=None, description="Voice ID to use"),
|
||||
speed: float | None = Query(
|
||||
default=None, description="Playback speed (0.5-2.0)", ge=0.5, le=2.0
|
||||
),
|
||||
user: User = Depends(current_user),
|
||||
) -> StreamingResponse:
|
||||
"""
|
||||
Synthesize text to speech using the default TTS provider.
|
||||
|
||||
Accepts parameters via query string for streaming compatibility.
|
||||
"""
|
||||
logger.info(
|
||||
f"TTS request: text length={len(text) if text else 0}, voice={voice}, speed={speed}"
|
||||
)
|
||||
|
||||
if not text:
|
||||
raise OnyxError(OnyxErrorCode.VALIDATION_ERROR, "Text is required")
|
||||
|
||||
# Use short-lived session to fetch provider config, then release connection
|
||||
# before starting the long-running streaming response
|
||||
with get_session_with_current_tenant() as db_session:
|
||||
provider_db = fetch_default_tts_provider(db_session)
|
||||
if provider_db is None:
|
||||
logger.error("No TTS provider configured")
|
||||
raise OnyxError(
|
||||
OnyxErrorCode.VALIDATION_ERROR,
|
||||
"No text-to-speech provider configured. Please contact your administrator.",
|
||||
)
|
||||
|
||||
if not provider_db.api_key:
|
||||
logger.error("TTS provider has no API key")
|
||||
raise OnyxError(
|
||||
OnyxErrorCode.VALIDATION_ERROR,
|
||||
"Voice provider API key not configured.",
|
||||
)
|
||||
|
||||
# Use request voice or provider default
|
||||
final_voice = voice or provider_db.default_voice
|
||||
# Use explicit None checks to avoid falsy float issues (0.0 would be skipped with `or`)
|
||||
final_speed = (
|
||||
speed
|
||||
if speed is not None
|
||||
else (
|
||||
user.voice_playback_speed
|
||||
if user.voice_playback_speed is not None
|
||||
else 1.0
|
||||
)
|
||||
)
|
||||
|
||||
logger.info(
|
||||
f"TTS using provider: {provider_db.provider_type}, voice: {final_voice}, speed: {final_speed}"
|
||||
)
|
||||
|
||||
try:
|
||||
provider = get_voice_provider(provider_db)
|
||||
except ValueError as exc:
|
||||
logger.error(f"Failed to get voice provider: {exc}")
|
||||
raise OnyxError(OnyxErrorCode.INTERNAL_ERROR, str(exc)) from exc
|
||||
|
||||
# Session is now closed - streaming response won't hold DB connection
|
||||
async def audio_stream() -> AsyncIterator[bytes]:
|
||||
try:
|
||||
chunk_count = 0
|
||||
async for chunk in provider.synthesize_stream(
|
||||
text=text, voice=final_voice, speed=final_speed
|
||||
):
|
||||
chunk_count += 1
|
||||
yield chunk
|
||||
logger.info(f"TTS streaming complete: {chunk_count} chunks sent")
|
||||
except NotImplementedError as exc:
|
||||
logger.error(f"TTS not implemented: {exc}")
|
||||
raise
|
||||
except Exception as exc:
|
||||
logger.error(f"Synthesis failed: {exc}")
|
||||
raise
|
||||
|
||||
return StreamingResponse(
|
||||
audio_stream(),
|
||||
media_type="audio/mpeg",
|
||||
headers={
|
||||
"Content-Disposition": "inline; filename=speech.mp3",
|
||||
# Allow streaming by not setting content-length
|
||||
"Cache-Control": "no-cache",
|
||||
"X-Accel-Buffering": "no", # Disable nginx buffering
|
||||
},
|
||||
)
|
||||
|
||||
|
||||
@router.patch("/settings")
|
||||
def update_voice_settings(
|
||||
request: VoiceSettingsUpdateRequest,
|
||||
user: User = Depends(current_user),
|
||||
db_session: Session = Depends(get_session),
|
||||
) -> dict[str, str]:
|
||||
"""Update user's voice settings."""
|
||||
update_user_voice_settings(
|
||||
db_session=db_session,
|
||||
user_id=user.id,
|
||||
auto_send=request.auto_send,
|
||||
auto_playback=request.auto_playback,
|
||||
playback_speed=request.playback_speed,
|
||||
)
|
||||
return {"status": "ok"}
|
||||
|
||||
|
||||
class WSTokenResponse(BaseModel):
|
||||
token: str
|
||||
|
||||
|
||||
@router.post("/ws-token")
|
||||
async def get_ws_token(
|
||||
user: User = Depends(current_user),
|
||||
) -> WSTokenResponse:
|
||||
"""
|
||||
Generate a short-lived token for WebSocket authentication.
|
||||
|
||||
This token should be passed as a query parameter when connecting
|
||||
to voice WebSocket endpoints (e.g., /voice/transcribe/stream?token=xxx).
|
||||
|
||||
The token expires after 60 seconds and is single-use.
|
||||
Rate limited to 10 tokens per minute per user.
|
||||
"""
|
||||
token = secrets.token_urlsafe(32)
|
||||
try:
|
||||
await store_ws_token(token, str(user.id))
|
||||
except WsTokenRateLimitExceeded:
|
||||
raise OnyxError(
|
||||
OnyxErrorCode.RATE_LIMITED,
|
||||
"Too many token requests. Please wait before requesting another.",
|
||||
)
|
||||
return WSTokenResponse(token=token)
|
||||
@@ -1,860 +0,0 @@
|
||||
"""WebSocket API for streaming speech-to-text and text-to-speech."""
|
||||
|
||||
import asyncio
|
||||
import io
|
||||
import json
|
||||
import os
|
||||
from collections.abc import MutableMapping
|
||||
from typing import Any
|
||||
|
||||
from fastapi import APIRouter
|
||||
from fastapi import Depends
|
||||
from fastapi import WebSocket
|
||||
from fastapi import WebSocketDisconnect
|
||||
from sqlalchemy.orm import Session
|
||||
|
||||
from onyx.auth.users import current_user_from_websocket
|
||||
from onyx.db.engine.sql_engine import get_sqlalchemy_engine
|
||||
from onyx.db.models import User
|
||||
from onyx.db.voice import fetch_default_stt_provider
|
||||
from onyx.db.voice import fetch_default_tts_provider
|
||||
from onyx.utils.logger import setup_logger
|
||||
from onyx.voice.factory import get_voice_provider
|
||||
from onyx.voice.interface import StreamingSynthesizerProtocol
|
||||
from onyx.voice.interface import StreamingTranscriberProtocol
|
||||
from onyx.voice.interface import TranscriptResult
|
||||
|
||||
logger = setup_logger()
|
||||
|
||||
router = APIRouter(prefix="/voice")
|
||||
|
||||
|
||||
# Transcribe every ~0.5 seconds of audio (webm/opus is ~2-4KB/s, so ~1-2KB per 0.5s)
|
||||
MIN_CHUNK_BYTES = 1500
|
||||
VOICE_DISABLE_STREAMING_FALLBACK = (
|
||||
os.environ.get("VOICE_DISABLE_STREAMING_FALLBACK", "").lower() == "true"
|
||||
)
|
||||
|
||||
# WebSocket size limits to prevent memory exhaustion attacks
|
||||
WS_MAX_MESSAGE_SIZE = 64 * 1024 # 64KB per message (OWASP recommendation)
|
||||
WS_MAX_TOTAL_BYTES = 25 * 1024 * 1024 # 25MB total per connection (matches REST API)
|
||||
WS_MAX_TEXT_MESSAGE_SIZE = 16 * 1024 # 16KB for text/JSON messages
|
||||
WS_MAX_TTS_TEXT_LENGTH = 4096 # Max text length per synthesize call (matches REST API)
|
||||
|
||||
|
||||
class ChunkedTranscriber:
|
||||
"""Fallback transcriber for providers without streaming support."""
|
||||
|
||||
def __init__(self, provider: Any, audio_format: str = "webm"):
|
||||
self.provider = provider
|
||||
self.audio_format = audio_format
|
||||
self.chunk_buffer = io.BytesIO()
|
||||
self.full_audio = io.BytesIO()
|
||||
self.chunk_bytes = 0
|
||||
self.transcripts: list[str] = []
|
||||
|
||||
async def add_chunk(self, chunk: bytes) -> str | None:
|
||||
"""Add audio chunk. Returns transcript if enough audio accumulated."""
|
||||
self.chunk_buffer.write(chunk)
|
||||
self.full_audio.write(chunk)
|
||||
self.chunk_bytes += len(chunk)
|
||||
|
||||
if self.chunk_bytes >= MIN_CHUNK_BYTES:
|
||||
return await self._transcribe_chunk()
|
||||
return None
|
||||
|
||||
async def _transcribe_chunk(self) -> str | None:
|
||||
"""Transcribe current chunk and append to running transcript."""
|
||||
audio_data = self.chunk_buffer.getvalue()
|
||||
if not audio_data:
|
||||
return None
|
||||
|
||||
try:
|
||||
transcript = await self.provider.transcribe(audio_data, self.audio_format)
|
||||
self.chunk_buffer = io.BytesIO()
|
||||
self.chunk_bytes = 0
|
||||
|
||||
if transcript and transcript.strip():
|
||||
self.transcripts.append(transcript.strip())
|
||||
return " ".join(self.transcripts)
|
||||
return None
|
||||
except Exception as e:
|
||||
logger.error(f"Transcription error: {e}")
|
||||
self.chunk_buffer = io.BytesIO()
|
||||
self.chunk_bytes = 0
|
||||
return None
|
||||
|
||||
async def flush(self) -> str:
|
||||
"""Get final transcript from full audio for best accuracy."""
|
||||
full_audio_data = self.full_audio.getvalue()
|
||||
if full_audio_data:
|
||||
try:
|
||||
transcript = await self.provider.transcribe(
|
||||
full_audio_data, self.audio_format
|
||||
)
|
||||
if transcript and transcript.strip():
|
||||
return transcript.strip()
|
||||
except Exception as e:
|
||||
logger.error(f"Final transcription error: {e}")
|
||||
return " ".join(self.transcripts)
|
||||
|
||||
|
||||
async def handle_streaming_transcription(
|
||||
websocket: WebSocket,
|
||||
transcriber: StreamingTranscriberProtocol,
|
||||
) -> None:
|
||||
"""Handle transcription using native streaming API."""
|
||||
logger.info("Streaming transcription: starting handler")
|
||||
last_transcript = ""
|
||||
chunk_count = 0
|
||||
total_bytes = 0
|
||||
|
||||
async def receive_transcripts() -> None:
|
||||
"""Background task to receive and send transcripts."""
|
||||
nonlocal last_transcript
|
||||
logger.info("Streaming transcription: starting transcript receiver")
|
||||
while True:
|
||||
result: TranscriptResult | None = await transcriber.receive_transcript()
|
||||
if result is None: # End of stream
|
||||
logger.info("Streaming transcription: transcript stream ended")
|
||||
break
|
||||
# Send if text changed OR if VAD detected end of speech (for auto-send trigger)
|
||||
if result.text and (result.text != last_transcript or result.is_vad_end):
|
||||
last_transcript = result.text
|
||||
logger.debug(
|
||||
f"Streaming transcription: got transcript: {result.text[:50]}... "
|
||||
f"(is_vad_end={result.is_vad_end})"
|
||||
)
|
||||
await websocket.send_json(
|
||||
{
|
||||
"type": "transcript",
|
||||
"text": result.text,
|
||||
"is_final": result.is_vad_end,
|
||||
}
|
||||
)
|
||||
|
||||
# Start receiving transcripts in background
|
||||
receive_task = asyncio.create_task(receive_transcripts())
|
||||
|
||||
try:
|
||||
while True:
|
||||
message = await websocket.receive()
|
||||
msg_type = message.get("type", "unknown")
|
||||
|
||||
if msg_type == "websocket.disconnect":
|
||||
logger.info(
|
||||
f"Streaming transcription: client disconnected after {chunk_count} chunks ({total_bytes} bytes)"
|
||||
)
|
||||
break
|
||||
|
||||
if "bytes" in message:
|
||||
chunk_size = len(message["bytes"])
|
||||
|
||||
# Enforce per-message size limit
|
||||
if chunk_size > WS_MAX_MESSAGE_SIZE:
|
||||
logger.warning(
|
||||
f"Streaming transcription: message too large ({chunk_size} bytes)"
|
||||
)
|
||||
await websocket.send_json(
|
||||
{"type": "error", "message": "Message too large"}
|
||||
)
|
||||
break
|
||||
|
||||
# Enforce total connection size limit
|
||||
if total_bytes + chunk_size > WS_MAX_TOTAL_BYTES:
|
||||
logger.warning(
|
||||
f"Streaming transcription: total size limit exceeded ({total_bytes + chunk_size} bytes)"
|
||||
)
|
||||
await websocket.send_json(
|
||||
{"type": "error", "message": "Total size limit exceeded"}
|
||||
)
|
||||
break
|
||||
|
||||
chunk_count += 1
|
||||
total_bytes += chunk_size
|
||||
logger.debug(
|
||||
f"Streaming transcription: received chunk {chunk_count} ({chunk_size} bytes, total: {total_bytes})"
|
||||
)
|
||||
await transcriber.send_audio(message["bytes"])
|
||||
|
||||
elif "text" in message:
|
||||
try:
|
||||
data = json.loads(message["text"])
|
||||
logger.debug(
|
||||
f"Streaming transcription: received text message: {data}"
|
||||
)
|
||||
if data.get("type") == "end":
|
||||
logger.info(
|
||||
"Streaming transcription: end signal received, closing transcriber"
|
||||
)
|
||||
final_transcript = await transcriber.close()
|
||||
receive_task.cancel()
|
||||
logger.info(
|
||||
"Streaming transcription: final transcript: "
|
||||
f"{final_transcript[:100] if final_transcript else '(empty)'}..."
|
||||
)
|
||||
await websocket.send_json(
|
||||
{
|
||||
"type": "transcript",
|
||||
"text": final_transcript,
|
||||
"is_final": True,
|
||||
}
|
||||
)
|
||||
break
|
||||
elif data.get("type") == "reset":
|
||||
# Reset accumulated transcript after auto-send
|
||||
logger.info(
|
||||
"Streaming transcription: reset signal received, clearing transcript"
|
||||
)
|
||||
transcriber.reset_transcript()
|
||||
except json.JSONDecodeError:
|
||||
logger.warning(
|
||||
f"Streaming transcription: failed to parse JSON: {message.get('text', '')[:100]}"
|
||||
)
|
||||
except Exception as e:
|
||||
logger.error(f"Streaming transcription: error: {e}", exc_info=True)
|
||||
raise
|
||||
finally:
|
||||
receive_task.cancel()
|
||||
try:
|
||||
await receive_task
|
||||
except asyncio.CancelledError:
|
||||
pass
|
||||
logger.info(
|
||||
f"Streaming transcription: handler finished. Processed {chunk_count} chunks, {total_bytes} total bytes"
|
||||
)
|
||||
|
||||
|
||||
async def handle_chunked_transcription(
|
||||
websocket: WebSocket,
|
||||
transcriber: ChunkedTranscriber,
|
||||
) -> None:
|
||||
"""Handle transcription using chunked batch API."""
|
||||
logger.info("Chunked transcription: starting handler")
|
||||
chunk_count = 0
|
||||
total_bytes = 0
|
||||
|
||||
while True:
|
||||
message = await websocket.receive()
|
||||
msg_type = message.get("type", "unknown")
|
||||
|
||||
if msg_type == "websocket.disconnect":
|
||||
logger.info(
|
||||
f"Chunked transcription: client disconnected after {chunk_count} chunks ({total_bytes} bytes)"
|
||||
)
|
||||
break
|
||||
|
||||
if "bytes" in message:
|
||||
chunk_size = len(message["bytes"])
|
||||
|
||||
# Enforce per-message size limit
|
||||
if chunk_size > WS_MAX_MESSAGE_SIZE:
|
||||
logger.warning(
|
||||
f"Chunked transcription: message too large ({chunk_size} bytes)"
|
||||
)
|
||||
await websocket.send_json(
|
||||
{"type": "error", "message": "Message too large"}
|
||||
)
|
||||
break
|
||||
|
||||
# Enforce total connection size limit
|
||||
if total_bytes + chunk_size > WS_MAX_TOTAL_BYTES:
|
||||
logger.warning(
|
||||
f"Chunked transcription: total size limit exceeded ({total_bytes + chunk_size} bytes)"
|
||||
)
|
||||
await websocket.send_json(
|
||||
{"type": "error", "message": "Total size limit exceeded"}
|
||||
)
|
||||
break
|
||||
|
||||
chunk_count += 1
|
||||
total_bytes += chunk_size
|
||||
logger.debug(
|
||||
f"Chunked transcription: received chunk {chunk_count} ({chunk_size} bytes, total: {total_bytes})"
|
||||
)
|
||||
|
||||
transcript = await transcriber.add_chunk(message["bytes"])
|
||||
if transcript:
|
||||
logger.debug(
|
||||
f"Chunked transcription: got transcript: {transcript[:50]}..."
|
||||
)
|
||||
await websocket.send_json(
|
||||
{
|
||||
"type": "transcript",
|
||||
"text": transcript,
|
||||
"is_final": False,
|
||||
}
|
||||
)
|
||||
|
||||
elif "text" in message:
|
||||
try:
|
||||
data = json.loads(message["text"])
|
||||
logger.debug(f"Chunked transcription: received text message: {data}")
|
||||
if data.get("type") == "end":
|
||||
logger.info("Chunked transcription: end signal received, flushing")
|
||||
final_transcript = await transcriber.flush()
|
||||
logger.info(
|
||||
f"Chunked transcription: final transcript: {final_transcript[:100] if final_transcript else '(empty)'}..."
|
||||
)
|
||||
await websocket.send_json(
|
||||
{
|
||||
"type": "transcript",
|
||||
"text": final_transcript,
|
||||
"is_final": True,
|
||||
}
|
||||
)
|
||||
break
|
||||
except json.JSONDecodeError:
|
||||
logger.warning(
|
||||
f"Chunked transcription: failed to parse JSON: {message.get('text', '')[:100]}"
|
||||
)
|
||||
|
||||
logger.info(
|
||||
f"Chunked transcription: handler finished. Processed {chunk_count} chunks, {total_bytes} total bytes"
|
||||
)
|
||||
|
||||
|
||||
@router.websocket("/transcribe/stream")
|
||||
async def websocket_transcribe(
|
||||
websocket: WebSocket,
|
||||
_user: User = Depends(current_user_from_websocket),
|
||||
) -> None:
|
||||
"""
|
||||
WebSocket endpoint for streaming speech-to-text.
|
||||
|
||||
Protocol:
|
||||
- Client sends binary audio chunks
|
||||
- Server sends JSON: {"type": "transcript", "text": "...", "is_final": false}
|
||||
- Client sends JSON {"type": "end"} to signal end
|
||||
- Server responds with final transcript and closes
|
||||
|
||||
Authentication:
|
||||
Requires `token` query parameter (e.g., /voice/transcribe/stream?token=xxx).
|
||||
Applies same auth checks as HTTP endpoints (verification, role checks).
|
||||
"""
|
||||
logger.info("WebSocket transcribe: connection request received (authenticated)")
|
||||
|
||||
try:
|
||||
await websocket.accept()
|
||||
logger.info("WebSocket transcribe: connection accepted")
|
||||
except Exception as e:
|
||||
logger.error(f"WebSocket transcribe: failed to accept connection: {e}")
|
||||
return
|
||||
|
||||
streaming_transcriber = None
|
||||
provider = None
|
||||
|
||||
try:
|
||||
# Get STT provider
|
||||
logger.info("WebSocket transcribe: fetching STT provider from database")
|
||||
engine = get_sqlalchemy_engine()
|
||||
with Session(engine) as db_session:
|
||||
provider_db = fetch_default_stt_provider(db_session)
|
||||
if provider_db is None:
|
||||
logger.warning(
|
||||
"WebSocket transcribe: no default STT provider configured"
|
||||
)
|
||||
await websocket.send_json(
|
||||
{
|
||||
"type": "error",
|
||||
"message": "No speech-to-text provider configured",
|
||||
}
|
||||
)
|
||||
return
|
||||
|
||||
if not provider_db.api_key:
|
||||
logger.warning("WebSocket transcribe: STT provider has no API key")
|
||||
await websocket.send_json(
|
||||
{
|
||||
"type": "error",
|
||||
"message": "Speech-to-text provider has no API key configured",
|
||||
}
|
||||
)
|
||||
return
|
||||
|
||||
logger.info(
|
||||
f"WebSocket transcribe: creating voice provider: {provider_db.provider_type}"
|
||||
)
|
||||
try:
|
||||
provider = get_voice_provider(provider_db)
|
||||
logger.info(
|
||||
f"WebSocket transcribe: voice provider created, streaming supported: {provider.supports_streaming_stt()}"
|
||||
)
|
||||
except ValueError as e:
|
||||
logger.error(
|
||||
f"WebSocket transcribe: failed to create voice provider: {e}"
|
||||
)
|
||||
await websocket.send_json({"type": "error", "message": str(e)})
|
||||
return
|
||||
|
||||
# Use native streaming if provider supports it
|
||||
if provider.supports_streaming_stt():
|
||||
logger.info("WebSocket transcribe: using native streaming STT")
|
||||
try:
|
||||
streaming_transcriber = await provider.create_streaming_transcriber()
|
||||
logger.info(
|
||||
"WebSocket transcribe: streaming transcriber created successfully"
|
||||
)
|
||||
await handle_streaming_transcription(websocket, streaming_transcriber)
|
||||
except Exception as e:
|
||||
logger.error(
|
||||
f"WebSocket transcribe: failed to create streaming transcriber: {e}"
|
||||
)
|
||||
if VOICE_DISABLE_STREAMING_FALLBACK:
|
||||
await websocket.send_json(
|
||||
{"type": "error", "message": f"Streaming STT failed: {e}"}
|
||||
)
|
||||
return
|
||||
logger.info("WebSocket transcribe: falling back to chunked STT")
|
||||
# Browser stream provides raw PCM16 chunks over WebSocket.
|
||||
chunked_transcriber = ChunkedTranscriber(provider, audio_format="pcm16")
|
||||
await handle_chunked_transcription(websocket, chunked_transcriber)
|
||||
else:
|
||||
# Fall back to chunked transcription
|
||||
if VOICE_DISABLE_STREAMING_FALLBACK:
|
||||
await websocket.send_json(
|
||||
{
|
||||
"type": "error",
|
||||
"message": "Provider doesn't support streaming STT",
|
||||
}
|
||||
)
|
||||
return
|
||||
logger.info(
|
||||
"WebSocket transcribe: using chunked STT (provider doesn't support streaming)"
|
||||
)
|
||||
chunked_transcriber = ChunkedTranscriber(provider, audio_format="pcm16")
|
||||
await handle_chunked_transcription(websocket, chunked_transcriber)
|
||||
|
||||
except WebSocketDisconnect:
|
||||
logger.debug("WebSocket transcribe: client disconnected")
|
||||
except Exception as e:
|
||||
logger.error(f"WebSocket transcribe: unhandled error: {e}", exc_info=True)
|
||||
try:
|
||||
# Send generic error to avoid leaking sensitive details
|
||||
await websocket.send_json(
|
||||
{"type": "error", "message": "An unexpected error occurred"}
|
||||
)
|
||||
except Exception:
|
||||
pass
|
||||
finally:
|
||||
if streaming_transcriber:
|
||||
try:
|
||||
await streaming_transcriber.close()
|
||||
except Exception:
|
||||
pass
|
||||
try:
|
||||
await websocket.close()
|
||||
except Exception:
|
||||
pass
|
||||
logger.info("WebSocket transcribe: connection closed")
|
||||
|
||||
|
||||
async def handle_streaming_synthesis(
|
||||
websocket: WebSocket,
|
||||
synthesizer: StreamingSynthesizerProtocol,
|
||||
) -> None:
|
||||
"""Handle TTS using native streaming API."""
|
||||
logger.info("Streaming synthesis: starting handler")
|
||||
|
||||
async def send_audio() -> None:
|
||||
"""Background task to send audio chunks to client."""
|
||||
chunk_count = 0
|
||||
total_bytes = 0
|
||||
try:
|
||||
while True:
|
||||
audio_chunk = await synthesizer.receive_audio()
|
||||
if audio_chunk is None:
|
||||
logger.info(
|
||||
f"Streaming synthesis: audio stream ended, sent {chunk_count} chunks, {total_bytes} bytes"
|
||||
)
|
||||
try:
|
||||
await websocket.send_json({"type": "audio_done"})
|
||||
logger.info("Streaming synthesis: sent audio_done to client")
|
||||
except Exception as e:
|
||||
logger.warning(
|
||||
f"Streaming synthesis: failed to send audio_done: {e}"
|
||||
)
|
||||
break
|
||||
if audio_chunk: # Skip empty chunks
|
||||
chunk_count += 1
|
||||
total_bytes += len(audio_chunk)
|
||||
try:
|
||||
await websocket.send_bytes(audio_chunk)
|
||||
except Exception as e:
|
||||
logger.warning(
|
||||
f"Streaming synthesis: failed to send chunk: {e}"
|
||||
)
|
||||
break
|
||||
except asyncio.CancelledError:
|
||||
logger.info(
|
||||
f"Streaming synthesis: send_audio cancelled after {chunk_count} chunks"
|
||||
)
|
||||
except Exception as e:
|
||||
logger.error(f"Streaming synthesis: send_audio error: {e}")
|
||||
|
||||
send_task: asyncio.Task | None = None
|
||||
disconnected = False
|
||||
|
||||
try:
|
||||
while not disconnected:
|
||||
try:
|
||||
message = await websocket.receive()
|
||||
except WebSocketDisconnect:
|
||||
logger.info("Streaming synthesis: client disconnected")
|
||||
break
|
||||
|
||||
msg_type = message.get("type", "unknown") # type: ignore[possibly-undefined]
|
||||
|
||||
if msg_type == "websocket.disconnect":
|
||||
logger.info("Streaming synthesis: client disconnected")
|
||||
disconnected = True
|
||||
break
|
||||
|
||||
if "text" in message:
|
||||
# Enforce text message size limit
|
||||
msg_size = len(message["text"])
|
||||
if msg_size > WS_MAX_TEXT_MESSAGE_SIZE:
|
||||
logger.warning(
|
||||
f"Streaming synthesis: text message too large ({msg_size} bytes)"
|
||||
)
|
||||
await websocket.send_json(
|
||||
{"type": "error", "message": "Message too large"}
|
||||
)
|
||||
break
|
||||
|
||||
try:
|
||||
data = json.loads(message["text"])
|
||||
|
||||
if data.get("type") == "synthesize":
|
||||
text = data.get("text", "")
|
||||
# Enforce per-text size limit
|
||||
if len(text) > WS_MAX_TTS_TEXT_LENGTH:
|
||||
logger.warning(
|
||||
f"Streaming synthesis: text too long ({len(text)} chars)"
|
||||
)
|
||||
await websocket.send_json(
|
||||
{"type": "error", "message": "Text too long"}
|
||||
)
|
||||
continue
|
||||
if text:
|
||||
# Start audio receiver on first text chunk so playback
|
||||
# can begin before the full assistant response completes.
|
||||
if send_task is None:
|
||||
send_task = asyncio.create_task(send_audio())
|
||||
logger.debug(
|
||||
f"Streaming synthesis: forwarding text chunk ({len(text)} chars)"
|
||||
)
|
||||
await synthesizer.send_text(text)
|
||||
|
||||
elif data.get("type") == "end":
|
||||
logger.info("Streaming synthesis: end signal received")
|
||||
|
||||
# Ensure receiver is active even if no prior text chunks arrived.
|
||||
if send_task is None:
|
||||
send_task = asyncio.create_task(send_audio())
|
||||
|
||||
# Signal end of input
|
||||
if hasattr(synthesizer, "flush"):
|
||||
await synthesizer.flush()
|
||||
|
||||
# Wait for all audio to be sent
|
||||
logger.info(
|
||||
"Streaming synthesis: waiting for audio stream to complete"
|
||||
)
|
||||
try:
|
||||
await asyncio.wait_for(send_task, timeout=60.0)
|
||||
except asyncio.TimeoutError:
|
||||
logger.warning(
|
||||
"Streaming synthesis: timeout waiting for audio"
|
||||
)
|
||||
break
|
||||
|
||||
except json.JSONDecodeError:
|
||||
logger.warning(
|
||||
f"Streaming synthesis: failed to parse JSON: {message.get('text', '')[:100]}"
|
||||
)
|
||||
|
||||
except WebSocketDisconnect:
|
||||
logger.debug("Streaming synthesis: client disconnected during synthesis")
|
||||
except Exception as e:
|
||||
logger.error(f"Streaming synthesis: error: {e}", exc_info=True)
|
||||
finally:
|
||||
if send_task and not send_task.done():
|
||||
logger.info("Streaming synthesis: waiting for send_task to finish")
|
||||
try:
|
||||
await asyncio.wait_for(send_task, timeout=30.0)
|
||||
except asyncio.TimeoutError:
|
||||
logger.warning("Streaming synthesis: timeout waiting for send_task")
|
||||
send_task.cancel()
|
||||
try:
|
||||
await send_task
|
||||
except asyncio.CancelledError:
|
||||
pass
|
||||
except asyncio.CancelledError:
|
||||
pass
|
||||
logger.info("Streaming synthesis: handler finished")
|
||||
|
||||
|
||||
async def handle_chunked_synthesis(
|
||||
websocket: WebSocket,
|
||||
provider: Any,
|
||||
first_message: MutableMapping[str, Any] | None = None,
|
||||
) -> None:
|
||||
"""Fallback TTS handler using provider.synthesize_stream.
|
||||
|
||||
Args:
|
||||
websocket: The WebSocket connection
|
||||
provider: Voice provider instance
|
||||
first_message: Optional first message already received (used when falling
|
||||
back from streaming mode, where the first message was already consumed)
|
||||
"""
|
||||
logger.info("Chunked synthesis: starting handler")
|
||||
text_buffer: list[str] = []
|
||||
voice: str | None = None
|
||||
speed = 1.0
|
||||
|
||||
# Process pre-received message if provided
|
||||
pending_message = first_message
|
||||
|
||||
try:
|
||||
while True:
|
||||
if pending_message is not None:
|
||||
message = pending_message
|
||||
pending_message = None
|
||||
else:
|
||||
message = await websocket.receive()
|
||||
msg_type = message.get("type", "unknown")
|
||||
|
||||
if msg_type == "websocket.disconnect":
|
||||
logger.info("Chunked synthesis: client disconnected")
|
||||
break
|
||||
|
||||
if "text" not in message:
|
||||
continue
|
||||
|
||||
# Enforce text message size limit
|
||||
msg_size = len(message["text"])
|
||||
if msg_size > WS_MAX_TEXT_MESSAGE_SIZE:
|
||||
logger.warning(
|
||||
f"Chunked synthesis: text message too large ({msg_size} bytes)"
|
||||
)
|
||||
await websocket.send_json(
|
||||
{"type": "error", "message": "Message too large"}
|
||||
)
|
||||
break
|
||||
|
||||
try:
|
||||
data = json.loads(message["text"])
|
||||
except json.JSONDecodeError:
|
||||
logger.warning(
|
||||
"Chunked synthesis: failed to parse JSON: "
|
||||
f"{message.get('text', '')[:100]}"
|
||||
)
|
||||
continue
|
||||
|
||||
msg_data_type = data.get("type") # type: ignore[possibly-undefined]
|
||||
if msg_data_type == "synthesize":
|
||||
text = data.get("text", "")
|
||||
# Enforce per-text size limit
|
||||
if len(text) > WS_MAX_TTS_TEXT_LENGTH:
|
||||
logger.warning(
|
||||
f"Chunked synthesis: text too long ({len(text)} chars)"
|
||||
)
|
||||
await websocket.send_json(
|
||||
{"type": "error", "message": "Text too long"}
|
||||
)
|
||||
continue
|
||||
if text:
|
||||
text_buffer.append(text)
|
||||
logger.debug(
|
||||
f"Chunked synthesis: buffered text ({len(text)} chars), "
|
||||
f"total buffered: {len(text_buffer)} chunks"
|
||||
)
|
||||
if isinstance(data.get("voice"), str) and data["voice"]:
|
||||
voice = data["voice"]
|
||||
if isinstance(data.get("speed"), (int, float)):
|
||||
speed = float(data["speed"])
|
||||
elif msg_data_type == "end":
|
||||
logger.info("Chunked synthesis: end signal received")
|
||||
full_text = " ".join(text_buffer).strip()
|
||||
if not full_text:
|
||||
await websocket.send_json({"type": "audio_done"})
|
||||
logger.info("Chunked synthesis: no text, sent audio_done")
|
||||
break
|
||||
|
||||
chunk_count = 0
|
||||
total_bytes = 0
|
||||
logger.info(
|
||||
f"Chunked synthesis: sending full text ({len(full_text)} chars)"
|
||||
)
|
||||
async for audio_chunk in provider.synthesize_stream(
|
||||
full_text, voice=voice, speed=speed
|
||||
):
|
||||
if not audio_chunk:
|
||||
continue
|
||||
chunk_count += 1
|
||||
total_bytes += len(audio_chunk)
|
||||
await websocket.send_bytes(audio_chunk)
|
||||
await websocket.send_json({"type": "audio_done"})
|
||||
logger.info(
|
||||
f"Chunked synthesis: sent audio_done after {chunk_count} chunks, {total_bytes} bytes"
|
||||
)
|
||||
break
|
||||
except WebSocketDisconnect:
|
||||
logger.debug("Chunked synthesis: client disconnected")
|
||||
except Exception as e:
|
||||
logger.error(f"Chunked synthesis: error: {e}", exc_info=True)
|
||||
raise
|
||||
finally:
|
||||
logger.info("Chunked synthesis: handler finished")
|
||||
|
||||
|
||||
@router.websocket("/synthesize/stream")
|
||||
async def websocket_synthesize(
|
||||
websocket: WebSocket,
|
||||
_user: User = Depends(current_user_from_websocket),
|
||||
) -> None:
|
||||
"""
|
||||
WebSocket endpoint for streaming text-to-speech.
|
||||
|
||||
Protocol:
|
||||
- Client sends JSON: {"type": "synthesize", "text": "...", "voice": "...", "speed": 1.0}
|
||||
- Server sends binary audio chunks
|
||||
- Server sends JSON: {"type": "audio_done"} when synthesis completes
|
||||
- Client sends JSON {"type": "end"} to close connection
|
||||
|
||||
Authentication:
|
||||
Requires `token` query parameter (e.g., /voice/synthesize/stream?token=xxx).
|
||||
Applies same auth checks as HTTP endpoints (verification, role checks).
|
||||
"""
|
||||
logger.info("WebSocket synthesize: connection request received (authenticated)")
|
||||
|
||||
try:
|
||||
await websocket.accept()
|
||||
logger.info("WebSocket synthesize: connection accepted")
|
||||
except Exception as e:
|
||||
logger.error(f"WebSocket synthesize: failed to accept connection: {e}")
|
||||
return
|
||||
|
||||
streaming_synthesizer: StreamingSynthesizerProtocol | None = None
|
||||
provider = None
|
||||
|
||||
try:
|
||||
# Get TTS provider
|
||||
logger.info("WebSocket synthesize: fetching TTS provider from database")
|
||||
engine = get_sqlalchemy_engine()
|
||||
with Session(engine) as db_session:
|
||||
provider_db = fetch_default_tts_provider(db_session)
|
||||
if provider_db is None:
|
||||
logger.warning(
|
||||
"WebSocket synthesize: no default TTS provider configured"
|
||||
)
|
||||
await websocket.send_json(
|
||||
{
|
||||
"type": "error",
|
||||
"message": "No text-to-speech provider configured",
|
||||
}
|
||||
)
|
||||
return
|
||||
|
||||
if not provider_db.api_key:
|
||||
logger.warning("WebSocket synthesize: TTS provider has no API key")
|
||||
await websocket.send_json(
|
||||
{
|
||||
"type": "error",
|
||||
"message": "Text-to-speech provider has no API key configured",
|
||||
}
|
||||
)
|
||||
return
|
||||
|
||||
logger.info(
|
||||
f"WebSocket synthesize: creating voice provider: {provider_db.provider_type}"
|
||||
)
|
||||
try:
|
||||
provider = get_voice_provider(provider_db)
|
||||
logger.info(
|
||||
f"WebSocket synthesize: voice provider created, streaming TTS supported: {provider.supports_streaming_tts()}"
|
||||
)
|
||||
except ValueError as e:
|
||||
logger.error(
|
||||
f"WebSocket synthesize: failed to create voice provider: {e}"
|
||||
)
|
||||
await websocket.send_json({"type": "error", "message": str(e)})
|
||||
return
|
||||
|
||||
# Use native streaming if provider supports it
|
||||
if provider.supports_streaming_tts():
|
||||
logger.info("WebSocket synthesize: using native streaming TTS")
|
||||
message = None # Initialize to avoid UnboundLocalError in except block
|
||||
try:
|
||||
# Wait for initial config message with voice/speed
|
||||
message = await websocket.receive()
|
||||
voice = None
|
||||
speed = 1.0
|
||||
if "text" in message:
|
||||
try:
|
||||
data = json.loads(message["text"])
|
||||
voice = data.get("voice")
|
||||
speed = data.get("speed", 1.0)
|
||||
except json.JSONDecodeError:
|
||||
pass
|
||||
|
||||
streaming_synthesizer = await provider.create_streaming_synthesizer(
|
||||
voice=voice, speed=speed
|
||||
)
|
||||
logger.info(
|
||||
"WebSocket synthesize: streaming synthesizer created successfully"
|
||||
)
|
||||
await handle_streaming_synthesis(websocket, streaming_synthesizer)
|
||||
except Exception as e:
|
||||
logger.error(
|
||||
f"WebSocket synthesize: failed to create streaming synthesizer: {e}"
|
||||
)
|
||||
if VOICE_DISABLE_STREAMING_FALLBACK:
|
||||
await websocket.send_json(
|
||||
{"type": "error", "message": f"Streaming TTS failed: {e}"}
|
||||
)
|
||||
return
|
||||
logger.info(
|
||||
"WebSocket synthesize: falling back to chunked TTS synthesis"
|
||||
)
|
||||
# Pass the first message so it's not lost in the fallback
|
||||
await handle_chunked_synthesis(
|
||||
websocket, provider, first_message=message
|
||||
)
|
||||
else:
|
||||
if VOICE_DISABLE_STREAMING_FALLBACK:
|
||||
await websocket.send_json(
|
||||
{
|
||||
"type": "error",
|
||||
"message": "Provider doesn't support streaming TTS",
|
||||
}
|
||||
)
|
||||
return
|
||||
logger.info(
|
||||
"WebSocket synthesize: using chunked TTS (provider doesn't support streaming)"
|
||||
)
|
||||
await handle_chunked_synthesis(websocket, provider)
|
||||
|
||||
except WebSocketDisconnect:
|
||||
logger.debug("WebSocket synthesize: client disconnected")
|
||||
except Exception as e:
|
||||
logger.error(f"WebSocket synthesize: unhandled error: {e}", exc_info=True)
|
||||
try:
|
||||
# Send generic error to avoid leaking sensitive details
|
||||
await websocket.send_json(
|
||||
{"type": "error", "message": "An unexpected error occurred"}
|
||||
)
|
||||
except Exception:
|
||||
pass
|
||||
finally:
|
||||
if streaming_synthesizer:
|
||||
try:
|
||||
await streaming_synthesizer.close()
|
||||
except Exception:
|
||||
pass
|
||||
try:
|
||||
await websocket.close()
|
||||
except Exception:
|
||||
pass
|
||||
logger.info("WebSocket synthesize: connection closed")
|
||||
@@ -1,3 +1,4 @@
|
||||
import datetime
|
||||
from typing import Generic
|
||||
from typing import Optional
|
||||
from typing import TypeVar
|
||||
@@ -31,21 +32,41 @@ class MinimalUserSnapshot(BaseModel):
|
||||
email: str
|
||||
|
||||
|
||||
class UserGroupInfo(BaseModel):
|
||||
id: int
|
||||
name: str
|
||||
|
||||
|
||||
class FullUserSnapshot(BaseModel):
|
||||
id: UUID
|
||||
email: str
|
||||
role: UserRole
|
||||
is_active: bool
|
||||
password_configured: bool
|
||||
personal_name: str | None
|
||||
created_at: datetime.datetime
|
||||
updated_at: datetime.datetime
|
||||
groups: list[UserGroupInfo]
|
||||
is_scim_synced: bool
|
||||
|
||||
@classmethod
|
||||
def from_user_model(cls, user: User) -> "FullUserSnapshot":
|
||||
def from_user_model(
|
||||
cls,
|
||||
user: User,
|
||||
groups: list[UserGroupInfo] | None = None,
|
||||
is_scim_synced: bool = False,
|
||||
) -> "FullUserSnapshot":
|
||||
return cls(
|
||||
id=user.id,
|
||||
email=user.email,
|
||||
role=user.role,
|
||||
is_active=user.is_active,
|
||||
password_configured=user.password_configured,
|
||||
personal_name=user.personal_name,
|
||||
created_at=user.created_at,
|
||||
updated_at=user.updated_at,
|
||||
groups=groups or [],
|
||||
is_scim_synced=is_scim_synced,
|
||||
)
|
||||
|
||||
|
||||
|
||||
@@ -140,44 +140,6 @@ def _validate_and_resolve_url(url: str) -> tuple[str, str, int]:
|
||||
return validated_ip, hostname, port
|
||||
|
||||
|
||||
def validate_outbound_http_url(url: str, *, allow_private_network: bool = False) -> str:
|
||||
"""
|
||||
Validate a URL that will be used by backend outbound HTTP calls.
|
||||
|
||||
Returns:
|
||||
A normalized URL string with surrounding whitespace removed.
|
||||
|
||||
Raises:
|
||||
ValueError: If URL is malformed.
|
||||
SSRFException: If URL fails SSRF checks.
|
||||
"""
|
||||
normalized_url = url.strip()
|
||||
if not normalized_url:
|
||||
raise ValueError("URL cannot be empty")
|
||||
|
||||
parsed = urlparse(normalized_url)
|
||||
|
||||
if parsed.scheme not in ("http", "https"):
|
||||
raise SSRFException(
|
||||
f"Invalid URL scheme '{parsed.scheme}'. Only http and https are allowed."
|
||||
)
|
||||
|
||||
if not parsed.hostname:
|
||||
raise ValueError("URL must contain a hostname")
|
||||
|
||||
if parsed.username or parsed.password:
|
||||
raise SSRFException("URLs with embedded credentials are not allowed.")
|
||||
|
||||
hostname = parsed.hostname.lower()
|
||||
if hostname in BLOCKED_HOSTNAMES:
|
||||
raise SSRFException(f"Access to hostname '{parsed.hostname}' is not allowed.")
|
||||
|
||||
if not allow_private_network:
|
||||
_validate_and_resolve_url(normalized_url)
|
||||
|
||||
return normalized_url
|
||||
|
||||
|
||||
MAX_REDIRECTS = 10
|
||||
|
||||
|
||||
|
||||
@@ -1,70 +0,0 @@
|
||||
from onyx.db.models import VoiceProvider
|
||||
from onyx.voice.interface import VoiceProviderInterface
|
||||
|
||||
|
||||
def get_voice_provider(provider: VoiceProvider) -> VoiceProviderInterface:
|
||||
"""
|
||||
Factory function to get the appropriate voice provider implementation.
|
||||
|
||||
Args:
|
||||
provider: VoiceProvider model instance (can be from DB or constructed temporarily)
|
||||
|
||||
Returns:
|
||||
VoiceProviderInterface implementation
|
||||
|
||||
Raises:
|
||||
ValueError: If provider_type is not supported
|
||||
"""
|
||||
provider_type = provider.provider_type.lower()
|
||||
|
||||
# Handle both SensitiveValue (from DB) and plain string (from temp model)
|
||||
if provider.api_key is None:
|
||||
api_key = None
|
||||
elif hasattr(provider.api_key, "get_value"):
|
||||
# SensitiveValue from database
|
||||
api_key = provider.api_key.get_value(apply_mask=False)
|
||||
else:
|
||||
# Plain string from temporary model
|
||||
api_key = provider.api_key # type: ignore[assignment]
|
||||
api_base = provider.api_base
|
||||
custom_config = provider.custom_config
|
||||
stt_model = provider.stt_model
|
||||
tts_model = provider.tts_model
|
||||
default_voice = provider.default_voice
|
||||
|
||||
if provider_type == "openai":
|
||||
from onyx.voice.providers.openai import OpenAIVoiceProvider
|
||||
|
||||
return OpenAIVoiceProvider(
|
||||
api_key=api_key,
|
||||
api_base=api_base,
|
||||
stt_model=stt_model,
|
||||
tts_model=tts_model,
|
||||
default_voice=default_voice,
|
||||
)
|
||||
|
||||
elif provider_type == "azure":
|
||||
from onyx.voice.providers.azure import AzureVoiceProvider
|
||||
|
||||
return AzureVoiceProvider(
|
||||
api_key=api_key,
|
||||
api_base=api_base,
|
||||
custom_config=custom_config or {},
|
||||
stt_model=stt_model,
|
||||
tts_model=tts_model,
|
||||
default_voice=default_voice,
|
||||
)
|
||||
|
||||
elif provider_type == "elevenlabs":
|
||||
from onyx.voice.providers.elevenlabs import ElevenLabsVoiceProvider
|
||||
|
||||
return ElevenLabsVoiceProvider(
|
||||
api_key=api_key,
|
||||
api_base=api_base,
|
||||
stt_model=stt_model,
|
||||
tts_model=tts_model,
|
||||
default_voice=default_voice,
|
||||
)
|
||||
|
||||
else:
|
||||
raise ValueError(f"Unsupported voice provider type: {provider_type}")
|
||||
@@ -1,182 +0,0 @@
|
||||
from abc import ABC
|
||||
from abc import abstractmethod
|
||||
from collections.abc import AsyncIterator
|
||||
from typing import Protocol
|
||||
|
||||
from pydantic import BaseModel
|
||||
|
||||
|
||||
class TranscriptResult(BaseModel):
|
||||
"""Result from streaming transcription."""
|
||||
|
||||
text: str
|
||||
"""The accumulated transcript text."""
|
||||
|
||||
is_vad_end: bool = False
|
||||
"""True if VAD detected end of speech (silence). Use for auto-send."""
|
||||
|
||||
|
||||
class StreamingTranscriberProtocol(Protocol):
|
||||
"""Protocol for streaming transcription sessions."""
|
||||
|
||||
async def send_audio(self, chunk: bytes) -> None:
|
||||
"""Send an audio chunk for transcription."""
|
||||
...
|
||||
|
||||
async def receive_transcript(self) -> TranscriptResult | None:
|
||||
"""
|
||||
Receive next transcript update.
|
||||
|
||||
Returns:
|
||||
TranscriptResult with accumulated text and VAD status, or None when stream ends.
|
||||
"""
|
||||
...
|
||||
|
||||
async def close(self) -> str:
|
||||
"""Close the session and return final transcript."""
|
||||
...
|
||||
|
||||
def reset_transcript(self) -> None:
|
||||
"""Reset accumulated transcript. Call after auto-send to start fresh."""
|
||||
...
|
||||
|
||||
|
||||
class StreamingSynthesizerProtocol(Protocol):
|
||||
"""Protocol for streaming TTS sessions (real-time text-to-speech)."""
|
||||
|
||||
async def connect(self) -> None:
|
||||
"""Establish connection to TTS provider."""
|
||||
...
|
||||
|
||||
async def send_text(self, text: str) -> None:
|
||||
"""Send text to be synthesized."""
|
||||
...
|
||||
|
||||
async def receive_audio(self) -> bytes | None:
|
||||
"""
|
||||
Receive next audio chunk.
|
||||
|
||||
Returns:
|
||||
Audio bytes, or None when stream ends.
|
||||
"""
|
||||
...
|
||||
|
||||
async def flush(self) -> None:
|
||||
"""Signal end of text input and wait for pending audio."""
|
||||
...
|
||||
|
||||
async def close(self) -> None:
|
||||
"""Close the session."""
|
||||
...
|
||||
|
||||
|
||||
class VoiceProviderInterface(ABC):
|
||||
"""Abstract base class for voice providers (STT and TTS)."""
|
||||
|
||||
@abstractmethod
|
||||
async def transcribe(self, audio_data: bytes, audio_format: str) -> str:
|
||||
"""
|
||||
Convert audio to text (Speech-to-Text).
|
||||
|
||||
Args:
|
||||
audio_data: Raw audio bytes
|
||||
audio_format: Audio format (e.g., "webm", "wav", "mp3")
|
||||
|
||||
Returns:
|
||||
Transcribed text
|
||||
"""
|
||||
|
||||
@abstractmethod
|
||||
def synthesize_stream(
|
||||
self, text: str, voice: str | None = None, speed: float = 1.0
|
||||
) -> AsyncIterator[bytes]:
|
||||
"""
|
||||
Convert text to audio stream (Text-to-Speech).
|
||||
|
||||
Streams audio chunks progressively for lower latency playback.
|
||||
|
||||
Args:
|
||||
text: Text to convert to speech
|
||||
voice: Voice identifier (e.g., "alloy", "echo"), or None for default
|
||||
speed: Playback speed multiplier (0.25 to 4.0)
|
||||
|
||||
Yields:
|
||||
Audio data chunks
|
||||
"""
|
||||
|
||||
@abstractmethod
|
||||
async def validate_credentials(self) -> None:
|
||||
"""
|
||||
Validate that the provider credentials are correct by making a
|
||||
lightweight API call. Raises on failure.
|
||||
"""
|
||||
|
||||
@abstractmethod
|
||||
def get_available_voices(self) -> list[dict[str, str]]:
|
||||
"""
|
||||
Get list of available voices for this provider.
|
||||
|
||||
Returns:
|
||||
List of voice dictionaries with 'id' and 'name' keys
|
||||
"""
|
||||
|
||||
@abstractmethod
|
||||
def get_available_stt_models(self) -> list[dict[str, str]]:
|
||||
"""
|
||||
Get list of available STT models for this provider.
|
||||
|
||||
Returns:
|
||||
List of model dictionaries with 'id' and 'name' keys
|
||||
"""
|
||||
|
||||
@abstractmethod
|
||||
def get_available_tts_models(self) -> list[dict[str, str]]:
|
||||
"""
|
||||
Get list of available TTS models for this provider.
|
||||
|
||||
Returns:
|
||||
List of model dictionaries with 'id' and 'name' keys
|
||||
"""
|
||||
|
||||
def supports_streaming_stt(self) -> bool:
|
||||
"""Returns True if this provider supports streaming STT."""
|
||||
return False
|
||||
|
||||
def supports_streaming_tts(self) -> bool:
|
||||
"""Returns True if this provider supports real-time streaming TTS."""
|
||||
return False
|
||||
|
||||
async def create_streaming_transcriber(
|
||||
self, audio_format: str = "webm"
|
||||
) -> StreamingTranscriberProtocol:
|
||||
"""
|
||||
Create a streaming transcription session.
|
||||
|
||||
Args:
|
||||
audio_format: Audio format being sent (e.g., "webm", "pcm16")
|
||||
|
||||
Returns:
|
||||
A streaming transcriber that can send audio chunks and receive transcripts
|
||||
|
||||
Raises:
|
||||
NotImplementedError: If streaming STT is not supported
|
||||
"""
|
||||
raise NotImplementedError("Streaming STT not supported by this provider")
|
||||
|
||||
async def create_streaming_synthesizer(
|
||||
self, voice: str | None = None, speed: float = 1.0
|
||||
) -> "StreamingSynthesizerProtocol":
|
||||
"""
|
||||
Create a streaming TTS session for real-time audio synthesis.
|
||||
|
||||
Args:
|
||||
voice: Voice identifier
|
||||
speed: Playback speed multiplier
|
||||
|
||||
Returns:
|
||||
A streaming synthesizer that can send text and receive audio chunks
|
||||
|
||||
Raises:
|
||||
NotImplementedError: If streaming TTS is not supported
|
||||
"""
|
||||
raise NotImplementedError("Streaming TTS not supported by this provider")
|
||||
@@ -1,626 +0,0 @@
|
||||
"""Azure Speech Services voice provider for STT and TTS.
|
||||
|
||||
Azure supports:
|
||||
- **STT**: Batch transcription via REST API (audio/wav POST) and real-time
|
||||
streaming via the Azure Speech SDK (push audio stream with continuous
|
||||
recognition). The SDK handles VAD natively through its recognizing/recognized
|
||||
events.
|
||||
- **TTS**: SSML-based synthesis via REST API (streaming response) and real-time
|
||||
synthesis via the Speech SDK. Text is escaped with ``xml.sax.saxutils.escape``
|
||||
and attributes with ``quoteattr`` to prevent SSML injection.
|
||||
|
||||
Both modes support Azure cloud endpoints (region-based URLs) and self-hosted
|
||||
Speech containers (custom endpoint URLs). The ``speech_region`` is validated to
|
||||
contain only ``[a-z0-9-]`` to prevent URL injection.
|
||||
|
||||
The Azure Speech SDK (``azure-cognitiveservices-speech``) is an optional C
|
||||
extension dependency — it is imported lazily inside streaming methods so the
|
||||
provider can still be instantiated and used for REST-based operations without it.
|
||||
|
||||
See https://learn.microsoft.com/en-us/azure/cognitive-services/speech-service/
|
||||
for API reference.
|
||||
"""
|
||||
|
||||
import asyncio
|
||||
import io
|
||||
import re
|
||||
import struct
|
||||
import wave
|
||||
from collections.abc import AsyncIterator
|
||||
from typing import Any
|
||||
from urllib.parse import urlparse
|
||||
from xml.sax.saxutils import escape
|
||||
from xml.sax.saxutils import quoteattr
|
||||
|
||||
import aiohttp
|
||||
|
||||
from onyx.utils.logger import setup_logger
|
||||
from onyx.voice.interface import StreamingSynthesizerProtocol
|
||||
from onyx.voice.interface import StreamingTranscriberProtocol
|
||||
from onyx.voice.interface import TranscriptResult
|
||||
from onyx.voice.interface import VoiceProviderInterface
|
||||
|
||||
# SSML namespace — W3C standard for Speech Synthesis Markup Language.
|
||||
# This is a fixed W3C specification and will not change.
|
||||
SSML_NAMESPACE = "http://www.w3.org/2001/10/synthesis"
|
||||
|
||||
# Common Azure Neural voices
|
||||
AZURE_VOICES = [
|
||||
{"id": "en-US-JennyNeural", "name": "Jenny (en-US, Female)"},
|
||||
{"id": "en-US-GuyNeural", "name": "Guy (en-US, Male)"},
|
||||
{"id": "en-US-AriaNeural", "name": "Aria (en-US, Female)"},
|
||||
{"id": "en-US-DavisNeural", "name": "Davis (en-US, Male)"},
|
||||
{"id": "en-US-AmberNeural", "name": "Amber (en-US, Female)"},
|
||||
{"id": "en-US-AnaNeural", "name": "Ana (en-US, Female)"},
|
||||
{"id": "en-US-BrandonNeural", "name": "Brandon (en-US, Male)"},
|
||||
{"id": "en-US-ChristopherNeural", "name": "Christopher (en-US, Male)"},
|
||||
{"id": "en-US-CoraNeural", "name": "Cora (en-US, Female)"},
|
||||
{"id": "en-GB-SoniaNeural", "name": "Sonia (en-GB, Female)"},
|
||||
{"id": "en-GB-RyanNeural", "name": "Ryan (en-GB, Male)"},
|
||||
]
|
||||
|
||||
|
||||
class AzureStreamingTranscriber(StreamingTranscriberProtocol):
|
||||
"""Streaming transcription using Azure Speech SDK."""
|
||||
|
||||
def __init__(
|
||||
self,
|
||||
api_key: str,
|
||||
region: str | None = None,
|
||||
endpoint: str | None = None,
|
||||
input_sample_rate: int = 24000,
|
||||
target_sample_rate: int = 16000,
|
||||
):
|
||||
self.api_key = api_key
|
||||
self.region = region
|
||||
self.endpoint = endpoint
|
||||
self.input_sample_rate = input_sample_rate
|
||||
self.target_sample_rate = target_sample_rate
|
||||
self._transcript_queue: asyncio.Queue[TranscriptResult | None] = asyncio.Queue()
|
||||
self._accumulated_transcript = ""
|
||||
self._recognizer: Any = None
|
||||
self._audio_stream: Any = None
|
||||
self._closed = False
|
||||
self._loop: asyncio.AbstractEventLoop | None = None
|
||||
|
||||
async def connect(self) -> None:
|
||||
"""Initialize Azure Speech recognizer with push stream."""
|
||||
try:
|
||||
import azure.cognitiveservices.speech as speechsdk # type: ignore
|
||||
except ImportError as e:
|
||||
raise RuntimeError(
|
||||
"Azure Speech SDK is required for streaming STT. "
|
||||
"Install `azure-cognitiveservices-speech`."
|
||||
) from e
|
||||
|
||||
self._loop = asyncio.get_running_loop()
|
||||
|
||||
# Use endpoint for self-hosted containers, region for Azure cloud
|
||||
if self.endpoint:
|
||||
speech_config = speechsdk.SpeechConfig(
|
||||
subscription=self.api_key,
|
||||
endpoint=self.endpoint,
|
||||
)
|
||||
else:
|
||||
speech_config = speechsdk.SpeechConfig(
|
||||
subscription=self.api_key,
|
||||
region=self.region,
|
||||
)
|
||||
|
||||
audio_format = speechsdk.audio.AudioStreamFormat(
|
||||
samples_per_second=16000,
|
||||
bits_per_sample=16,
|
||||
channels=1,
|
||||
)
|
||||
self._audio_stream = speechsdk.audio.PushAudioInputStream(audio_format)
|
||||
audio_config = speechsdk.audio.AudioConfig(stream=self._audio_stream)
|
||||
|
||||
self._recognizer = speechsdk.SpeechRecognizer(
|
||||
speech_config=speech_config,
|
||||
audio_config=audio_config,
|
||||
)
|
||||
|
||||
transcriber = self
|
||||
|
||||
def on_recognizing(evt: Any) -> None:
|
||||
if evt.result.text and transcriber._loop and not transcriber._closed:
|
||||
full_text = transcriber._accumulated_transcript
|
||||
if full_text:
|
||||
full_text += " " + evt.result.text
|
||||
else:
|
||||
full_text = evt.result.text
|
||||
transcriber._loop.call_soon_threadsafe(
|
||||
transcriber._transcript_queue.put_nowait,
|
||||
TranscriptResult(text=full_text, is_vad_end=False),
|
||||
)
|
||||
|
||||
def on_recognized(evt: Any) -> None:
|
||||
if evt.result.text and transcriber._loop and not transcriber._closed:
|
||||
if transcriber._accumulated_transcript:
|
||||
transcriber._accumulated_transcript += " " + evt.result.text
|
||||
else:
|
||||
transcriber._accumulated_transcript = evt.result.text
|
||||
transcriber._loop.call_soon_threadsafe(
|
||||
transcriber._transcript_queue.put_nowait,
|
||||
TranscriptResult(
|
||||
text=transcriber._accumulated_transcript, is_vad_end=True
|
||||
),
|
||||
)
|
||||
|
||||
self._recognizer.recognizing.connect(on_recognizing)
|
||||
self._recognizer.recognized.connect(on_recognized)
|
||||
self._recognizer.start_continuous_recognition_async()
|
||||
|
||||
async def send_audio(self, chunk: bytes) -> None:
|
||||
"""Send audio chunk to Azure."""
|
||||
if self._audio_stream and not self._closed:
|
||||
self._audio_stream.write(self._resample_pcm16(chunk))
|
||||
|
||||
def _resample_pcm16(self, data: bytes) -> bytes:
|
||||
"""Resample PCM16 audio from input_sample_rate to target_sample_rate."""
|
||||
if self.input_sample_rate == self.target_sample_rate:
|
||||
return data
|
||||
|
||||
num_samples = len(data) // 2
|
||||
if num_samples == 0:
|
||||
return b""
|
||||
|
||||
samples = list(struct.unpack(f"<{num_samples}h", data))
|
||||
ratio = self.input_sample_rate / self.target_sample_rate
|
||||
new_length = int(num_samples / ratio)
|
||||
|
||||
resampled: list[int] = []
|
||||
for i in range(new_length):
|
||||
src_idx = i * ratio
|
||||
idx_floor = int(src_idx)
|
||||
idx_ceil = min(idx_floor + 1, num_samples - 1)
|
||||
frac = src_idx - idx_floor
|
||||
sample = int(samples[idx_floor] * (1 - frac) + samples[idx_ceil] * frac)
|
||||
sample = max(-32768, min(32767, sample))
|
||||
resampled.append(sample)
|
||||
|
||||
return struct.pack(f"<{len(resampled)}h", *resampled)
|
||||
|
||||
async def receive_transcript(self) -> TranscriptResult | None:
|
||||
"""Receive next transcript."""
|
||||
try:
|
||||
return await asyncio.wait_for(self._transcript_queue.get(), timeout=0.1)
|
||||
except asyncio.TimeoutError:
|
||||
return TranscriptResult(text="", is_vad_end=False)
|
||||
|
||||
async def close(self) -> str:
|
||||
"""Stop recognition and return final transcript."""
|
||||
self._closed = True
|
||||
if self._recognizer:
|
||||
self._recognizer.stop_continuous_recognition_async()
|
||||
if self._audio_stream:
|
||||
self._audio_stream.close()
|
||||
self._loop = None
|
||||
return self._accumulated_transcript
|
||||
|
||||
def reset_transcript(self) -> None:
|
||||
"""Reset accumulated transcript."""
|
||||
self._accumulated_transcript = ""
|
||||
|
||||
|
||||
class AzureStreamingSynthesizer(StreamingSynthesizerProtocol):
|
||||
"""Real-time streaming TTS using Azure Speech SDK."""
|
||||
|
||||
def __init__(
|
||||
self,
|
||||
api_key: str,
|
||||
region: str | None = None,
|
||||
endpoint: str | None = None,
|
||||
voice: str = "en-US-JennyNeural",
|
||||
speed: float = 1.0,
|
||||
):
|
||||
self._logger = setup_logger()
|
||||
self.api_key = api_key
|
||||
self.region = region
|
||||
self.endpoint = endpoint
|
||||
self.voice = voice
|
||||
self.speed = max(0.5, min(2.0, speed))
|
||||
self._audio_queue: asyncio.Queue[bytes | None] = asyncio.Queue()
|
||||
self._synthesizer: Any = None
|
||||
self._closed = False
|
||||
self._loop: asyncio.AbstractEventLoop | None = None
|
||||
|
||||
async def connect(self) -> None:
|
||||
"""Initialize Azure Speech synthesizer with push stream."""
|
||||
try:
|
||||
import azure.cognitiveservices.speech as speechsdk
|
||||
except ImportError as e:
|
||||
raise RuntimeError(
|
||||
"Azure Speech SDK is required for streaming TTS. "
|
||||
"Install `azure-cognitiveservices-speech`."
|
||||
) from e
|
||||
|
||||
self._logger.info("AzureStreamingSynthesizer: connecting")
|
||||
|
||||
# Store the event loop for thread-safe queue operations
|
||||
self._loop = asyncio.get_running_loop()
|
||||
|
||||
# Use endpoint for self-hosted containers, region for Azure cloud
|
||||
if self.endpoint:
|
||||
speech_config = speechsdk.SpeechConfig(
|
||||
subscription=self.api_key,
|
||||
endpoint=self.endpoint,
|
||||
)
|
||||
else:
|
||||
speech_config = speechsdk.SpeechConfig(
|
||||
subscription=self.api_key,
|
||||
region=self.region,
|
||||
)
|
||||
speech_config.speech_synthesis_voice_name = self.voice
|
||||
# Use MP3 format for streaming - compatible with MediaSource Extensions
|
||||
speech_config.set_speech_synthesis_output_format(
|
||||
speechsdk.SpeechSynthesisOutputFormat.Audio16Khz64KBitRateMonoMp3
|
||||
)
|
||||
|
||||
# Create synthesizer with pull audio output stream
|
||||
self._synthesizer = speechsdk.SpeechSynthesizer(
|
||||
speech_config=speech_config,
|
||||
audio_config=None, # We'll manually handle audio
|
||||
)
|
||||
|
||||
# Connect to synthesis events
|
||||
self._synthesizer.synthesizing.connect(self._on_synthesizing)
|
||||
self._synthesizer.synthesis_completed.connect(self._on_completed)
|
||||
|
||||
self._logger.info("AzureStreamingSynthesizer: connected")
|
||||
|
||||
def _on_synthesizing(self, evt: Any) -> None:
|
||||
"""Called when audio chunk is available (runs in Azure SDK thread)."""
|
||||
if evt.result.audio_data and self._loop and not self._closed:
|
||||
# Thread-safe way to put item in async queue
|
||||
self._loop.call_soon_threadsafe(
|
||||
self._audio_queue.put_nowait, evt.result.audio_data
|
||||
)
|
||||
|
||||
def _on_completed(self, _evt: Any) -> None:
|
||||
"""Called when synthesis is complete (runs in Azure SDK thread)."""
|
||||
if self._loop and not self._closed:
|
||||
self._loop.call_soon_threadsafe(self._audio_queue.put_nowait, None)
|
||||
|
||||
async def send_text(self, text: str) -> None:
|
||||
"""Send text to be synthesized using SSML for prosody control."""
|
||||
if self._synthesizer and not self._closed:
|
||||
# Build SSML with prosody for speed control
|
||||
rate = f"{int((self.speed - 1) * 100):+d}%"
|
||||
escaped_text = escape(text)
|
||||
ssml = f"""<speak version='1.0' xmlns='{SSML_NAMESPACE}' xml:lang='en-US'>
|
||||
<voice name={quoteattr(self.voice)}>
|
||||
<prosody rate='{rate}'>{escaped_text}</prosody>
|
||||
</voice>
|
||||
</speak>"""
|
||||
# Use speak_ssml_async for SSML support (includes speed/prosody)
|
||||
self._synthesizer.speak_ssml_async(ssml)
|
||||
|
||||
async def receive_audio(self) -> bytes | None:
|
||||
"""Receive next audio chunk."""
|
||||
try:
|
||||
return await asyncio.wait_for(self._audio_queue.get(), timeout=0.1)
|
||||
except asyncio.TimeoutError:
|
||||
return b"" # No audio yet, but not done
|
||||
|
||||
async def flush(self) -> None:
|
||||
"""Signal end of text input - wait for pending audio."""
|
||||
# Azure SDK handles flushing automatically
|
||||
|
||||
async def close(self) -> None:
|
||||
"""Close the session."""
|
||||
self._closed = True
|
||||
if self._synthesizer:
|
||||
self._synthesizer.synthesis_completed.disconnect_all()
|
||||
self._synthesizer.synthesizing.disconnect_all()
|
||||
self._loop = None
|
||||
|
||||
|
||||
class AzureVoiceProvider(VoiceProviderInterface):
|
||||
"""Azure Speech Services voice provider."""
|
||||
|
||||
def __init__(
|
||||
self,
|
||||
api_key: str | None,
|
||||
api_base: str | None,
|
||||
custom_config: dict[str, Any],
|
||||
stt_model: str | None = None,
|
||||
tts_model: str | None = None,
|
||||
default_voice: str | None = None,
|
||||
):
|
||||
self.api_key = api_key
|
||||
self.api_base = api_base
|
||||
self.custom_config = custom_config
|
||||
raw_speech_region = (
|
||||
custom_config.get("speech_region")
|
||||
or self._extract_speech_region_from_uri(api_base)
|
||||
or ""
|
||||
)
|
||||
self.speech_region = self._validate_speech_region(raw_speech_region)
|
||||
self.stt_model = stt_model
|
||||
self.tts_model = tts_model
|
||||
self.default_voice = default_voice or "en-US-JennyNeural"
|
||||
|
||||
@staticmethod
|
||||
def _is_azure_cloud_url(uri: str | None) -> bool:
|
||||
"""Check if URI is an Azure cloud endpoint (vs custom/self-hosted)."""
|
||||
if not uri:
|
||||
return False
|
||||
try:
|
||||
hostname = (urlparse(uri).hostname or "").lower()
|
||||
except ValueError:
|
||||
return False
|
||||
return hostname.endswith(
|
||||
(
|
||||
".speech.microsoft.com",
|
||||
".api.cognitive.microsoft.com",
|
||||
".cognitiveservices.azure.com",
|
||||
)
|
||||
)
|
||||
|
||||
@staticmethod
|
||||
def _extract_speech_region_from_uri(uri: str | None) -> str | None:
|
||||
"""Extract Azure speech region from endpoint URI.
|
||||
|
||||
Note: Custom domains (*.cognitiveservices.azure.com) contain the resource
|
||||
name, not the region. For custom domains, the region must be specified
|
||||
explicitly via custom_config["speech_region"].
|
||||
"""
|
||||
if not uri:
|
||||
return None
|
||||
# Accepted examples:
|
||||
# - https://eastus.tts.speech.microsoft.com/cognitiveservices/v1
|
||||
# - https://eastus.stt.speech.microsoft.com/speech/recognition/...
|
||||
# - https://westus.api.cognitive.microsoft.com/
|
||||
#
|
||||
# NOT supported (requires explicit speech_region config):
|
||||
# - https://<resource>.cognitiveservices.azure.com/ (resource name != region)
|
||||
try:
|
||||
hostname = (urlparse(uri).hostname or "").lower()
|
||||
except ValueError:
|
||||
return None
|
||||
|
||||
stt_tts_match = re.match(
|
||||
r"^([a-z0-9-]+)\.(?:tts|stt)\.speech\.microsoft\.com$", hostname
|
||||
)
|
||||
if stt_tts_match:
|
||||
return stt_tts_match.group(1)
|
||||
|
||||
api_match = re.match(
|
||||
r"^([a-z0-9-]+)\.api\.cognitive\.microsoft\.com$", hostname
|
||||
)
|
||||
if api_match:
|
||||
return api_match.group(1)
|
||||
|
||||
return None
|
||||
|
||||
@staticmethod
|
||||
def _validate_speech_region(speech_region: str) -> str:
|
||||
normalized_region = speech_region.strip().lower()
|
||||
if not normalized_region:
|
||||
return ""
|
||||
if not re.fullmatch(r"[a-z0-9-]+", normalized_region):
|
||||
raise ValueError(
|
||||
"Invalid Azure speech_region. Use lowercase letters, digits, and hyphens only."
|
||||
)
|
||||
return normalized_region
|
||||
|
||||
def _get_stt_url(self) -> str:
|
||||
"""Get the STT endpoint URL (auto-detects cloud vs self-hosted)."""
|
||||
if self.api_base and not self._is_azure_cloud_url(self.api_base):
|
||||
# Self-hosted container endpoint
|
||||
return f"{self.api_base.rstrip('/')}/speech/recognition/conversation/cognitiveservices/v1"
|
||||
# Azure cloud endpoint
|
||||
return (
|
||||
f"https://{self.speech_region}.stt.speech.microsoft.com/"
|
||||
"speech/recognition/conversation/cognitiveservices/v1"
|
||||
)
|
||||
|
||||
def _get_tts_url(self) -> str:
|
||||
"""Get the TTS endpoint URL (auto-detects cloud vs self-hosted)."""
|
||||
if self.api_base and not self._is_azure_cloud_url(self.api_base):
|
||||
# Self-hosted container endpoint
|
||||
return f"{self.api_base.rstrip('/')}/cognitiveservices/v1"
|
||||
# Azure cloud endpoint
|
||||
return f"https://{self.speech_region}.tts.speech.microsoft.com/cognitiveservices/v1"
|
||||
|
||||
def _is_self_hosted(self) -> bool:
|
||||
"""Check if using self-hosted container vs Azure cloud."""
|
||||
return bool(self.api_base and not self._is_azure_cloud_url(self.api_base))
|
||||
|
||||
@staticmethod
|
||||
def _pcm16_to_wav(pcm_data: bytes, sample_rate: int = 24000) -> bytes:
|
||||
"""Wrap raw PCM16 mono bytes into a WAV container."""
|
||||
buffer = io.BytesIO()
|
||||
with wave.open(buffer, "wb") as wav_file:
|
||||
wav_file.setnchannels(1)
|
||||
wav_file.setsampwidth(2)
|
||||
wav_file.setframerate(sample_rate)
|
||||
wav_file.writeframes(pcm_data)
|
||||
return buffer.getvalue()
|
||||
|
||||
async def transcribe(self, audio_data: bytes, audio_format: str) -> str:
|
||||
if not self.api_key:
|
||||
raise ValueError("Azure API key required for STT")
|
||||
if not self._is_self_hosted() and not self.speech_region:
|
||||
raise ValueError("Azure speech region required for STT (cloud mode)")
|
||||
|
||||
normalized_format = audio_format.lower()
|
||||
payload = audio_data
|
||||
content_type = f"audio/{normalized_format}"
|
||||
|
||||
# WebSocket chunked fallback sends raw PCM16 bytes.
|
||||
if normalized_format in {"pcm", "pcm16", "raw"}:
|
||||
payload = self._pcm16_to_wav(audio_data, sample_rate=24000)
|
||||
content_type = "audio/wav"
|
||||
elif normalized_format in {"wav", "wave"}:
|
||||
content_type = "audio/wav"
|
||||
elif normalized_format == "webm":
|
||||
content_type = "audio/webm; codecs=opus"
|
||||
|
||||
url = self._get_stt_url()
|
||||
params = {"language": "en-US", "format": "detailed"}
|
||||
headers = {
|
||||
"Ocp-Apim-Subscription-Key": self.api_key,
|
||||
"Content-Type": content_type,
|
||||
"Accept": "application/json",
|
||||
}
|
||||
|
||||
async with aiohttp.ClientSession() as session:
|
||||
async with session.post(
|
||||
url, params=params, headers=headers, data=payload
|
||||
) as response:
|
||||
if response.status != 200:
|
||||
error_text = await response.text()
|
||||
raise RuntimeError(f"Azure STT failed: {error_text}")
|
||||
result = await response.json()
|
||||
|
||||
if result.get("RecognitionStatus") != "Success":
|
||||
return ""
|
||||
nbest = result.get("NBest") or []
|
||||
if nbest and isinstance(nbest, list):
|
||||
display = nbest[0].get("Display")
|
||||
if isinstance(display, str):
|
||||
return display
|
||||
display_text = result.get("DisplayText", "")
|
||||
return display_text if isinstance(display_text, str) else ""
|
||||
|
||||
async def synthesize_stream(
|
||||
self, text: str, voice: str | None = None, speed: float = 1.0
|
||||
) -> AsyncIterator[bytes]:
|
||||
"""
|
||||
Convert text to audio using Azure TTS with streaming.
|
||||
|
||||
Args:
|
||||
text: Text to convert to speech
|
||||
voice: Voice name (defaults to provider's default voice)
|
||||
speed: Playback speed multiplier (0.5 to 2.0)
|
||||
|
||||
Yields:
|
||||
Audio data chunks (mp3 format)
|
||||
"""
|
||||
if not self.api_key:
|
||||
raise ValueError("Azure API key required for TTS")
|
||||
|
||||
if not self._is_self_hosted() and not self.speech_region:
|
||||
raise ValueError("Azure speech region required for TTS (cloud mode)")
|
||||
|
||||
voice_name = voice or self.default_voice
|
||||
|
||||
# Clamp speed to valid range and convert to rate format
|
||||
speed = max(0.5, min(2.0, speed))
|
||||
rate = f"{int((speed - 1) * 100):+d}%" # e.g., 1.0 -> "+0%", 1.5 -> "+50%"
|
||||
|
||||
# Build SSML with escaped text and quoted attributes to prevent injection
|
||||
escaped_text = escape(text)
|
||||
ssml = f"""<speak version='1.0' xmlns='{SSML_NAMESPACE}' xml:lang='en-US'>
|
||||
<voice name={quoteattr(voice_name)}>
|
||||
<prosody rate='{rate}'>{escaped_text}</prosody>
|
||||
</voice>
|
||||
</speak>"""
|
||||
|
||||
url = self._get_tts_url()
|
||||
|
||||
headers = {
|
||||
"Ocp-Apim-Subscription-Key": self.api_key,
|
||||
"Content-Type": "application/ssml+xml",
|
||||
"X-Microsoft-OutputFormat": "audio-16khz-128kbitrate-mono-mp3",
|
||||
"User-Agent": "Onyx",
|
||||
}
|
||||
|
||||
async with aiohttp.ClientSession() as session:
|
||||
async with session.post(url, headers=headers, data=ssml) as response:
|
||||
if response.status != 200:
|
||||
error_text = await response.text()
|
||||
raise RuntimeError(f"Azure TTS failed: {error_text}")
|
||||
|
||||
# Use 8192 byte chunks for smoother streaming
|
||||
async for chunk in response.content.iter_chunked(8192):
|
||||
if chunk:
|
||||
yield chunk
|
||||
|
||||
async def validate_credentials(self) -> None:
|
||||
"""Validate Azure credentials by listing available voices."""
|
||||
if not self.api_key:
|
||||
raise ValueError("Azure API key required")
|
||||
if not self._is_self_hosted() and not self.speech_region:
|
||||
raise ValueError("Azure speech region required (cloud mode)")
|
||||
|
||||
url = f"https://{self.speech_region}.tts.speech.microsoft.com/cognitiveservices/voices/list"
|
||||
if self._is_self_hosted():
|
||||
url = f"{(self.api_base or '').rstrip('/')}/cognitiveservices/voices/list"
|
||||
|
||||
headers = {"Ocp-Apim-Subscription-Key": self.api_key}
|
||||
async with aiohttp.ClientSession() as session:
|
||||
async with session.get(url, headers=headers) as response:
|
||||
if response.status != 200:
|
||||
error_text = await response.text()
|
||||
raise RuntimeError(
|
||||
f"Azure credential validation failed: {error_text}"
|
||||
)
|
||||
|
||||
def get_available_voices(self) -> list[dict[str, str]]:
|
||||
"""Return common Azure Neural voices."""
|
||||
return AZURE_VOICES.copy()
|
||||
|
||||
def get_available_stt_models(self) -> list[dict[str, str]]:
|
||||
return [
|
||||
{"id": "default", "name": "Azure Speech Recognition"},
|
||||
]
|
||||
|
||||
def get_available_tts_models(self) -> list[dict[str, str]]:
|
||||
return [
|
||||
{"id": "neural", "name": "Neural TTS"},
|
||||
]
|
||||
|
||||
def supports_streaming_stt(self) -> bool:
|
||||
"""Azure supports streaming STT via Speech SDK."""
|
||||
return True
|
||||
|
||||
def supports_streaming_tts(self) -> bool:
|
||||
"""Azure supports real-time streaming TTS via Speech SDK."""
|
||||
return True
|
||||
|
||||
async def create_streaming_transcriber(
|
||||
self, _audio_format: str = "webm"
|
||||
) -> AzureStreamingTranscriber:
|
||||
"""Create a streaming transcription session."""
|
||||
if not self.api_key:
|
||||
raise ValueError("API key required for streaming transcription")
|
||||
if not self._is_self_hosted() and not self.speech_region:
|
||||
raise ValueError(
|
||||
"Speech region required for Azure streaming transcription (cloud mode)"
|
||||
)
|
||||
|
||||
# Use endpoint for self-hosted, region for cloud
|
||||
transcriber = AzureStreamingTranscriber(
|
||||
api_key=self.api_key,
|
||||
region=self.speech_region if not self._is_self_hosted() else None,
|
||||
endpoint=self.api_base if self._is_self_hosted() else None,
|
||||
input_sample_rate=24000,
|
||||
target_sample_rate=16000,
|
||||
)
|
||||
await transcriber.connect()
|
||||
return transcriber
|
||||
|
||||
async def create_streaming_synthesizer(
|
||||
self, voice: str | None = None, speed: float = 1.0
|
||||
) -> AzureStreamingSynthesizer:
|
||||
"""Create a streaming TTS session."""
|
||||
if not self.api_key:
|
||||
raise ValueError("API key required for streaming TTS")
|
||||
if not self._is_self_hosted() and not self.speech_region:
|
||||
raise ValueError(
|
||||
"Speech region required for Azure streaming TTS (cloud mode)"
|
||||
)
|
||||
|
||||
# Use endpoint for self-hosted, region for cloud
|
||||
synthesizer = AzureStreamingSynthesizer(
|
||||
api_key=self.api_key,
|
||||
region=self.speech_region if not self._is_self_hosted() else None,
|
||||
endpoint=self.api_base if self._is_self_hosted() else None,
|
||||
voice=voice or self.default_voice or "en-US-JennyNeural",
|
||||
speed=speed,
|
||||
)
|
||||
await synthesizer.connect()
|
||||
return synthesizer
|
||||
@@ -1,858 +0,0 @@
|
||||
"""ElevenLabs voice provider for STT and TTS.
|
||||
|
||||
ElevenLabs supports:
|
||||
- **STT**: Scribe API (batch via REST, streaming via WebSocket with Scribe v2 Realtime).
|
||||
The streaming endpoint sends base64-encoded PCM16 audio chunks and receives JSON
|
||||
transcript messages (partial_transcript, committed_transcript, utterance_end).
|
||||
- **TTS**: Text-to-speech via REST streaming and WebSocket stream-input.
|
||||
The WebSocket variant accepts incremental text chunks and returns audio in order,
|
||||
enabling low-latency playback before the full text is available.
|
||||
|
||||
See https://elevenlabs.io/docs for API reference.
|
||||
"""
|
||||
|
||||
import asyncio
|
||||
import base64
|
||||
import json
|
||||
from collections.abc import AsyncIterator
|
||||
from enum import StrEnum
|
||||
from typing import Any
|
||||
|
||||
import aiohttp
|
||||
|
||||
from onyx.voice.interface import StreamingSynthesizerProtocol
|
||||
from onyx.voice.interface import StreamingTranscriberProtocol
|
||||
from onyx.voice.interface import TranscriptResult
|
||||
from onyx.voice.interface import VoiceProviderInterface
|
||||
|
||||
# Default ElevenLabs API base URL
|
||||
DEFAULT_ELEVENLABS_API_BASE = "https://api.elevenlabs.io"
|
||||
|
||||
# Default sample rates for STT streaming
|
||||
DEFAULT_INPUT_SAMPLE_RATE = 24000 # What the browser frontend sends
|
||||
DEFAULT_TARGET_SAMPLE_RATE = 16000 # What ElevenLabs Scribe expects
|
||||
|
||||
# Default streaming TTS output format
|
||||
DEFAULT_TTS_OUTPUT_FORMAT = "mp3_44100_64"
|
||||
|
||||
# Default TTS voice settings
|
||||
DEFAULT_VOICE_STABILITY = 0.5
|
||||
DEFAULT_VOICE_SIMILARITY_BOOST = 0.75
|
||||
|
||||
# Chunk length schedule for streaming TTS (optimized for real-time playback)
|
||||
DEFAULT_CHUNK_LENGTH_SCHEDULE = [120, 160, 250, 290]
|
||||
|
||||
# Default STT streaming VAD configuration
|
||||
DEFAULT_VAD_SILENCE_THRESHOLD_SECS = 1.0
|
||||
DEFAULT_VAD_THRESHOLD = 0.4
|
||||
DEFAULT_MIN_SPEECH_DURATION_MS = 100
|
||||
DEFAULT_MIN_SILENCE_DURATION_MS = 300
|
||||
|
||||
|
||||
class ElevenLabsSTTMessageType(StrEnum):
|
||||
"""Message types from ElevenLabs Scribe Realtime STT API."""
|
||||
|
||||
SESSION_STARTED = "session_started"
|
||||
PARTIAL_TRANSCRIPT = "partial_transcript"
|
||||
COMMITTED_TRANSCRIPT = "committed_transcript"
|
||||
UTTERANCE_END = "utterance_end"
|
||||
SESSION_ENDED = "session_ended"
|
||||
ERROR = "error"
|
||||
|
||||
|
||||
class ElevenLabsTTSMessageType(StrEnum):
|
||||
"""Message types from ElevenLabs stream-input TTS API."""
|
||||
|
||||
AUDIO = "audio"
|
||||
ERROR = "error"
|
||||
|
||||
|
||||
def _http_to_ws_url(http_url: str) -> str:
|
||||
"""Convert http(s) URL to ws(s) URL for WebSocket connections."""
|
||||
if http_url.startswith("https://"):
|
||||
return "wss://" + http_url[8:]
|
||||
elif http_url.startswith("http://"):
|
||||
return "ws://" + http_url[7:]
|
||||
return http_url
|
||||
|
||||
|
||||
# Common ElevenLabs voices
|
||||
ELEVENLABS_VOICES = [
|
||||
{"id": "21m00Tcm4TlvDq8ikWAM", "name": "Rachel"},
|
||||
{"id": "AZnzlk1XvdvUeBnXmlld", "name": "Domi"},
|
||||
{"id": "EXAVITQu4vr4xnSDxMaL", "name": "Bella"},
|
||||
{"id": "ErXwobaYiN019PkySvjV", "name": "Antoni"},
|
||||
{"id": "MF3mGyEYCl7XYWbV9V6O", "name": "Elli"},
|
||||
{"id": "TxGEqnHWrfWFTfGW9XjX", "name": "Josh"},
|
||||
{"id": "VR6AewLTigWG4xSOukaG", "name": "Arnold"},
|
||||
{"id": "pNInz6obpgDQGcFmaJgB", "name": "Adam"},
|
||||
{"id": "yoZ06aMxZJJ28mfd3POQ", "name": "Sam"},
|
||||
]
|
||||
|
||||
|
||||
class ElevenLabsStreamingTranscriber(StreamingTranscriberProtocol):
|
||||
"""Streaming transcription session using ElevenLabs Scribe Realtime API."""
|
||||
|
||||
def __init__(
|
||||
self,
|
||||
api_key: str,
|
||||
model: str = "scribe_v2_realtime",
|
||||
input_sample_rate: int = DEFAULT_INPUT_SAMPLE_RATE,
|
||||
target_sample_rate: int = DEFAULT_TARGET_SAMPLE_RATE,
|
||||
language_code: str = "en",
|
||||
api_base: str | None = None,
|
||||
):
|
||||
# Import logger first
|
||||
from onyx.utils.logger import setup_logger
|
||||
|
||||
self._logger = setup_logger()
|
||||
|
||||
self._logger.info(
|
||||
f"ElevenLabsStreamingTranscriber: initializing with model {model}"
|
||||
)
|
||||
self.api_key = api_key
|
||||
self.model = model
|
||||
self.input_sample_rate = input_sample_rate
|
||||
self.target_sample_rate = target_sample_rate
|
||||
self.language_code = language_code
|
||||
self.api_base = api_base or DEFAULT_ELEVENLABS_API_BASE
|
||||
self._ws: aiohttp.ClientWebSocketResponse | None = None
|
||||
self._session: aiohttp.ClientSession | None = None
|
||||
self._transcript_queue: asyncio.Queue[TranscriptResult | None] = asyncio.Queue()
|
||||
self._final_transcript = ""
|
||||
self._receive_task: asyncio.Task | None = None
|
||||
self._closed = False
|
||||
|
||||
async def connect(self) -> None:
|
||||
"""Establish WebSocket connection to ElevenLabs."""
|
||||
self._logger.info(
|
||||
"ElevenLabsStreamingTranscriber: connecting to ElevenLabs API"
|
||||
)
|
||||
self._session = aiohttp.ClientSession()
|
||||
|
||||
# VAD is configured via query parameters.
|
||||
# commit_strategy=vad enables automatic transcript commit on silence detection.
|
||||
# These params are part of the ElevenLabs Scribe Realtime API contract:
|
||||
# https://elevenlabs.io/docs/api-reference/speech-to-text/realtime
|
||||
ws_base = _http_to_ws_url(self.api_base.rstrip("/"))
|
||||
url = (
|
||||
f"{ws_base}/v1/speech-to-text/realtime"
|
||||
f"?model_id={self.model}"
|
||||
f"&sample_rate={self.target_sample_rate}"
|
||||
f"&language_code={self.language_code}"
|
||||
f"&commit_strategy=vad"
|
||||
f"&vad_silence_threshold_secs={DEFAULT_VAD_SILENCE_THRESHOLD_SECS}"
|
||||
f"&vad_threshold={DEFAULT_VAD_THRESHOLD}"
|
||||
f"&min_speech_duration_ms={DEFAULT_MIN_SPEECH_DURATION_MS}"
|
||||
f"&min_silence_duration_ms={DEFAULT_MIN_SILENCE_DURATION_MS}"
|
||||
)
|
||||
self._logger.info(
|
||||
f"ElevenLabsStreamingTranscriber: connecting to {url} "
|
||||
f"(input={self.input_sample_rate}Hz, target={self.target_sample_rate}Hz)"
|
||||
)
|
||||
|
||||
try:
|
||||
self._ws = await self._session.ws_connect(
|
||||
url,
|
||||
headers={"xi-api-key": self.api_key},
|
||||
)
|
||||
self._logger.info(
|
||||
f"ElevenLabsStreamingTranscriber: connected successfully, "
|
||||
f"ws.closed={self._ws.closed}, close_code={self._ws.close_code}"
|
||||
)
|
||||
except Exception as e:
|
||||
self._logger.error(
|
||||
f"ElevenLabsStreamingTranscriber: failed to connect: {e}"
|
||||
)
|
||||
if self._session:
|
||||
await self._session.close()
|
||||
raise
|
||||
|
||||
# Start receiving transcripts in background
|
||||
self._receive_task = asyncio.create_task(self._receive_loop())
|
||||
|
||||
async def _receive_loop(self) -> None:
|
||||
"""Background task to receive transcripts from WebSocket."""
|
||||
self._logger.info("ElevenLabsStreamingTranscriber: receive loop started")
|
||||
if not self._ws:
|
||||
self._logger.warning(
|
||||
"ElevenLabsStreamingTranscriber: no WebSocket connection"
|
||||
)
|
||||
return
|
||||
|
||||
try:
|
||||
async for msg in self._ws:
|
||||
self._logger.debug(
|
||||
f"ElevenLabsStreamingTranscriber: raw message type: {msg.type}"
|
||||
)
|
||||
if msg.type == aiohttp.WSMsgType.TEXT:
|
||||
parsed_data: Any = None
|
||||
data: dict[str, Any]
|
||||
try:
|
||||
parsed_data = json.loads(msg.data)
|
||||
except json.JSONDecodeError:
|
||||
self._logger.error(
|
||||
f"ElevenLabsStreamingTranscriber: failed to parse JSON: {msg.data[:200]}"
|
||||
)
|
||||
continue
|
||||
if not isinstance(parsed_data, dict):
|
||||
self._logger.error(
|
||||
"ElevenLabsStreamingTranscriber: expected object JSON payload"
|
||||
)
|
||||
continue
|
||||
data = parsed_data
|
||||
|
||||
# ElevenLabs uses message_type field - fail fast if missing
|
||||
if "message_type" not in data and "type" not in data:
|
||||
self._logger.error(
|
||||
f"ElevenLabsStreamingTranscriber: malformed packet missing 'message_type' field: {data}"
|
||||
)
|
||||
continue
|
||||
msg_type = data.get("message_type", data.get("type", ""))
|
||||
self._logger.info(
|
||||
f"ElevenLabsStreamingTranscriber: received message_type: '{msg_type}', data keys: {list(data.keys())}"
|
||||
)
|
||||
# Check for error in various formats
|
||||
if "error" in data or msg_type == ElevenLabsSTTMessageType.ERROR:
|
||||
error_msg = data.get("error", data.get("message", data))
|
||||
self._logger.error(
|
||||
f"ElevenLabsStreamingTranscriber: API error: {error_msg}"
|
||||
)
|
||||
continue
|
||||
|
||||
# Handle message types from ElevenLabs Scribe Realtime API.
|
||||
# See https://elevenlabs.io/docs/api-reference/speech-to-text/realtime
|
||||
if msg_type == ElevenLabsSTTMessageType.SESSION_STARTED:
|
||||
self._logger.info(
|
||||
f"ElevenLabsStreamingTranscriber: session started, "
|
||||
f"id={data.get('session_id')}, config={data.get('config')}"
|
||||
)
|
||||
elif msg_type == ElevenLabsSTTMessageType.PARTIAL_TRANSCRIPT:
|
||||
# Interim result — updated as more audio is processed
|
||||
text = data.get("text", "")
|
||||
if text:
|
||||
self._logger.info(
|
||||
f"ElevenLabsStreamingTranscriber: partial_transcript: {text[:50]}..."
|
||||
)
|
||||
self._final_transcript = text
|
||||
await self._transcript_queue.put(
|
||||
TranscriptResult(text=text, is_vad_end=False)
|
||||
)
|
||||
elif msg_type == ElevenLabsSTTMessageType.COMMITTED_TRANSCRIPT:
|
||||
# Final transcript for the current utterance (VAD detected end)
|
||||
text = data.get("text", "")
|
||||
if text:
|
||||
self._logger.info(
|
||||
f"ElevenLabsStreamingTranscriber: committed_transcript: {text[:50]}..."
|
||||
)
|
||||
self._final_transcript = text
|
||||
await self._transcript_queue.put(
|
||||
TranscriptResult(text=text, is_vad_end=True)
|
||||
)
|
||||
elif msg_type == ElevenLabsSTTMessageType.UTTERANCE_END:
|
||||
# VAD detected end of speech (may carry text or be empty)
|
||||
text = data.get("text", "") or self._final_transcript
|
||||
if text:
|
||||
self._logger.info(
|
||||
f"ElevenLabsStreamingTranscriber: utterance_end: {text[:50]}..."
|
||||
)
|
||||
self._final_transcript = text
|
||||
await self._transcript_queue.put(
|
||||
TranscriptResult(text=text, is_vad_end=True)
|
||||
)
|
||||
elif msg_type == ElevenLabsSTTMessageType.SESSION_ENDED:
|
||||
self._logger.info(
|
||||
"ElevenLabsStreamingTranscriber: session ended"
|
||||
)
|
||||
break
|
||||
else:
|
||||
# Log unhandled message types with full data for debugging
|
||||
self._logger.warning(
|
||||
f"ElevenLabsStreamingTranscriber: unhandled message_type: {msg_type}, full data: {data}"
|
||||
)
|
||||
elif msg.type == aiohttp.WSMsgType.BINARY:
|
||||
self._logger.debug(
|
||||
f"ElevenLabsStreamingTranscriber: received binary message: {len(msg.data)} bytes"
|
||||
)
|
||||
elif msg.type == aiohttp.WSMsgType.CLOSED:
|
||||
close_code = self._ws.close_code if self._ws else "N/A"
|
||||
self._logger.info(
|
||||
"ElevenLabsStreamingTranscriber: WebSocket closed by "
|
||||
f"server, close_code={close_code}"
|
||||
)
|
||||
break
|
||||
elif msg.type == aiohttp.WSMsgType.ERROR:
|
||||
self._logger.error(
|
||||
f"ElevenLabsStreamingTranscriber: WebSocket error: {self._ws.exception() if self._ws else 'N/A'}"
|
||||
)
|
||||
break
|
||||
elif msg.type == aiohttp.WSMsgType.CLOSE:
|
||||
self._logger.info(
|
||||
f"ElevenLabsStreamingTranscriber: WebSocket CLOSE frame received, data={msg.data}, extra={msg.extra}"
|
||||
)
|
||||
break
|
||||
except Exception as e:
|
||||
self._logger.error(
|
||||
f"ElevenLabsStreamingTranscriber: error in receive loop: {e}",
|
||||
exc_info=True,
|
||||
)
|
||||
finally:
|
||||
close_code = self._ws.close_code if self._ws else "N/A"
|
||||
self._logger.info(
|
||||
f"ElevenLabsStreamingTranscriber: receive loop ended, close_code={close_code}"
|
||||
)
|
||||
await self._transcript_queue.put(None) # Signal end
|
||||
|
||||
def _resample_pcm16(self, data: bytes) -> bytes:
|
||||
"""Resample PCM16 audio from input_sample_rate to target_sample_rate."""
|
||||
import struct
|
||||
|
||||
if self.input_sample_rate == self.target_sample_rate:
|
||||
return data
|
||||
|
||||
# Parse int16 samples
|
||||
num_samples = len(data) // 2
|
||||
samples = list(struct.unpack(f"<{num_samples}h", data))
|
||||
|
||||
# Calculate resampling ratio
|
||||
ratio = self.input_sample_rate / self.target_sample_rate
|
||||
new_length = int(num_samples / ratio)
|
||||
|
||||
# Linear interpolation resampling
|
||||
resampled = []
|
||||
for i in range(new_length):
|
||||
src_idx = i * ratio
|
||||
idx_floor = int(src_idx)
|
||||
idx_ceil = min(idx_floor + 1, num_samples - 1)
|
||||
frac = src_idx - idx_floor
|
||||
sample = int(samples[idx_floor] * (1 - frac) + samples[idx_ceil] * frac)
|
||||
# Clamp to int16 range
|
||||
sample = max(-32768, min(32767, sample))
|
||||
resampled.append(sample)
|
||||
|
||||
return struct.pack(f"<{len(resampled)}h", *resampled)
|
||||
|
||||
async def send_audio(self, chunk: bytes) -> None:
|
||||
"""Send an audio chunk for transcription."""
|
||||
if not self._ws:
|
||||
self._logger.warning("send_audio: no WebSocket connection")
|
||||
return
|
||||
if self._closed:
|
||||
self._logger.warning("send_audio: transcriber is closed")
|
||||
return
|
||||
if self._ws.closed:
|
||||
self._logger.warning(
|
||||
f"send_audio: WebSocket is closed, close_code={self._ws.close_code}"
|
||||
)
|
||||
return
|
||||
|
||||
try:
|
||||
# Resample from input rate (24kHz) to target rate (16kHz)
|
||||
resampled = self._resample_pcm16(chunk)
|
||||
# ElevenLabs expects input_audio_chunk message format with audio_base_64
|
||||
audio_b64 = base64.b64encode(resampled).decode("utf-8")
|
||||
message = {
|
||||
"message_type": "input_audio_chunk",
|
||||
"audio_base_64": audio_b64,
|
||||
"sample_rate": self.target_sample_rate,
|
||||
}
|
||||
self._logger.info(
|
||||
f"send_audio: {len(chunk)} bytes -> {len(resampled)} bytes (resampled) -> {len(audio_b64)} chars base64"
|
||||
)
|
||||
await self._ws.send_str(json.dumps(message))
|
||||
self._logger.info("send_audio: message sent successfully")
|
||||
except Exception as e:
|
||||
self._logger.error(f"send_audio: failed to send: {e}", exc_info=True)
|
||||
raise
|
||||
|
||||
async def receive_transcript(self) -> TranscriptResult | None:
|
||||
"""Receive next transcript. Returns None when done."""
|
||||
try:
|
||||
return await asyncio.wait_for(self._transcript_queue.get(), timeout=0.1)
|
||||
except asyncio.TimeoutError:
|
||||
return TranscriptResult(
|
||||
text="", is_vad_end=False
|
||||
) # No transcript yet, but not done
|
||||
|
||||
async def close(self) -> str:
|
||||
"""Close the session and return final transcript."""
|
||||
self._logger.info("ElevenLabsStreamingTranscriber: closing session")
|
||||
self._closed = True
|
||||
if self._ws and not self._ws.closed:
|
||||
try:
|
||||
# Just close the WebSocket - ElevenLabs Scribe doesn't need a special end message
|
||||
self._logger.info(
|
||||
"ElevenLabsStreamingTranscriber: closing WebSocket connection"
|
||||
)
|
||||
await self._ws.close()
|
||||
except Exception as e:
|
||||
self._logger.debug(f"Error closing WebSocket: {e}")
|
||||
if self._receive_task and not self._receive_task.done():
|
||||
self._receive_task.cancel()
|
||||
try:
|
||||
await self._receive_task
|
||||
except asyncio.CancelledError:
|
||||
pass
|
||||
if self._session and not self._session.closed:
|
||||
await self._session.close()
|
||||
return self._final_transcript
|
||||
|
||||
def reset_transcript(self) -> None:
|
||||
"""Reset accumulated transcript. Call after auto-send to start fresh."""
|
||||
self._final_transcript = ""
|
||||
|
||||
|
||||
class ElevenLabsStreamingSynthesizer(StreamingSynthesizerProtocol):
|
||||
"""Real-time streaming TTS using ElevenLabs WebSocket API.
|
||||
|
||||
Uses ElevenLabs' stream-input WebSocket which processes text as one
|
||||
continuous stream and returns audio in order.
|
||||
"""
|
||||
|
||||
def __init__(
|
||||
self,
|
||||
api_key: str,
|
||||
voice_id: str,
|
||||
model_id: str = "eleven_multilingual_v2",
|
||||
output_format: str = "mp3_44100_64",
|
||||
api_base: str | None = None,
|
||||
speed: float = 1.0,
|
||||
):
|
||||
from onyx.utils.logger import setup_logger
|
||||
|
||||
self._logger = setup_logger()
|
||||
self.api_key = api_key
|
||||
self.voice_id = voice_id
|
||||
self.model_id = model_id
|
||||
self.output_format = output_format
|
||||
self.api_base = api_base or DEFAULT_ELEVENLABS_API_BASE
|
||||
self.speed = speed
|
||||
self._ws: aiohttp.ClientWebSocketResponse | None = None
|
||||
self._session: aiohttp.ClientSession | None = None
|
||||
self._audio_queue: asyncio.Queue[bytes | None] = asyncio.Queue()
|
||||
self._receive_task: asyncio.Task | None = None
|
||||
self._closed = False
|
||||
|
||||
async def connect(self) -> None:
|
||||
"""Establish WebSocket connection to ElevenLabs TTS."""
|
||||
self._logger.info("ElevenLabsStreamingSynthesizer: connecting")
|
||||
self._session = aiohttp.ClientSession()
|
||||
|
||||
# WebSocket URL for streaming input TTS with output format for streaming compatibility
|
||||
# Using mp3_44100_64 for good quality with smaller chunks for real-time playback
|
||||
ws_base = _http_to_ws_url(self.api_base.rstrip("/"))
|
||||
url = (
|
||||
f"{ws_base}/v1/text-to-speech/{self.voice_id}/stream-input"
|
||||
f"?model_id={self.model_id}&output_format={self.output_format}"
|
||||
)
|
||||
|
||||
self._ws = await self._session.ws_connect(
|
||||
url,
|
||||
headers={"xi-api-key": self.api_key},
|
||||
)
|
||||
|
||||
# Send initial configuration with generation settings optimized for streaming.
|
||||
# Note: API key is sent via header only (not in body to avoid log exposure).
|
||||
# See https://elevenlabs.io/docs/api-reference/text-to-speech/stream-input
|
||||
await self._ws.send_str(
|
||||
json.dumps(
|
||||
{
|
||||
"text": " ", # Initial space to start the stream
|
||||
"voice_settings": {
|
||||
"stability": DEFAULT_VOICE_STABILITY,
|
||||
"similarity_boost": DEFAULT_VOICE_SIMILARITY_BOOST,
|
||||
"speed": self.speed,
|
||||
},
|
||||
"generation_config": {
|
||||
"chunk_length_schedule": DEFAULT_CHUNK_LENGTH_SCHEDULE,
|
||||
},
|
||||
}
|
||||
)
|
||||
)
|
||||
|
||||
# Start receiving audio in background
|
||||
self._receive_task = asyncio.create_task(self._receive_loop())
|
||||
self._logger.info("ElevenLabsStreamingSynthesizer: connected")
|
||||
|
||||
async def _receive_loop(self) -> None:
|
||||
"""Background task to receive audio chunks from WebSocket.
|
||||
|
||||
Audio is returned in order as one continuous stream.
|
||||
"""
|
||||
if not self._ws:
|
||||
return
|
||||
|
||||
chunk_count = 0
|
||||
total_bytes = 0
|
||||
try:
|
||||
async for msg in self._ws:
|
||||
if self._closed:
|
||||
self._logger.info(
|
||||
"ElevenLabsStreamingSynthesizer: closed flag set, stopping "
|
||||
"receive loop"
|
||||
)
|
||||
break
|
||||
if msg.type == aiohttp.WSMsgType.TEXT:
|
||||
data = json.loads(msg.data)
|
||||
# Process audio if present
|
||||
if "audio" in data and data["audio"]:
|
||||
audio_bytes = base64.b64decode(data["audio"])
|
||||
chunk_count += 1
|
||||
total_bytes += len(audio_bytes)
|
||||
await self._audio_queue.put(audio_bytes)
|
||||
|
||||
# Check isFinal separately - a message can have both audio AND isFinal
|
||||
if "isFinal" in data:
|
||||
self._logger.info(
|
||||
f"ElevenLabsStreamingSynthesizer: received isFinal={data['isFinal']}, "
|
||||
f"chunks so far: {chunk_count}, bytes: {total_bytes}"
|
||||
)
|
||||
if data.get("isFinal"):
|
||||
self._logger.info(
|
||||
"ElevenLabsStreamingSynthesizer: isFinal=true, signaling end of audio"
|
||||
)
|
||||
await self._audio_queue.put(None)
|
||||
|
||||
# Check for errors
|
||||
if "error" in data or data.get("type") == "error":
|
||||
self._logger.error(
|
||||
f"ElevenLabsStreamingSynthesizer: received error: {data}"
|
||||
)
|
||||
elif msg.type == aiohttp.WSMsgType.BINARY:
|
||||
chunk_count += 1
|
||||
total_bytes += len(msg.data)
|
||||
await self._audio_queue.put(msg.data)
|
||||
elif msg.type in (
|
||||
aiohttp.WSMsgType.CLOSE,
|
||||
aiohttp.WSMsgType.ERROR,
|
||||
):
|
||||
self._logger.info(
|
||||
f"ElevenLabsStreamingSynthesizer: WebSocket closed/error, type={msg.type}"
|
||||
)
|
||||
break
|
||||
except Exception as e:
|
||||
self._logger.error(f"ElevenLabsStreamingSynthesizer receive error: {e}")
|
||||
finally:
|
||||
self._logger.info(
|
||||
f"ElevenLabsStreamingSynthesizer: receive loop ended, {chunk_count} chunks, {total_bytes} bytes"
|
||||
)
|
||||
await self._audio_queue.put(None) # Signal end of stream
|
||||
|
||||
async def send_text(self, text: str) -> None:
|
||||
"""Send text to be synthesized.
|
||||
|
||||
ElevenLabs processes text as a continuous stream and returns
|
||||
audio in order. We let ElevenLabs handle buffering via chunk_length_schedule
|
||||
and only force generation when flush() is called at the end.
|
||||
|
||||
Args:
|
||||
text: Text to synthesize
|
||||
"""
|
||||
if self._ws and not self._closed and text.strip():
|
||||
self._logger.info(
|
||||
f"ElevenLabsStreamingSynthesizer: sending text ({len(text)} chars): '{text}'"
|
||||
)
|
||||
# Let ElevenLabs buffer and auto-generate based on chunk_length_schedule
|
||||
# Don't trigger generation here - wait for flush() at the end
|
||||
await self._ws.send_str(
|
||||
json.dumps(
|
||||
{
|
||||
"text": text + " ", # Space for natural speech flow
|
||||
}
|
||||
)
|
||||
)
|
||||
self._logger.info("ElevenLabsStreamingSynthesizer: text sent successfully")
|
||||
else:
|
||||
self._logger.warning(
|
||||
f"ElevenLabsStreamingSynthesizer: skipping send_text - "
|
||||
f"ws={self._ws is not None}, closed={self._closed}, text='{text[:30] if text else ''}'"
|
||||
)
|
||||
|
||||
async def receive_audio(self) -> bytes | None:
|
||||
"""Receive next audio chunk."""
|
||||
try:
|
||||
return await asyncio.wait_for(self._audio_queue.get(), timeout=0.1)
|
||||
except asyncio.TimeoutError:
|
||||
return b"" # No audio yet, but not done
|
||||
|
||||
async def flush(self) -> None:
|
||||
"""Signal end of text input. ElevenLabs will generate remaining audio and close."""
|
||||
if self._ws and not self._closed:
|
||||
# Send empty string to signal end of input
|
||||
# ElevenLabs will generate any remaining buffered text,
|
||||
# send all audio chunks, send isFinal, then close the connection
|
||||
self._logger.info(
|
||||
"ElevenLabsStreamingSynthesizer: sending end-of-input (empty string)"
|
||||
)
|
||||
await self._ws.send_str(json.dumps({"text": ""}))
|
||||
self._logger.info("ElevenLabsStreamingSynthesizer: end-of-input sent")
|
||||
else:
|
||||
self._logger.warning(
|
||||
f"ElevenLabsStreamingSynthesizer: skipping flush - "
|
||||
f"ws={self._ws is not None}, closed={self._closed}"
|
||||
)
|
||||
|
||||
async def close(self) -> None:
|
||||
"""Close the session."""
|
||||
self._closed = True
|
||||
if self._ws:
|
||||
await self._ws.close()
|
||||
if self._receive_task:
|
||||
self._receive_task.cancel()
|
||||
try:
|
||||
await self._receive_task
|
||||
except asyncio.CancelledError:
|
||||
pass
|
||||
if self._session:
|
||||
await self._session.close()
|
||||
|
||||
|
||||
# Valid ElevenLabs model IDs
|
||||
ELEVENLABS_STT_MODELS = {"scribe_v1", "scribe_v2_realtime"}
|
||||
ELEVENLABS_TTS_MODELS = {
|
||||
"eleven_multilingual_v2",
|
||||
"eleven_turbo_v2_5",
|
||||
"eleven_monolingual_v1",
|
||||
"eleven_flash_v2_5",
|
||||
"eleven_flash_v2",
|
||||
}
|
||||
|
||||
|
||||
class ElevenLabsVoiceProvider(VoiceProviderInterface):
|
||||
"""ElevenLabs voice provider."""
|
||||
|
||||
def __init__(
|
||||
self,
|
||||
api_key: str | None,
|
||||
api_base: str | None = None,
|
||||
stt_model: str | None = None,
|
||||
tts_model: str | None = None,
|
||||
default_voice: str | None = None,
|
||||
):
|
||||
self.api_key = api_key
|
||||
self.api_base = api_base or DEFAULT_ELEVENLABS_API_BASE
|
||||
# Validate and default models - use valid ElevenLabs model IDs
|
||||
self.stt_model = (
|
||||
stt_model if stt_model in ELEVENLABS_STT_MODELS else "scribe_v1"
|
||||
)
|
||||
self.tts_model = (
|
||||
tts_model
|
||||
if tts_model in ELEVENLABS_TTS_MODELS
|
||||
else "eleven_multilingual_v2"
|
||||
)
|
||||
self.default_voice = default_voice
|
||||
|
||||
async def transcribe(self, audio_data: bytes, audio_format: str) -> str:
|
||||
"""
|
||||
Transcribe audio using ElevenLabs Speech-to-Text API.
|
||||
|
||||
Args:
|
||||
audio_data: Raw audio bytes
|
||||
audio_format: Format of the audio (e.g., 'webm', 'mp3', 'wav')
|
||||
|
||||
Returns:
|
||||
Transcribed text
|
||||
"""
|
||||
if not self.api_key:
|
||||
raise ValueError("ElevenLabs API key required for transcription")
|
||||
|
||||
from onyx.utils.logger import setup_logger
|
||||
|
||||
logger = setup_logger()
|
||||
|
||||
url = f"{self.api_base}/v1/speech-to-text"
|
||||
|
||||
# Map common formats to MIME types
|
||||
mime_types = {
|
||||
"webm": "audio/webm",
|
||||
"mp3": "audio/mpeg",
|
||||
"wav": "audio/wav",
|
||||
"ogg": "audio/ogg",
|
||||
"flac": "audio/flac",
|
||||
"m4a": "audio/mp4",
|
||||
}
|
||||
mime_type = mime_types.get(audio_format.lower(), f"audio/{audio_format}")
|
||||
|
||||
headers = {
|
||||
"xi-api-key": self.api_key,
|
||||
}
|
||||
|
||||
# ElevenLabs expects multipart form data
|
||||
form_data = aiohttp.FormData()
|
||||
form_data.add_field(
|
||||
"audio",
|
||||
audio_data,
|
||||
filename=f"audio.{audio_format}",
|
||||
content_type=mime_type,
|
||||
)
|
||||
# For batch STT, use scribe_v1 (not the realtime model)
|
||||
batch_model = (
|
||||
self.stt_model if self.stt_model in ("scribe_v1",) else "scribe_v1"
|
||||
)
|
||||
form_data.add_field("model_id", batch_model)
|
||||
|
||||
logger.info(
|
||||
f"ElevenLabs transcribe: sending {len(audio_data)} bytes, format={audio_format}"
|
||||
)
|
||||
|
||||
async with aiohttp.ClientSession() as session:
|
||||
async with session.post(url, headers=headers, data=form_data) as response:
|
||||
if response.status != 200:
|
||||
error_text = await response.text()
|
||||
logger.error(f"ElevenLabs transcribe failed: {error_text}")
|
||||
raise RuntimeError(f"ElevenLabs transcription failed: {error_text}")
|
||||
|
||||
result = await response.json()
|
||||
text = result.get("text", "")
|
||||
logger.info(f"ElevenLabs transcribe: got result: {text[:50]}...")
|
||||
return text
|
||||
|
||||
async def synthesize_stream(
|
||||
self, text: str, voice: str | None = None, speed: float = 1.0
|
||||
) -> AsyncIterator[bytes]:
|
||||
"""
|
||||
Convert text to audio using ElevenLabs TTS with streaming.
|
||||
|
||||
Args:
|
||||
text: Text to convert to speech
|
||||
voice: Voice ID (defaults to provider's default voice or Rachel)
|
||||
speed: Playback speed multiplier
|
||||
|
||||
Yields:
|
||||
Audio data chunks (mp3 format)
|
||||
"""
|
||||
from onyx.utils.logger import setup_logger
|
||||
|
||||
logger = setup_logger()
|
||||
|
||||
if not self.api_key:
|
||||
raise ValueError("ElevenLabs API key required for TTS")
|
||||
|
||||
voice_id = voice or self.default_voice or "21m00Tcm4TlvDq8ikWAM" # Rachel
|
||||
|
||||
url = f"{self.api_base}/v1/text-to-speech/{voice_id}/stream"
|
||||
|
||||
logger.info(
|
||||
f"ElevenLabs TTS: starting synthesis, text='{text[:50]}...', "
|
||||
f"voice={voice_id}, model={self.tts_model}, speed={speed}"
|
||||
)
|
||||
|
||||
headers = {
|
||||
"xi-api-key": self.api_key,
|
||||
"Content-Type": "application/json",
|
||||
"Accept": "audio/mpeg",
|
||||
}
|
||||
|
||||
payload = {
|
||||
"text": text,
|
||||
"model_id": self.tts_model,
|
||||
"voice_settings": {
|
||||
"stability": DEFAULT_VOICE_STABILITY,
|
||||
"similarity_boost": DEFAULT_VOICE_SIMILARITY_BOOST,
|
||||
"speed": speed,
|
||||
},
|
||||
}
|
||||
|
||||
async with aiohttp.ClientSession() as session:
|
||||
async with session.post(url, headers=headers, json=payload) as response:
|
||||
logger.info(
|
||||
f"ElevenLabs TTS: got response status={response.status}, "
|
||||
f"content-type={response.headers.get('content-type')}"
|
||||
)
|
||||
if response.status != 200:
|
||||
error_text = await response.text()
|
||||
logger.error(f"ElevenLabs TTS failed: {error_text}")
|
||||
raise RuntimeError(f"ElevenLabs TTS failed: {error_text}")
|
||||
|
||||
# Use 8192 byte chunks for smoother streaming
|
||||
chunk_count = 0
|
||||
total_bytes = 0
|
||||
async for chunk in response.content.iter_chunked(8192):
|
||||
if chunk:
|
||||
chunk_count += 1
|
||||
total_bytes += len(chunk)
|
||||
yield chunk
|
||||
logger.info(
|
||||
f"ElevenLabs TTS: streaming complete, {chunk_count} chunks, "
|
||||
f"{total_bytes} total bytes"
|
||||
)
|
||||
|
||||
async def validate_credentials(self) -> None:
|
||||
"""Validate ElevenLabs API key by fetching user info."""
|
||||
if not self.api_key:
|
||||
raise ValueError("ElevenLabs API key required")
|
||||
|
||||
headers = {"xi-api-key": self.api_key}
|
||||
async with aiohttp.ClientSession() as session:
|
||||
async with session.get(
|
||||
f"{self.api_base}/v1/user", headers=headers
|
||||
) as response:
|
||||
if response.status != 200:
|
||||
error_text = await response.text()
|
||||
raise RuntimeError(
|
||||
f"ElevenLabs credential validation failed: {error_text}"
|
||||
)
|
||||
|
||||
def get_available_voices(self) -> list[dict[str, str]]:
|
||||
"""Return common ElevenLabs voices."""
|
||||
return ELEVENLABS_VOICES.copy()
|
||||
|
||||
def get_available_stt_models(self) -> list[dict[str, str]]:
|
||||
return [
|
||||
{"id": "scribe_v2_realtime", "name": "Scribe v2 Realtime (Streaming)"},
|
||||
{"id": "scribe_v1", "name": "Scribe v1 (Batch)"},
|
||||
]
|
||||
|
||||
def get_available_tts_models(self) -> list[dict[str, str]]:
|
||||
return [
|
||||
{"id": "eleven_multilingual_v2", "name": "Multilingual v2"},
|
||||
{"id": "eleven_turbo_v2_5", "name": "Turbo v2.5"},
|
||||
{"id": "eleven_monolingual_v1", "name": "Monolingual v1"},
|
||||
]
|
||||
|
||||
def supports_streaming_stt(self) -> bool:
|
||||
"""ElevenLabs supports streaming via Scribe Realtime API."""
|
||||
return True
|
||||
|
||||
def supports_streaming_tts(self) -> bool:
|
||||
"""ElevenLabs supports real-time streaming TTS via WebSocket."""
|
||||
return True
|
||||
|
||||
async def create_streaming_transcriber(
|
||||
self, _audio_format: str = "webm"
|
||||
) -> ElevenLabsStreamingTranscriber:
|
||||
"""Create a streaming transcription session."""
|
||||
if not self.api_key:
|
||||
raise ValueError("API key required for streaming transcription")
|
||||
# ElevenLabs realtime STT requires scribe_v2_realtime model.
|
||||
# Frontend sends PCM16 at DEFAULT_INPUT_SAMPLE_RATE (24kHz),
|
||||
# but ElevenLabs expects DEFAULT_TARGET_SAMPLE_RATE (16kHz).
|
||||
# The transcriber resamples automatically.
|
||||
transcriber = ElevenLabsStreamingTranscriber(
|
||||
api_key=self.api_key,
|
||||
model="scribe_v2_realtime",
|
||||
input_sample_rate=DEFAULT_INPUT_SAMPLE_RATE,
|
||||
target_sample_rate=DEFAULT_TARGET_SAMPLE_RATE,
|
||||
language_code="en",
|
||||
api_base=self.api_base,
|
||||
)
|
||||
await transcriber.connect()
|
||||
return transcriber
|
||||
|
||||
async def create_streaming_synthesizer(
|
||||
self, voice: str | None = None, speed: float = 1.0
|
||||
) -> ElevenLabsStreamingSynthesizer:
|
||||
"""Create a streaming TTS session."""
|
||||
if not self.api_key:
|
||||
raise ValueError("API key required for streaming TTS")
|
||||
voice_id = voice or self.default_voice or "21m00Tcm4TlvDq8ikWAM"
|
||||
synthesizer = ElevenLabsStreamingSynthesizer(
|
||||
api_key=self.api_key,
|
||||
voice_id=voice_id,
|
||||
model_id=self.tts_model,
|
||||
output_format=DEFAULT_TTS_OUTPUT_FORMAT,
|
||||
api_base=self.api_base,
|
||||
speed=speed,
|
||||
)
|
||||
await synthesizer.connect()
|
||||
return synthesizer
|
||||
@@ -1,626 +0,0 @@
|
||||
"""OpenAI voice provider for STT and TTS.
|
||||
|
||||
OpenAI supports:
|
||||
- **STT**: Whisper (batch transcription via REST) and Realtime API (streaming
|
||||
transcription via WebSocket with server-side VAD). Audio is sent as base64-encoded
|
||||
PCM16 at 24kHz mono. The Realtime API returns transcript deltas and completed
|
||||
transcription events per VAD-detected utterance.
|
||||
- **TTS**: HTTP streaming endpoint that returns audio chunks progressively.
|
||||
Supported models: tts-1 (standard) and tts-1-hd (high quality).
|
||||
|
||||
See https://platform.openai.com/docs for API reference.
|
||||
"""
|
||||
|
||||
import asyncio
|
||||
import base64
|
||||
import io
|
||||
import json
|
||||
from collections.abc import AsyncIterator
|
||||
from enum import StrEnum
|
||||
from typing import TYPE_CHECKING
|
||||
|
||||
import aiohttp
|
||||
|
||||
from onyx.voice.interface import StreamingSynthesizerProtocol
|
||||
from onyx.voice.interface import StreamingTranscriberProtocol
|
||||
from onyx.voice.interface import TranscriptResult
|
||||
from onyx.voice.interface import VoiceProviderInterface
|
||||
|
||||
if TYPE_CHECKING:
|
||||
from openai import AsyncOpenAI
|
||||
|
||||
# Default OpenAI API base URL
|
||||
DEFAULT_OPENAI_API_BASE = "https://api.openai.com"
|
||||
|
||||
|
||||
class OpenAIRealtimeMessageType(StrEnum):
|
||||
"""Message types from OpenAI Realtime transcription API."""
|
||||
|
||||
ERROR = "error"
|
||||
SPEECH_STARTED = "input_audio_buffer.speech_started"
|
||||
SPEECH_STOPPED = "input_audio_buffer.speech_stopped"
|
||||
BUFFER_COMMITTED = "input_audio_buffer.committed"
|
||||
TRANSCRIPTION_DELTA = "conversation.item.input_audio_transcription.delta"
|
||||
TRANSCRIPTION_COMPLETED = "conversation.item.input_audio_transcription.completed"
|
||||
SESSION_CREATED = "transcription_session.created"
|
||||
SESSION_UPDATED = "transcription_session.updated"
|
||||
ITEM_CREATED = "conversation.item.created"
|
||||
|
||||
|
||||
def _http_to_ws_url(http_url: str) -> str:
|
||||
"""Convert http(s) URL to ws(s) URL for WebSocket connections."""
|
||||
if http_url.startswith("https://"):
|
||||
return "wss://" + http_url[8:]
|
||||
elif http_url.startswith("http://"):
|
||||
return "ws://" + http_url[7:]
|
||||
return http_url
|
||||
|
||||
|
||||
class OpenAIStreamingTranscriber(StreamingTranscriberProtocol):
|
||||
"""Streaming transcription using OpenAI Realtime API."""
|
||||
|
||||
def __init__(
|
||||
self,
|
||||
api_key: str,
|
||||
model: str = "whisper-1",
|
||||
api_base: str | None = None,
|
||||
):
|
||||
# Import logger first
|
||||
from onyx.utils.logger import setup_logger
|
||||
|
||||
self._logger = setup_logger()
|
||||
|
||||
self._logger.info(
|
||||
f"OpenAIStreamingTranscriber: initializing with model {model}"
|
||||
)
|
||||
self.api_key = api_key
|
||||
self.model = model
|
||||
self.api_base = api_base or DEFAULT_OPENAI_API_BASE
|
||||
self._ws: aiohttp.ClientWebSocketResponse | None = None
|
||||
self._session: aiohttp.ClientSession | None = None
|
||||
self._transcript_queue: asyncio.Queue[TranscriptResult | None] = asyncio.Queue()
|
||||
self._current_turn_transcript = "" # Transcript for current VAD turn
|
||||
self._accumulated_transcript = "" # Accumulated across all turns
|
||||
self._receive_task: asyncio.Task | None = None
|
||||
self._closed = False
|
||||
|
||||
async def connect(self) -> None:
|
||||
"""Establish WebSocket connection to OpenAI Realtime API."""
|
||||
self._session = aiohttp.ClientSession()
|
||||
|
||||
# OpenAI Realtime transcription endpoint
|
||||
ws_base = _http_to_ws_url(self.api_base.rstrip("/"))
|
||||
url = f"{ws_base}/v1/realtime?intent=transcription"
|
||||
headers = {
|
||||
"Authorization": f"Bearer {self.api_key}",
|
||||
"OpenAI-Beta": "realtime=v1",
|
||||
}
|
||||
|
||||
try:
|
||||
self._ws = await self._session.ws_connect(url, headers=headers)
|
||||
self._logger.info("Connected to OpenAI Realtime API")
|
||||
except Exception as e:
|
||||
self._logger.error(f"Failed to connect to OpenAI Realtime API: {e}")
|
||||
raise
|
||||
|
||||
# Configure the session for transcription
|
||||
# Enable server-side VAD (Voice Activity Detection) for automatic speech detection
|
||||
config_message = {
|
||||
"type": "transcription_session.update",
|
||||
"session": {
|
||||
"input_audio_format": "pcm16", # 16-bit PCM at 24kHz mono
|
||||
"input_audio_transcription": {
|
||||
"model": self.model,
|
||||
},
|
||||
"turn_detection": {
|
||||
"type": "server_vad",
|
||||
"threshold": 0.5,
|
||||
"prefix_padding_ms": 300,
|
||||
"silence_duration_ms": 500,
|
||||
},
|
||||
},
|
||||
}
|
||||
await self._ws.send_str(json.dumps(config_message))
|
||||
self._logger.info(f"Sent config for model: {self.model} with server VAD")
|
||||
|
||||
# Start receiving transcripts
|
||||
self._receive_task = asyncio.create_task(self._receive_loop())
|
||||
|
||||
async def _receive_loop(self) -> None:
|
||||
"""Background task to receive transcripts."""
|
||||
if not self._ws:
|
||||
return
|
||||
|
||||
try:
|
||||
async for msg in self._ws:
|
||||
if msg.type == aiohttp.WSMsgType.TEXT:
|
||||
data = json.loads(msg.data)
|
||||
msg_type = data.get("type", "")
|
||||
self._logger.debug(f"Received message type: {msg_type}")
|
||||
|
||||
# Handle errors
|
||||
if msg_type == OpenAIRealtimeMessageType.ERROR:
|
||||
error = data.get("error", {})
|
||||
self._logger.error(f"OpenAI error: {error}")
|
||||
continue
|
||||
|
||||
# Handle VAD events
|
||||
if msg_type == OpenAIRealtimeMessageType.SPEECH_STARTED:
|
||||
self._logger.info("OpenAI: Speech started")
|
||||
# Reset current turn transcript for new speech
|
||||
self._current_turn_transcript = ""
|
||||
continue
|
||||
elif msg_type == OpenAIRealtimeMessageType.SPEECH_STOPPED:
|
||||
self._logger.info(
|
||||
"OpenAI: Speech stopped (VAD detected silence)"
|
||||
)
|
||||
continue
|
||||
elif msg_type == OpenAIRealtimeMessageType.BUFFER_COMMITTED:
|
||||
self._logger.info("OpenAI: Audio buffer committed")
|
||||
continue
|
||||
|
||||
# Handle transcription events
|
||||
if msg_type == OpenAIRealtimeMessageType.TRANSCRIPTION_DELTA:
|
||||
delta = data.get("delta", "")
|
||||
if delta:
|
||||
self._logger.info(f"OpenAI: Transcription delta: {delta}")
|
||||
self._current_turn_transcript += delta
|
||||
# Show accumulated + current turn transcript
|
||||
full_transcript = self._accumulated_transcript
|
||||
if full_transcript and self._current_turn_transcript:
|
||||
full_transcript += " "
|
||||
full_transcript += self._current_turn_transcript
|
||||
await self._transcript_queue.put(
|
||||
TranscriptResult(text=full_transcript, is_vad_end=False)
|
||||
)
|
||||
elif msg_type == OpenAIRealtimeMessageType.TRANSCRIPTION_COMPLETED:
|
||||
transcript = data.get("transcript", "")
|
||||
if transcript:
|
||||
self._logger.info(
|
||||
f"OpenAI: Transcription completed (VAD turn end): {transcript[:50]}..."
|
||||
)
|
||||
# This is the final transcript for this VAD turn
|
||||
self._current_turn_transcript = transcript
|
||||
# Accumulate this turn's transcript
|
||||
if self._accumulated_transcript:
|
||||
self._accumulated_transcript += " " + transcript
|
||||
else:
|
||||
self._accumulated_transcript = transcript
|
||||
# Send with is_vad_end=True to trigger auto-send
|
||||
await self._transcript_queue.put(
|
||||
TranscriptResult(
|
||||
text=self._accumulated_transcript,
|
||||
is_vad_end=True,
|
||||
)
|
||||
)
|
||||
elif msg_type not in (
|
||||
OpenAIRealtimeMessageType.SESSION_CREATED,
|
||||
OpenAIRealtimeMessageType.SESSION_UPDATED,
|
||||
OpenAIRealtimeMessageType.ITEM_CREATED,
|
||||
):
|
||||
# Log any other message types we might be missing
|
||||
self._logger.info(
|
||||
f"OpenAI: Unhandled message type '{msg_type}': {data}"
|
||||
)
|
||||
|
||||
elif msg.type == aiohttp.WSMsgType.ERROR:
|
||||
self._logger.error(f"WebSocket error: {self._ws.exception()}")
|
||||
break
|
||||
elif msg.type == aiohttp.WSMsgType.CLOSED:
|
||||
self._logger.info("WebSocket closed by server")
|
||||
break
|
||||
except Exception as e:
|
||||
self._logger.error(f"Error in receive loop: {e}")
|
||||
finally:
|
||||
await self._transcript_queue.put(None)
|
||||
|
||||
async def send_audio(self, chunk: bytes) -> None:
|
||||
"""Send audio chunk to OpenAI."""
|
||||
if self._ws and not self._closed:
|
||||
# OpenAI expects base64-encoded PCM16 audio at 24kHz mono
|
||||
# PCM16 at 24kHz: 24000 samples/sec * 2 bytes/sample = 48000 bytes/sec
|
||||
# So chunk_bytes / 48000 = duration in seconds
|
||||
duration_ms = (len(chunk) / 48000) * 1000
|
||||
self._logger.debug(
|
||||
f"Sending {len(chunk)} bytes ({duration_ms:.1f}ms) of audio to OpenAI. "
|
||||
f"First 10 bytes: {chunk[:10].hex() if len(chunk) >= 10 else chunk.hex()}"
|
||||
)
|
||||
message = {
|
||||
"type": "input_audio_buffer.append",
|
||||
"audio": base64.b64encode(chunk).decode("utf-8"),
|
||||
}
|
||||
await self._ws.send_str(json.dumps(message))
|
||||
|
||||
def reset_transcript(self) -> None:
|
||||
"""Reset accumulated transcript. Call after auto-send to start fresh."""
|
||||
self._logger.info("OpenAI: Resetting accumulated transcript")
|
||||
self._accumulated_transcript = ""
|
||||
self._current_turn_transcript = ""
|
||||
|
||||
async def receive_transcript(self) -> TranscriptResult | None:
|
||||
"""Receive next transcript."""
|
||||
try:
|
||||
return await asyncio.wait_for(self._transcript_queue.get(), timeout=0.1)
|
||||
except asyncio.TimeoutError:
|
||||
return TranscriptResult(text="", is_vad_end=False)
|
||||
|
||||
async def close(self) -> str:
|
||||
"""Close session and return final transcript."""
|
||||
self._closed = True
|
||||
if self._ws:
|
||||
# With server VAD, the buffer is auto-committed when speech stops.
|
||||
# But we should still commit any remaining audio and wait for transcription.
|
||||
try:
|
||||
await self._ws.send_str(
|
||||
json.dumps({"type": "input_audio_buffer.commit"})
|
||||
)
|
||||
except Exception as e:
|
||||
self._logger.debug(f"Error sending commit (may be expected): {e}")
|
||||
|
||||
# Wait for *new* transcription to arrive (up to 5 seconds)
|
||||
self._logger.info("Waiting for transcription to complete...")
|
||||
transcript_before_commit = self._accumulated_transcript
|
||||
for _ in range(50): # 50 * 100ms = 5 seconds max
|
||||
await asyncio.sleep(0.1)
|
||||
if self._accumulated_transcript != transcript_before_commit:
|
||||
self._logger.info(
|
||||
f"Got final transcript: {self._accumulated_transcript[:50]}..."
|
||||
)
|
||||
break
|
||||
else:
|
||||
self._logger.warning("Timed out waiting for transcription")
|
||||
|
||||
await self._ws.close()
|
||||
if self._receive_task:
|
||||
self._receive_task.cancel()
|
||||
try:
|
||||
await self._receive_task
|
||||
except asyncio.CancelledError:
|
||||
pass
|
||||
if self._session:
|
||||
await self._session.close()
|
||||
return self._accumulated_transcript
|
||||
|
||||
|
||||
# OpenAI available voices for TTS
|
||||
OPENAI_VOICES = [
|
||||
{"id": "alloy", "name": "Alloy"},
|
||||
{"id": "echo", "name": "Echo"},
|
||||
{"id": "fable", "name": "Fable"},
|
||||
{"id": "onyx", "name": "Onyx"},
|
||||
{"id": "nova", "name": "Nova"},
|
||||
{"id": "shimmer", "name": "Shimmer"},
|
||||
]
|
||||
|
||||
# OpenAI available STT models (all support streaming via Realtime API)
|
||||
OPENAI_STT_MODELS = [
|
||||
{"id": "whisper-1", "name": "Whisper v1"},
|
||||
{"id": "gpt-4o-transcribe", "name": "GPT-4o Transcribe"},
|
||||
{"id": "gpt-4o-mini-transcribe", "name": "GPT-4o Mini Transcribe"},
|
||||
]
|
||||
|
||||
# OpenAI available TTS models
|
||||
OPENAI_TTS_MODELS = [
|
||||
{"id": "tts-1", "name": "TTS-1 (Standard)"},
|
||||
{"id": "tts-1-hd", "name": "TTS-1 HD (High Quality)"},
|
||||
]
|
||||
|
||||
|
||||
def _create_wav_header(
|
||||
data_length: int,
|
||||
sample_rate: int = 24000,
|
||||
channels: int = 1,
|
||||
bits_per_sample: int = 16,
|
||||
) -> bytes:
|
||||
"""Create a WAV file header for PCM audio data."""
|
||||
import struct
|
||||
|
||||
byte_rate = sample_rate * channels * bits_per_sample // 8
|
||||
block_align = channels * bits_per_sample // 8
|
||||
|
||||
# WAV header is 44 bytes
|
||||
header = struct.pack(
|
||||
"<4sI4s4sIHHIIHH4sI",
|
||||
b"RIFF", # ChunkID
|
||||
36 + data_length, # ChunkSize
|
||||
b"WAVE", # Format
|
||||
b"fmt ", # Subchunk1ID
|
||||
16, # Subchunk1Size (PCM)
|
||||
1, # AudioFormat (1 = PCM)
|
||||
channels, # NumChannels
|
||||
sample_rate, # SampleRate
|
||||
byte_rate, # ByteRate
|
||||
block_align, # BlockAlign
|
||||
bits_per_sample, # BitsPerSample
|
||||
b"data", # Subchunk2ID
|
||||
data_length, # Subchunk2Size
|
||||
)
|
||||
return header
|
||||
|
||||
|
||||
class OpenAIStreamingSynthesizer(StreamingSynthesizerProtocol):
|
||||
"""Streaming TTS using OpenAI HTTP TTS API with streaming responses."""
|
||||
|
||||
def __init__(
|
||||
self,
|
||||
api_key: str,
|
||||
voice: str = "alloy",
|
||||
model: str = "tts-1",
|
||||
speed: float = 1.0,
|
||||
api_base: str | None = None,
|
||||
):
|
||||
from onyx.utils.logger import setup_logger
|
||||
|
||||
self._logger = setup_logger()
|
||||
self.api_key = api_key
|
||||
self.voice = voice
|
||||
self.model = model
|
||||
self.speed = max(0.25, min(4.0, speed))
|
||||
self.api_base = api_base or DEFAULT_OPENAI_API_BASE
|
||||
self._session: aiohttp.ClientSession | None = None
|
||||
self._audio_queue: asyncio.Queue[bytes | None] = asyncio.Queue()
|
||||
self._text_queue: asyncio.Queue[str | None] = asyncio.Queue()
|
||||
self._synthesis_task: asyncio.Task | None = None
|
||||
self._closed = False
|
||||
self._flushed = False
|
||||
|
||||
async def connect(self) -> None:
|
||||
"""Initialize HTTP session for TTS requests."""
|
||||
self._logger.info("OpenAIStreamingSynthesizer: connecting")
|
||||
self._session = aiohttp.ClientSession()
|
||||
# Start background task to process text queue
|
||||
self._synthesis_task = asyncio.create_task(self._process_text_queue())
|
||||
self._logger.info("OpenAIStreamingSynthesizer: connected")
|
||||
|
||||
async def _process_text_queue(self) -> None:
|
||||
"""Background task to process queued text for synthesis."""
|
||||
while not self._closed:
|
||||
try:
|
||||
text = await asyncio.wait_for(self._text_queue.get(), timeout=0.1)
|
||||
if text is None:
|
||||
break
|
||||
await self._synthesize_text(text)
|
||||
except asyncio.TimeoutError:
|
||||
continue
|
||||
except asyncio.CancelledError:
|
||||
break
|
||||
except Exception as e:
|
||||
self._logger.error(f"Error processing text queue: {e}")
|
||||
|
||||
async def _synthesize_text(self, text: str) -> None:
|
||||
"""Make HTTP TTS request and stream audio to queue."""
|
||||
if not self._session or self._closed:
|
||||
return
|
||||
|
||||
url = f"{self.api_base.rstrip('/')}/v1/audio/speech"
|
||||
headers = {
|
||||
"Authorization": f"Bearer {self.api_key}",
|
||||
"Content-Type": "application/json",
|
||||
}
|
||||
payload = {
|
||||
"model": self.model,
|
||||
"voice": self.voice,
|
||||
"input": text,
|
||||
"speed": self.speed,
|
||||
"response_format": "mp3",
|
||||
}
|
||||
|
||||
try:
|
||||
async with self._session.post(
|
||||
url, headers=headers, json=payload
|
||||
) as response:
|
||||
if response.status != 200:
|
||||
error_text = await response.text()
|
||||
self._logger.error(f"OpenAI TTS error: {error_text}")
|
||||
return
|
||||
|
||||
# Use 8192 byte chunks for smoother streaming
|
||||
# (larger chunks = more complete MP3 frames, better playback)
|
||||
async for chunk in response.content.iter_chunked(8192):
|
||||
if self._closed:
|
||||
break
|
||||
if chunk:
|
||||
await self._audio_queue.put(chunk)
|
||||
except Exception as e:
|
||||
self._logger.error(f"OpenAIStreamingSynthesizer synthesis error: {e}")
|
||||
|
||||
async def send_text(self, text: str) -> None:
|
||||
"""Queue text to be synthesized via HTTP streaming."""
|
||||
if not text.strip() or self._closed:
|
||||
return
|
||||
await self._text_queue.put(text)
|
||||
|
||||
async def receive_audio(self) -> bytes | None:
|
||||
"""Receive next audio chunk (MP3 format)."""
|
||||
try:
|
||||
return await asyncio.wait_for(self._audio_queue.get(), timeout=0.1)
|
||||
except asyncio.TimeoutError:
|
||||
return b"" # No audio yet, but not done
|
||||
|
||||
async def flush(self) -> None:
|
||||
"""Signal end of text input - wait for synthesis to complete."""
|
||||
if self._flushed:
|
||||
return
|
||||
self._flushed = True
|
||||
|
||||
# Signal end of text input
|
||||
await self._text_queue.put(None)
|
||||
|
||||
# Wait for synthesis task to complete processing all text
|
||||
if self._synthesis_task and not self._synthesis_task.done():
|
||||
try:
|
||||
await asyncio.wait_for(self._synthesis_task, timeout=60.0)
|
||||
except asyncio.TimeoutError:
|
||||
self._logger.warning("OpenAIStreamingSynthesizer: flush timeout")
|
||||
self._synthesis_task.cancel()
|
||||
try:
|
||||
await self._synthesis_task
|
||||
except asyncio.CancelledError:
|
||||
pass
|
||||
except asyncio.CancelledError:
|
||||
pass
|
||||
|
||||
# Signal end of audio stream
|
||||
await self._audio_queue.put(None)
|
||||
|
||||
async def close(self) -> None:
|
||||
"""Close the session."""
|
||||
if self._closed:
|
||||
return
|
||||
self._closed = True
|
||||
|
||||
# Signal end of queues only if flush wasn't already called
|
||||
if not self._flushed:
|
||||
await self._text_queue.put(None)
|
||||
await self._audio_queue.put(None)
|
||||
|
||||
if self._synthesis_task and not self._synthesis_task.done():
|
||||
self._synthesis_task.cancel()
|
||||
try:
|
||||
await self._synthesis_task
|
||||
except asyncio.CancelledError:
|
||||
pass
|
||||
|
||||
if self._session:
|
||||
await self._session.close()
|
||||
|
||||
|
||||
class OpenAIVoiceProvider(VoiceProviderInterface):
|
||||
"""OpenAI voice provider using Whisper for STT and TTS API for speech synthesis."""
|
||||
|
||||
def __init__(
|
||||
self,
|
||||
api_key: str | None,
|
||||
api_base: str | None = None,
|
||||
stt_model: str | None = None,
|
||||
tts_model: str | None = None,
|
||||
default_voice: str | None = None,
|
||||
):
|
||||
self.api_key = api_key
|
||||
self.api_base = api_base
|
||||
self.stt_model = stt_model or "whisper-1"
|
||||
self.tts_model = tts_model or "tts-1"
|
||||
self.default_voice = default_voice or "alloy"
|
||||
|
||||
self._client: "AsyncOpenAI | None" = None
|
||||
|
||||
def _get_client(self) -> "AsyncOpenAI":
|
||||
if self._client is None:
|
||||
from openai import AsyncOpenAI
|
||||
|
||||
self._client = AsyncOpenAI(
|
||||
api_key=self.api_key,
|
||||
base_url=self.api_base,
|
||||
)
|
||||
return self._client
|
||||
|
||||
async def transcribe(self, audio_data: bytes, audio_format: str) -> str:
|
||||
"""
|
||||
Transcribe audio using OpenAI Whisper.
|
||||
|
||||
Args:
|
||||
audio_data: Raw audio bytes
|
||||
audio_format: Audio format (e.g., "webm", "wav", "mp3")
|
||||
|
||||
Returns:
|
||||
Transcribed text
|
||||
"""
|
||||
client = self._get_client()
|
||||
|
||||
# Create a file-like object from the audio bytes
|
||||
audio_file = io.BytesIO(audio_data)
|
||||
audio_file.name = f"audio.{audio_format}"
|
||||
|
||||
response = await client.audio.transcriptions.create(
|
||||
model=self.stt_model,
|
||||
file=audio_file,
|
||||
)
|
||||
|
||||
return response.text
|
||||
|
||||
async def synthesize_stream(
|
||||
self, text: str, voice: str | None = None, speed: float = 1.0
|
||||
) -> AsyncIterator[bytes]:
|
||||
"""
|
||||
Convert text to audio using OpenAI TTS with streaming.
|
||||
|
||||
Args:
|
||||
text: Text to convert to speech
|
||||
voice: Voice identifier (defaults to provider's default voice)
|
||||
speed: Playback speed multiplier (0.25 to 4.0)
|
||||
|
||||
Yields:
|
||||
Audio data chunks (mp3 format)
|
||||
"""
|
||||
client = self._get_client()
|
||||
|
||||
# Clamp speed to valid range
|
||||
speed = max(0.25, min(4.0, speed))
|
||||
|
||||
# Use with_streaming_response for proper async streaming
|
||||
# Using 8192 byte chunks for better streaming performance
|
||||
# (larger chunks = fewer round-trips, more complete MP3 frames)
|
||||
async with client.audio.speech.with_streaming_response.create(
|
||||
model=self.tts_model,
|
||||
voice=voice or self.default_voice,
|
||||
input=text,
|
||||
speed=speed,
|
||||
response_format="mp3",
|
||||
) as response:
|
||||
async for chunk in response.iter_bytes(chunk_size=8192):
|
||||
yield chunk
|
||||
|
||||
async def validate_credentials(self) -> None:
|
||||
"""Validate OpenAI API key by listing models."""
|
||||
client = self._get_client()
|
||||
await client.models.list()
|
||||
|
||||
def get_available_voices(self) -> list[dict[str, str]]:
|
||||
"""Get available OpenAI TTS voices."""
|
||||
return OPENAI_VOICES.copy()
|
||||
|
||||
def get_available_stt_models(self) -> list[dict[str, str]]:
|
||||
"""Get available OpenAI STT models."""
|
||||
return OPENAI_STT_MODELS.copy()
|
||||
|
||||
def get_available_tts_models(self) -> list[dict[str, str]]:
|
||||
"""Get available OpenAI TTS models."""
|
||||
return OPENAI_TTS_MODELS.copy()
|
||||
|
||||
def supports_streaming_stt(self) -> bool:
|
||||
"""OpenAI supports streaming via Realtime API for all STT models."""
|
||||
return True
|
||||
|
||||
def supports_streaming_tts(self) -> bool:
|
||||
"""OpenAI supports real-time streaming TTS via Realtime API."""
|
||||
return True
|
||||
|
||||
async def create_streaming_transcriber(
|
||||
self, _audio_format: str = "webm"
|
||||
) -> OpenAIStreamingTranscriber:
|
||||
"""Create a streaming transcription session using Realtime API."""
|
||||
if not self.api_key:
|
||||
raise ValueError("API key required for streaming transcription")
|
||||
transcriber = OpenAIStreamingTranscriber(
|
||||
api_key=self.api_key,
|
||||
model=self.stt_model,
|
||||
api_base=self.api_base,
|
||||
)
|
||||
await transcriber.connect()
|
||||
return transcriber
|
||||
|
||||
async def create_streaming_synthesizer(
|
||||
self, voice: str | None = None, speed: float = 1.0
|
||||
) -> OpenAIStreamingSynthesizer:
|
||||
"""Create a streaming TTS session using HTTP streaming API."""
|
||||
if not self.api_key:
|
||||
raise ValueError("API key required for streaming TTS")
|
||||
synthesizer = OpenAIStreamingSynthesizer(
|
||||
api_key=self.api_key,
|
||||
voice=voice or self.default_voice or "alloy",
|
||||
model=self.tts_model or "tts-1",
|
||||
speed=speed,
|
||||
api_base=self.api_base,
|
||||
)
|
||||
await synthesizer.connect()
|
||||
return synthesizer
|
||||
@@ -67,8 +67,6 @@ attrs==25.4.0
|
||||
# zeep
|
||||
authlib==1.6.7
|
||||
# via fastmcp
|
||||
azure-cognitiveservices-speech==1.38.0
|
||||
# via onyx
|
||||
babel==2.17.0
|
||||
# via courlan
|
||||
backoff==2.2.1
|
||||
@@ -1022,7 +1020,7 @@ toolz==1.1.0
|
||||
# dask
|
||||
# distributed
|
||||
# partd
|
||||
tornado==6.5.2
|
||||
tornado==6.5.5
|
||||
# via distributed
|
||||
tqdm==4.67.1
|
||||
# via
|
||||
|
||||
@@ -263,7 +263,7 @@ oauthlib==3.2.2
|
||||
# via
|
||||
# kubernetes
|
||||
# requests-oauthlib
|
||||
onyx-devtools==0.6.3
|
||||
onyx-devtools==0.7.0
|
||||
# via onyx
|
||||
openai==2.14.0
|
||||
# via
|
||||
@@ -466,7 +466,7 @@ tokenizers==0.21.4
|
||||
# via
|
||||
# cohere
|
||||
# litellm
|
||||
tornado==6.5.2
|
||||
tornado==6.5.5
|
||||
# via
|
||||
# ipykernel
|
||||
# jupyter-client
|
||||
|
||||
@@ -1,507 +0,0 @@
|
||||
"""Unit tests for onyx.db.voice module."""
|
||||
|
||||
from unittest.mock import MagicMock
|
||||
from uuid import uuid4
|
||||
|
||||
import pytest
|
||||
|
||||
from onyx.db.models import VoiceProvider
|
||||
from onyx.db.voice import deactivate_stt_provider
|
||||
from onyx.db.voice import deactivate_tts_provider
|
||||
from onyx.db.voice import delete_voice_provider
|
||||
from onyx.db.voice import fetch_default_stt_provider
|
||||
from onyx.db.voice import fetch_default_tts_provider
|
||||
from onyx.db.voice import fetch_voice_provider_by_id
|
||||
from onyx.db.voice import fetch_voice_provider_by_type
|
||||
from onyx.db.voice import fetch_voice_providers
|
||||
from onyx.db.voice import MAX_VOICE_PLAYBACK_SPEED
|
||||
from onyx.db.voice import MIN_VOICE_PLAYBACK_SPEED
|
||||
from onyx.db.voice import set_default_stt_provider
|
||||
from onyx.db.voice import set_default_tts_provider
|
||||
from onyx.db.voice import update_user_voice_settings
|
||||
from onyx.db.voice import upsert_voice_provider
|
||||
from onyx.error_handling.exceptions import OnyxError
|
||||
|
||||
|
||||
def _make_voice_provider(
|
||||
id: int = 1,
|
||||
name: str = "Test Provider",
|
||||
provider_type: str = "openai",
|
||||
is_default_stt: bool = False,
|
||||
is_default_tts: bool = False,
|
||||
) -> VoiceProvider:
|
||||
"""Create a VoiceProvider instance for testing."""
|
||||
provider = VoiceProvider()
|
||||
provider.id = id
|
||||
provider.name = name
|
||||
provider.provider_type = provider_type
|
||||
provider.is_default_stt = is_default_stt
|
||||
provider.is_default_tts = is_default_tts
|
||||
provider.api_key = None
|
||||
provider.api_base = None
|
||||
provider.custom_config = None
|
||||
provider.stt_model = None
|
||||
provider.tts_model = None
|
||||
provider.default_voice = None
|
||||
return provider
|
||||
|
||||
|
||||
class TestFetchVoiceProviders:
|
||||
"""Tests for fetch_voice_providers."""
|
||||
|
||||
def test_returns_all_providers(self, mock_db_session: MagicMock) -> None:
|
||||
providers = [
|
||||
_make_voice_provider(id=1, name="Provider A"),
|
||||
_make_voice_provider(id=2, name="Provider B"),
|
||||
]
|
||||
mock_db_session.scalars.return_value.all.return_value = providers
|
||||
|
||||
result = fetch_voice_providers(mock_db_session)
|
||||
|
||||
assert result == providers
|
||||
mock_db_session.scalars.assert_called_once()
|
||||
|
||||
def test_returns_empty_list_when_no_providers(
|
||||
self, mock_db_session: MagicMock
|
||||
) -> None:
|
||||
mock_db_session.scalars.return_value.all.return_value = []
|
||||
|
||||
result = fetch_voice_providers(mock_db_session)
|
||||
|
||||
assert result == []
|
||||
|
||||
|
||||
class TestFetchVoiceProviderById:
|
||||
"""Tests for fetch_voice_provider_by_id."""
|
||||
|
||||
def test_returns_provider_when_found(self, mock_db_session: MagicMock) -> None:
|
||||
provider = _make_voice_provider(id=1)
|
||||
mock_db_session.scalar.return_value = provider
|
||||
|
||||
result = fetch_voice_provider_by_id(mock_db_session, 1)
|
||||
|
||||
assert result is provider
|
||||
mock_db_session.scalar.assert_called_once()
|
||||
|
||||
def test_returns_none_when_not_found(self, mock_db_session: MagicMock) -> None:
|
||||
mock_db_session.scalar.return_value = None
|
||||
|
||||
result = fetch_voice_provider_by_id(mock_db_session, 999)
|
||||
|
||||
assert result is None
|
||||
|
||||
|
||||
class TestFetchDefaultProviders:
|
||||
"""Tests for fetch_default_stt_provider and fetch_default_tts_provider."""
|
||||
|
||||
def test_fetch_default_stt_provider_returns_provider(
|
||||
self, mock_db_session: MagicMock
|
||||
) -> None:
|
||||
provider = _make_voice_provider(id=1, is_default_stt=True)
|
||||
mock_db_session.scalar.return_value = provider
|
||||
|
||||
result = fetch_default_stt_provider(mock_db_session)
|
||||
|
||||
assert result is provider
|
||||
|
||||
def test_fetch_default_stt_provider_returns_none_when_no_default(
|
||||
self, mock_db_session: MagicMock
|
||||
) -> None:
|
||||
mock_db_session.scalar.return_value = None
|
||||
|
||||
result = fetch_default_stt_provider(mock_db_session)
|
||||
|
||||
assert result is None
|
||||
|
||||
def test_fetch_default_tts_provider_returns_provider(
|
||||
self, mock_db_session: MagicMock
|
||||
) -> None:
|
||||
provider = _make_voice_provider(id=1, is_default_tts=True)
|
||||
mock_db_session.scalar.return_value = provider
|
||||
|
||||
result = fetch_default_tts_provider(mock_db_session)
|
||||
|
||||
assert result is provider
|
||||
|
||||
def test_fetch_default_tts_provider_returns_none_when_no_default(
|
||||
self, mock_db_session: MagicMock
|
||||
) -> None:
|
||||
mock_db_session.scalar.return_value = None
|
||||
|
||||
result = fetch_default_tts_provider(mock_db_session)
|
||||
|
||||
assert result is None
|
||||
|
||||
|
||||
class TestFetchVoiceProviderByType:
|
||||
"""Tests for fetch_voice_provider_by_type."""
|
||||
|
||||
def test_returns_provider_when_found(self, mock_db_session: MagicMock) -> None:
|
||||
provider = _make_voice_provider(id=1, provider_type="openai")
|
||||
mock_db_session.scalar.return_value = provider
|
||||
|
||||
result = fetch_voice_provider_by_type(mock_db_session, "openai")
|
||||
|
||||
assert result is provider
|
||||
|
||||
def test_returns_none_when_not_found(self, mock_db_session: MagicMock) -> None:
|
||||
mock_db_session.scalar.return_value = None
|
||||
|
||||
result = fetch_voice_provider_by_type(mock_db_session, "nonexistent")
|
||||
|
||||
assert result is None
|
||||
|
||||
|
||||
class TestUpsertVoiceProvider:
|
||||
"""Tests for upsert_voice_provider."""
|
||||
|
||||
def test_creates_new_provider_when_no_id(self, mock_db_session: MagicMock) -> None:
|
||||
mock_db_session.flush.return_value = None
|
||||
mock_db_session.refresh.return_value = None
|
||||
|
||||
upsert_voice_provider(
|
||||
db_session=mock_db_session,
|
||||
provider_id=None,
|
||||
name="New Provider",
|
||||
provider_type="openai",
|
||||
api_key="test-key",
|
||||
api_key_changed=True,
|
||||
)
|
||||
|
||||
mock_db_session.add.assert_called_once()
|
||||
mock_db_session.flush.assert_called()
|
||||
added_obj = mock_db_session.add.call_args[0][0]
|
||||
assert added_obj.name == "New Provider"
|
||||
assert added_obj.provider_type == "openai"
|
||||
|
||||
def test_updates_existing_provider(self, mock_db_session: MagicMock) -> None:
|
||||
existing_provider = _make_voice_provider(id=1, name="Old Name")
|
||||
mock_db_session.scalar.return_value = existing_provider
|
||||
mock_db_session.flush.return_value = None
|
||||
mock_db_session.refresh.return_value = None
|
||||
|
||||
upsert_voice_provider(
|
||||
db_session=mock_db_session,
|
||||
provider_id=1,
|
||||
name="Updated Name",
|
||||
provider_type="elevenlabs",
|
||||
api_key="new-key",
|
||||
api_key_changed=True,
|
||||
)
|
||||
|
||||
mock_db_session.add.assert_not_called()
|
||||
assert existing_provider.name == "Updated Name"
|
||||
assert existing_provider.provider_type == "elevenlabs"
|
||||
|
||||
def test_raises_when_provider_not_found(self, mock_db_session: MagicMock) -> None:
|
||||
mock_db_session.scalar.return_value = None
|
||||
|
||||
with pytest.raises(OnyxError) as exc_info:
|
||||
upsert_voice_provider(
|
||||
db_session=mock_db_session,
|
||||
provider_id=999,
|
||||
name="Test",
|
||||
provider_type="openai",
|
||||
api_key=None,
|
||||
api_key_changed=False,
|
||||
)
|
||||
|
||||
assert "No voice provider with id 999" in str(exc_info.value)
|
||||
|
||||
def test_does_not_update_api_key_when_not_changed(
|
||||
self, mock_db_session: MagicMock
|
||||
) -> None:
|
||||
existing_provider = _make_voice_provider(id=1)
|
||||
existing_provider.api_key = "original-key" # type: ignore[assignment]
|
||||
original_api_key = existing_provider.api_key
|
||||
mock_db_session.scalar.return_value = existing_provider
|
||||
mock_db_session.flush.return_value = None
|
||||
mock_db_session.refresh.return_value = None
|
||||
|
||||
upsert_voice_provider(
|
||||
db_session=mock_db_session,
|
||||
provider_id=1,
|
||||
name="Test",
|
||||
provider_type="openai",
|
||||
api_key="new-key",
|
||||
api_key_changed=False,
|
||||
)
|
||||
|
||||
# api_key should remain unchanged (same object reference)
|
||||
assert existing_provider.api_key is original_api_key
|
||||
|
||||
def test_activates_stt_when_requested(self, mock_db_session: MagicMock) -> None:
|
||||
existing_provider = _make_voice_provider(id=1)
|
||||
mock_db_session.scalar.return_value = existing_provider
|
||||
mock_db_session.flush.return_value = None
|
||||
mock_db_session.refresh.return_value = None
|
||||
mock_db_session.execute.return_value = None
|
||||
|
||||
upsert_voice_provider(
|
||||
db_session=mock_db_session,
|
||||
provider_id=1,
|
||||
name="Test",
|
||||
provider_type="openai",
|
||||
api_key=None,
|
||||
api_key_changed=False,
|
||||
activate_stt=True,
|
||||
)
|
||||
|
||||
assert existing_provider.is_default_stt is True
|
||||
|
||||
def test_activates_tts_when_requested(self, mock_db_session: MagicMock) -> None:
|
||||
existing_provider = _make_voice_provider(id=1)
|
||||
mock_db_session.scalar.return_value = existing_provider
|
||||
mock_db_session.flush.return_value = None
|
||||
mock_db_session.refresh.return_value = None
|
||||
mock_db_session.execute.return_value = None
|
||||
|
||||
upsert_voice_provider(
|
||||
db_session=mock_db_session,
|
||||
provider_id=1,
|
||||
name="Test",
|
||||
provider_type="openai",
|
||||
api_key=None,
|
||||
api_key_changed=False,
|
||||
activate_tts=True,
|
||||
)
|
||||
|
||||
assert existing_provider.is_default_tts is True
|
||||
|
||||
|
||||
class TestDeleteVoiceProvider:
|
||||
"""Tests for delete_voice_provider."""
|
||||
|
||||
def test_soft_deletes_provider_when_found(self, mock_db_session: MagicMock) -> None:
|
||||
provider = _make_voice_provider(id=1)
|
||||
mock_db_session.scalar.return_value = provider
|
||||
|
||||
delete_voice_provider(mock_db_session, 1)
|
||||
|
||||
assert provider.deleted is True
|
||||
mock_db_session.flush.assert_called_once()
|
||||
|
||||
def test_does_nothing_when_provider_not_found(
|
||||
self, mock_db_session: MagicMock
|
||||
) -> None:
|
||||
mock_db_session.scalar.return_value = None
|
||||
|
||||
delete_voice_provider(mock_db_session, 999)
|
||||
|
||||
mock_db_session.flush.assert_not_called()
|
||||
|
||||
|
||||
class TestSetDefaultProviders:
|
||||
"""Tests for set_default_stt_provider and set_default_tts_provider."""
|
||||
|
||||
def test_set_default_stt_provider_deactivates_others(
|
||||
self, mock_db_session: MagicMock
|
||||
) -> None:
|
||||
provider = _make_voice_provider(id=1)
|
||||
mock_db_session.scalar.return_value = provider
|
||||
mock_db_session.execute.return_value = None
|
||||
mock_db_session.flush.return_value = None
|
||||
mock_db_session.refresh.return_value = None
|
||||
|
||||
result = set_default_stt_provider(db_session=mock_db_session, provider_id=1)
|
||||
|
||||
mock_db_session.execute.assert_called_once()
|
||||
assert result.is_default_stt is True
|
||||
|
||||
def test_set_default_stt_provider_raises_when_not_found(
|
||||
self, mock_db_session: MagicMock
|
||||
) -> None:
|
||||
mock_db_session.scalar.return_value = None
|
||||
|
||||
with pytest.raises(OnyxError) as exc_info:
|
||||
set_default_stt_provider(db_session=mock_db_session, provider_id=999)
|
||||
|
||||
assert "No voice provider with id 999" in str(exc_info.value)
|
||||
|
||||
def test_set_default_tts_provider_deactivates_others(
|
||||
self, mock_db_session: MagicMock
|
||||
) -> None:
|
||||
provider = _make_voice_provider(id=1)
|
||||
mock_db_session.scalar.return_value = provider
|
||||
mock_db_session.execute.return_value = None
|
||||
mock_db_session.flush.return_value = None
|
||||
mock_db_session.refresh.return_value = None
|
||||
|
||||
result = set_default_tts_provider(db_session=mock_db_session, provider_id=1)
|
||||
|
||||
mock_db_session.execute.assert_called_once()
|
||||
assert result.is_default_tts is True
|
||||
|
||||
def test_set_default_tts_provider_updates_model_when_provided(
|
||||
self, mock_db_session: MagicMock
|
||||
) -> None:
|
||||
provider = _make_voice_provider(id=1)
|
||||
mock_db_session.scalar.return_value = provider
|
||||
mock_db_session.execute.return_value = None
|
||||
mock_db_session.flush.return_value = None
|
||||
mock_db_session.refresh.return_value = None
|
||||
|
||||
result = set_default_tts_provider(
|
||||
db_session=mock_db_session, provider_id=1, tts_model="tts-1-hd"
|
||||
)
|
||||
|
||||
assert result.tts_model == "tts-1-hd"
|
||||
|
||||
def test_set_default_tts_provider_raises_when_not_found(
|
||||
self, mock_db_session: MagicMock
|
||||
) -> None:
|
||||
mock_db_session.scalar.return_value = None
|
||||
|
||||
with pytest.raises(OnyxError) as exc_info:
|
||||
set_default_tts_provider(db_session=mock_db_session, provider_id=999)
|
||||
|
||||
assert "No voice provider with id 999" in str(exc_info.value)
|
||||
|
||||
|
||||
class TestDeactivateProviders:
|
||||
"""Tests for deactivate_stt_provider and deactivate_tts_provider."""
|
||||
|
||||
def test_deactivate_stt_provider_sets_false(
|
||||
self, mock_db_session: MagicMock
|
||||
) -> None:
|
||||
provider = _make_voice_provider(id=1, is_default_stt=True)
|
||||
mock_db_session.scalar.return_value = provider
|
||||
mock_db_session.flush.return_value = None
|
||||
mock_db_session.refresh.return_value = None
|
||||
|
||||
result = deactivate_stt_provider(db_session=mock_db_session, provider_id=1)
|
||||
|
||||
assert result.is_default_stt is False
|
||||
|
||||
def test_deactivate_stt_provider_raises_when_not_found(
|
||||
self, mock_db_session: MagicMock
|
||||
) -> None:
|
||||
mock_db_session.scalar.return_value = None
|
||||
|
||||
with pytest.raises(OnyxError) as exc_info:
|
||||
deactivate_stt_provider(db_session=mock_db_session, provider_id=999)
|
||||
|
||||
assert "No voice provider with id 999" in str(exc_info.value)
|
||||
|
||||
def test_deactivate_tts_provider_sets_false(
|
||||
self, mock_db_session: MagicMock
|
||||
) -> None:
|
||||
provider = _make_voice_provider(id=1, is_default_tts=True)
|
||||
mock_db_session.scalar.return_value = provider
|
||||
mock_db_session.flush.return_value = None
|
||||
mock_db_session.refresh.return_value = None
|
||||
|
||||
result = deactivate_tts_provider(db_session=mock_db_session, provider_id=1)
|
||||
|
||||
assert result.is_default_tts is False
|
||||
|
||||
def test_deactivate_tts_provider_raises_when_not_found(
|
||||
self, mock_db_session: MagicMock
|
||||
) -> None:
|
||||
mock_db_session.scalar.return_value = None
|
||||
|
||||
with pytest.raises(OnyxError) as exc_info:
|
||||
deactivate_tts_provider(db_session=mock_db_session, provider_id=999)
|
||||
|
||||
assert "No voice provider with id 999" in str(exc_info.value)
|
||||
|
||||
|
||||
class TestUpdateUserVoiceSettings:
|
||||
"""Tests for update_user_voice_settings."""
|
||||
|
||||
def test_updates_auto_send(self, mock_db_session: MagicMock) -> None:
|
||||
user_id = uuid4()
|
||||
|
||||
update_user_voice_settings(mock_db_session, user_id, auto_send=True)
|
||||
|
||||
mock_db_session.execute.assert_called_once()
|
||||
mock_db_session.flush.assert_called_once()
|
||||
|
||||
def test_updates_auto_playback(self, mock_db_session: MagicMock) -> None:
|
||||
user_id = uuid4()
|
||||
|
||||
update_user_voice_settings(mock_db_session, user_id, auto_playback=True)
|
||||
|
||||
mock_db_session.execute.assert_called_once()
|
||||
mock_db_session.flush.assert_called_once()
|
||||
|
||||
def test_updates_playback_speed_within_range(
|
||||
self, mock_db_session: MagicMock
|
||||
) -> None:
|
||||
user_id = uuid4()
|
||||
|
||||
update_user_voice_settings(mock_db_session, user_id, playback_speed=1.5)
|
||||
|
||||
mock_db_session.execute.assert_called_once()
|
||||
|
||||
def test_clamps_playback_speed_to_min(self, mock_db_session: MagicMock) -> None:
|
||||
user_id = uuid4()
|
||||
|
||||
update_user_voice_settings(mock_db_session, user_id, playback_speed=0.1)
|
||||
|
||||
mock_db_session.execute.assert_called_once()
|
||||
stmt = mock_db_session.execute.call_args[0][0]
|
||||
compiled = stmt.compile(compile_kwargs={"literal_binds": True})
|
||||
assert str(MIN_VOICE_PLAYBACK_SPEED) in str(compiled)
|
||||
|
||||
def test_clamps_playback_speed_to_max(self, mock_db_session: MagicMock) -> None:
|
||||
user_id = uuid4()
|
||||
|
||||
update_user_voice_settings(mock_db_session, user_id, playback_speed=5.0)
|
||||
|
||||
mock_db_session.execute.assert_called_once()
|
||||
stmt = mock_db_session.execute.call_args[0][0]
|
||||
compiled = stmt.compile(compile_kwargs={"literal_binds": True})
|
||||
assert str(MAX_VOICE_PLAYBACK_SPEED) in str(compiled)
|
||||
|
||||
def test_updates_multiple_settings(self, mock_db_session: MagicMock) -> None:
|
||||
user_id = uuid4()
|
||||
|
||||
update_user_voice_settings(
|
||||
mock_db_session,
|
||||
user_id,
|
||||
auto_send=True,
|
||||
auto_playback=False,
|
||||
playback_speed=1.25,
|
||||
)
|
||||
|
||||
mock_db_session.execute.assert_called_once()
|
||||
mock_db_session.flush.assert_called_once()
|
||||
|
||||
def test_does_nothing_when_no_settings_provided(
|
||||
self, mock_db_session: MagicMock
|
||||
) -> None:
|
||||
user_id = uuid4()
|
||||
|
||||
update_user_voice_settings(mock_db_session, user_id)
|
||||
|
||||
mock_db_session.execute.assert_not_called()
|
||||
mock_db_session.flush.assert_not_called()
|
||||
|
||||
|
||||
class TestSpeedClampingLogic:
|
||||
"""Tests for the speed clamping constants and logic."""
|
||||
|
||||
def test_min_speed_constant(self) -> None:
|
||||
assert MIN_VOICE_PLAYBACK_SPEED == 0.5
|
||||
|
||||
def test_max_speed_constant(self) -> None:
|
||||
assert MAX_VOICE_PLAYBACK_SPEED == 2.0
|
||||
|
||||
def test_clamping_formula(self) -> None:
|
||||
"""Verify the clamping formula used in update_user_voice_settings."""
|
||||
test_cases = [
|
||||
(0.1, MIN_VOICE_PLAYBACK_SPEED),
|
||||
(0.5, 0.5),
|
||||
(1.0, 1.0),
|
||||
(1.5, 1.5),
|
||||
(2.0, 2.0),
|
||||
(3.0, MAX_VOICE_PLAYBACK_SPEED),
|
||||
]
|
||||
for speed, expected in test_cases:
|
||||
clamped = max(
|
||||
MIN_VOICE_PLAYBACK_SPEED, min(MAX_VOICE_PLAYBACK_SPEED, speed)
|
||||
)
|
||||
assert (
|
||||
clamped == expected
|
||||
), f"speed={speed} expected={expected} got={clamped}"
|
||||
204
backend/tests/unit/onyx/onyxbot/test_handle_regular_answer.py
Normal file
204
backend/tests/unit/onyx/onyxbot/test_handle_regular_answer.py
Normal file
@@ -0,0 +1,204 @@
|
||||
"""Tests for Slack channel reference resolution and tag filtering
|
||||
in handle_regular_answer.py."""
|
||||
|
||||
from unittest.mock import MagicMock
|
||||
|
||||
from slack_sdk.errors import SlackApiError
|
||||
|
||||
from onyx.context.search.models import Tag
|
||||
from onyx.onyxbot.slack.constants import SLACK_CHANNEL_REF_PATTERN
|
||||
from onyx.onyxbot.slack.handlers.handle_regular_answer import resolve_channel_references
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Helpers
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
def _mock_client_with_channels(
|
||||
channel_map: dict[str, str],
|
||||
) -> MagicMock:
|
||||
"""Return a mock WebClient where conversations_info resolves IDs to names."""
|
||||
client = MagicMock()
|
||||
|
||||
def _conversations_info(channel: str) -> MagicMock:
|
||||
if channel in channel_map:
|
||||
resp = MagicMock()
|
||||
resp.validate = MagicMock()
|
||||
resp.__getitem__ = lambda _self, key: {
|
||||
"channel": {
|
||||
"name": channel_map[channel],
|
||||
"is_im": False,
|
||||
"is_mpim": False,
|
||||
}
|
||||
}[key]
|
||||
return resp
|
||||
raise SlackApiError("channel_not_found", response=MagicMock())
|
||||
|
||||
client.conversations_info = _conversations_info
|
||||
return client
|
||||
|
||||
|
||||
def _mock_logger() -> MagicMock:
|
||||
return MagicMock()
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# SLACK_CHANNEL_REF_PATTERN regex tests
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
class TestSlackChannelRefPattern:
|
||||
def test_matches_bare_channel_id(self) -> None:
|
||||
matches = SLACK_CHANNEL_REF_PATTERN.findall("<#C097NBWMY8Y>")
|
||||
assert matches == [("C097NBWMY8Y", "")]
|
||||
|
||||
def test_matches_channel_id_with_name(self) -> None:
|
||||
matches = SLACK_CHANNEL_REF_PATTERN.findall("<#C097NBWMY8Y|eng-infra>")
|
||||
assert matches == [("C097NBWMY8Y", "eng-infra")]
|
||||
|
||||
def test_matches_multiple_channels(self) -> None:
|
||||
msg = "compare <#C111AAA> and <#C222BBB|general>"
|
||||
matches = SLACK_CHANNEL_REF_PATTERN.findall(msg)
|
||||
assert len(matches) == 2
|
||||
assert ("C111AAA", "") in matches
|
||||
assert ("C222BBB", "general") in matches
|
||||
|
||||
def test_no_match_on_plain_text(self) -> None:
|
||||
matches = SLACK_CHANNEL_REF_PATTERN.findall("no channels here")
|
||||
assert matches == []
|
||||
|
||||
def test_no_match_on_user_mention(self) -> None:
|
||||
matches = SLACK_CHANNEL_REF_PATTERN.findall("<@U12345>")
|
||||
assert matches == []
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# resolve_channel_references tests
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
class TestResolveChannelReferences:
|
||||
def test_resolves_bare_channel_id_via_api(self) -> None:
|
||||
client = _mock_client_with_channels({"C097NBWMY8Y": "eng-infra"})
|
||||
logger = _mock_logger()
|
||||
|
||||
message, tags = resolve_channel_references(
|
||||
message="summary of <#C097NBWMY8Y> this week",
|
||||
client=client,
|
||||
logger=logger,
|
||||
)
|
||||
|
||||
assert message == "summary of #eng-infra this week"
|
||||
assert len(tags) == 1
|
||||
assert tags[0] == Tag(tag_key="Channel", tag_value="eng-infra")
|
||||
|
||||
def test_uses_name_from_pipe_format_without_api_call(self) -> None:
|
||||
client = MagicMock()
|
||||
logger = _mock_logger()
|
||||
|
||||
message, tags = resolve_channel_references(
|
||||
message="check <#C097NBWMY8Y|eng-infra> for updates",
|
||||
client=client,
|
||||
logger=logger,
|
||||
)
|
||||
|
||||
assert message == "check #eng-infra for updates"
|
||||
assert tags == [Tag(tag_key="Channel", tag_value="eng-infra")]
|
||||
# Should NOT have called the API since name was in the markup
|
||||
client.conversations_info.assert_not_called()
|
||||
|
||||
def test_multiple_channels(self) -> None:
|
||||
client = _mock_client_with_channels(
|
||||
{
|
||||
"C111AAA": "eng-infra",
|
||||
"C222BBB": "eng-general",
|
||||
}
|
||||
)
|
||||
logger = _mock_logger()
|
||||
|
||||
message, tags = resolve_channel_references(
|
||||
message="compare <#C111AAA> and <#C222BBB>",
|
||||
client=client,
|
||||
logger=logger,
|
||||
)
|
||||
|
||||
assert "#eng-infra" in message
|
||||
assert "#eng-general" in message
|
||||
assert "<#" not in message
|
||||
assert len(tags) == 2
|
||||
tag_values = {t.tag_value for t in tags}
|
||||
assert tag_values == {"eng-infra", "eng-general"}
|
||||
|
||||
def test_no_channel_references_returns_unchanged(self) -> None:
|
||||
client = MagicMock()
|
||||
logger = _mock_logger()
|
||||
|
||||
message, tags = resolve_channel_references(
|
||||
message="just a normal message with no channels",
|
||||
client=client,
|
||||
logger=logger,
|
||||
)
|
||||
|
||||
assert message == "just a normal message with no channels"
|
||||
assert tags == []
|
||||
|
||||
def test_api_failure_skips_channel_gracefully(self) -> None:
|
||||
# Client that fails for all channel lookups
|
||||
client = _mock_client_with_channels({})
|
||||
logger = _mock_logger()
|
||||
|
||||
message, tags = resolve_channel_references(
|
||||
message="check <#CBADID123>",
|
||||
client=client,
|
||||
logger=logger,
|
||||
)
|
||||
|
||||
# Message should remain unchanged for the failed channel
|
||||
assert "<#CBADID123>" in message
|
||||
assert tags == []
|
||||
logger.warning.assert_called_once()
|
||||
|
||||
def test_partial_failure_resolves_what_it_can(self) -> None:
|
||||
# Only one of two channels resolves
|
||||
client = _mock_client_with_channels({"C111AAA": "eng-infra"})
|
||||
logger = _mock_logger()
|
||||
|
||||
message, tags = resolve_channel_references(
|
||||
message="compare <#C111AAA> and <#CBADID123>",
|
||||
client=client,
|
||||
logger=logger,
|
||||
)
|
||||
|
||||
assert "#eng-infra" in message
|
||||
assert "<#CBADID123>" in message # failed one stays raw
|
||||
assert len(tags) == 1
|
||||
assert tags[0].tag_value == "eng-infra"
|
||||
|
||||
def test_duplicate_channel_produces_single_tag(self) -> None:
|
||||
client = _mock_client_with_channels({"C111AAA": "eng-infra"})
|
||||
logger = _mock_logger()
|
||||
|
||||
message, tags = resolve_channel_references(
|
||||
message="summarize <#C111AAA> and compare with <#C111AAA>",
|
||||
client=client,
|
||||
logger=logger,
|
||||
)
|
||||
|
||||
assert message == "summarize #eng-infra and compare with #eng-infra"
|
||||
assert len(tags) == 1
|
||||
assert tags[0].tag_value == "eng-infra"
|
||||
|
||||
def test_mixed_pipe_and_bare_formats(self) -> None:
|
||||
client = _mock_client_with_channels({"C222BBB": "random"})
|
||||
logger = _mock_logger()
|
||||
|
||||
message, tags = resolve_channel_references(
|
||||
message="see <#C111AAA|eng-infra> and <#C222BBB>",
|
||||
client=client,
|
||||
logger=logger,
|
||||
)
|
||||
|
||||
assert "#eng-infra" in message
|
||||
assert "#random" in message
|
||||
assert len(tags) == 2
|
||||
@@ -7,6 +7,8 @@ import timeago # type: ignore
|
||||
from onyx.configs.constants import DocumentSource
|
||||
from onyx.context.search.models import SavedSearchDoc
|
||||
from onyx.onyxbot.slack.blocks import _build_documents_blocks
|
||||
from onyx.onyxbot.slack.blocks import _find_unclosed_fence
|
||||
from onyx.onyxbot.slack.blocks import _split_text
|
||||
|
||||
|
||||
def _make_saved_doc(updated_at: datetime | None) -> SavedSearchDoc:
|
||||
@@ -69,3 +71,154 @@ def test_build_documents_blocks_formats_naive_timestamp(
|
||||
formatted_timestamp: datetime = captured["doc"]
|
||||
expected_timestamp: datetime = naive_timestamp.replace(tzinfo=pytz.utc)
|
||||
assert formatted_timestamp == expected_timestamp
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# _split_text tests
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
class TestSplitText:
|
||||
def test_short_text_returns_single_chunk(self) -> None:
|
||||
result = _split_text("hello world", limit=100)
|
||||
assert result == ["hello world"]
|
||||
|
||||
def test_splits_at_space_boundary(self) -> None:
|
||||
text = "aaa bbb ccc ddd"
|
||||
result = _split_text(text, limit=8)
|
||||
assert len(result) >= 2
|
||||
|
||||
def test_code_block_not_split_when_fits(self) -> None:
|
||||
# Text fits within limit — exercises the early-return path,
|
||||
# not the fence-aware splitting logic.
|
||||
text = "before ```code here``` after"
|
||||
result = _split_text(text, limit=100)
|
||||
assert result == [text]
|
||||
|
||||
def test_code_block_split_backs_up_before_fence(self) -> None:
|
||||
# Build text where the split point falls inside a code block,
|
||||
# but the code block itself fits within the limit. The split
|
||||
# should back up to before the opening ``` so the block stays intact.
|
||||
before = "some intro text here " * 5 + "\n" # ~105 chars
|
||||
code_content = "x " * 20 # ~40 chars of code
|
||||
text = f"{before}```\n{code_content}\n```\nafter"
|
||||
# limit=120 means the initial split lands inside the code block
|
||||
# but the code block (~50 chars) fits in the next chunk
|
||||
result = _split_text(text, limit=120)
|
||||
|
||||
assert len(result) >= 2
|
||||
# Every chunk must have balanced code fences
|
||||
for chunk in result:
|
||||
is_open, _, _ = _find_unclosed_fence(chunk)
|
||||
assert not is_open, f"Unclosed fence in chunk: {chunk[:80]}..."
|
||||
# The code block should be fully contained in one chunk
|
||||
code_chunks = [c for c in result if "```" in c]
|
||||
assert len(code_chunks) == 1, "Code block should not be split across chunks"
|
||||
|
||||
def test_no_code_fences_splits_normally(self) -> None:
|
||||
text = "word " * 100 # 500 chars
|
||||
result = _split_text(text, limit=100)
|
||||
assert len(result) >= 5
|
||||
for chunk in result:
|
||||
fence_count = chunk.count("```")
|
||||
assert fence_count == 0
|
||||
|
||||
def test_code_block_exceeding_limit_falls_back_to_close_reopen(self) -> None:
|
||||
# When the code block itself is bigger than the limit, we can't
|
||||
# avoid splitting inside it — verify fences are still balanced.
|
||||
code_content = "x " * 100 # ~200 chars
|
||||
text = f"```\n{code_content}\n```"
|
||||
result = _split_text(text, limit=80)
|
||||
|
||||
assert len(result) >= 2
|
||||
for chunk in result:
|
||||
is_open, _, _ = _find_unclosed_fence(chunk)
|
||||
assert not is_open, f"Unclosed fence in chunk: {chunk[:80]}..."
|
||||
|
||||
def test_code_block_exceeding_limit_no_spaces(self) -> None:
|
||||
# When code has no spaces, split_at is forced to limit.
|
||||
# Fences should still be balanced.
|
||||
code_content = "x" * 200
|
||||
text = f"```\n{code_content}\n```"
|
||||
result = _split_text(text, limit=80)
|
||||
|
||||
assert len(result) >= 2
|
||||
for chunk in result:
|
||||
is_open, _, _ = _find_unclosed_fence(chunk)
|
||||
assert not is_open, f"Unclosed fence in chunk: {chunk[:80]}..."
|
||||
|
||||
def test_all_content_preserved_after_split(self) -> None:
|
||||
text = "intro paragraph and more text here\n```\nprint('hello')\n```\nconclusion here"
|
||||
result = _split_text(text, limit=50)
|
||||
|
||||
# Key content should appear somewhere across the chunks
|
||||
joined = " ".join(result)
|
||||
assert "intro" in joined
|
||||
assert "print('hello')" in joined
|
||||
assert "conclusion" in joined
|
||||
|
||||
def test_language_specifier_preserved_on_reopen(self) -> None:
|
||||
# When a ```python block exceeds the limit and must be split,
|
||||
# the continuation chunk should reopen with ```python, not ```.
|
||||
code_content = "x " * 100 # ~200 chars
|
||||
text = f"```python\n{code_content}\n```"
|
||||
result = _split_text(text, limit=80)
|
||||
|
||||
assert len(result) >= 2
|
||||
for chunk in result[1:]:
|
||||
stripped = chunk.lstrip()
|
||||
if stripped.startswith("```"):
|
||||
assert stripped.startswith(
|
||||
"```python"
|
||||
), f"Language specifier lost in continuation: {chunk[:40]}"
|
||||
|
||||
def test_inline_backticks_inside_code_block_ignored(self) -> None:
|
||||
# Triple backticks appearing mid-line inside a code block should
|
||||
# not be mistaken for fence boundaries.
|
||||
before = "some text here " * 6 + "\n" # ~90 chars
|
||||
text = f"{before}```bash\necho '```'\necho done\n```\nafter"
|
||||
result = _split_text(text, limit=110)
|
||||
|
||||
assert len(result) >= 2
|
||||
for chunk in result:
|
||||
is_open, _, _ = _find_unclosed_fence(chunk)
|
||||
assert not is_open, f"Chunk has unclosed fence: {chunk[:80]}..."
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# _find_unclosed_fence tests
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
class TestFindUnclosedFence:
|
||||
def test_no_fences(self) -> None:
|
||||
is_open, _, _ = _find_unclosed_fence("just plain text")
|
||||
assert not is_open
|
||||
|
||||
def test_balanced_fences(self) -> None:
|
||||
is_open, _, _ = _find_unclosed_fence("```\ncode\n```")
|
||||
assert not is_open
|
||||
|
||||
def test_unclosed_fence(self) -> None:
|
||||
is_open, start, lang = _find_unclosed_fence("before\n```\ncode here")
|
||||
assert is_open
|
||||
assert start == len("before\n")
|
||||
assert lang == ""
|
||||
|
||||
def test_unclosed_fence_with_lang(self) -> None:
|
||||
is_open, _, lang = _find_unclosed_fence("intro\n```python\ncode")
|
||||
assert is_open
|
||||
assert lang == "python"
|
||||
|
||||
def test_inline_backticks_not_counted(self) -> None:
|
||||
# Backticks mid-line should not toggle fence state
|
||||
text = "```bash\necho '```'\necho done\n```"
|
||||
is_open, _, _ = _find_unclosed_fence(text)
|
||||
assert not is_open
|
||||
|
||||
def test_indented_backticks_not_counted_as_fence(self) -> None:
|
||||
# Slack only treats ``` at column 0 as a fence delimiter.
|
||||
# Indented backticks inside a code block are content, not fences.
|
||||
text = "```bash\ncat <<'EOF'\n ```\nEOF\n```"
|
||||
is_open, _, _ = _find_unclosed_fence(text)
|
||||
assert not is_open
|
||||
|
||||
@@ -1,23 +0,0 @@
|
||||
import pytest
|
||||
|
||||
from onyx.error_handling.exceptions import OnyxError
|
||||
from onyx.server.manage.voice.api import _validate_voice_api_base
|
||||
|
||||
|
||||
def test_validate_voice_api_base_blocks_private_for_non_azure() -> None:
|
||||
with pytest.raises(OnyxError, match="Invalid target URI"):
|
||||
_validate_voice_api_base("openai", "http://127.0.0.1:11434")
|
||||
|
||||
|
||||
def test_validate_voice_api_base_allows_private_for_azure() -> None:
|
||||
validated = _validate_voice_api_base("azure", "http://127.0.0.1:5000")
|
||||
assert validated == "http://127.0.0.1:5000"
|
||||
|
||||
|
||||
def test_validate_voice_api_base_blocks_metadata_for_azure() -> None:
|
||||
with pytest.raises(OnyxError, match="Invalid target URI"):
|
||||
_validate_voice_api_base("azure", "http://metadata.google.internal/")
|
||||
|
||||
|
||||
def test_validate_voice_api_base_returns_none_for_none() -> None:
|
||||
assert _validate_voice_api_base("openai", None) is None
|
||||
54
backend/tests/unit/onyx/server/test_full_user_snapshot.py
Normal file
54
backend/tests/unit/onyx/server/test_full_user_snapshot.py
Normal file
@@ -0,0 +1,54 @@
|
||||
import datetime
|
||||
from unittest.mock import MagicMock
|
||||
from uuid import uuid4
|
||||
|
||||
from onyx.auth.schemas import UserRole
|
||||
from onyx.server.models import FullUserSnapshot
|
||||
from onyx.server.models import UserGroupInfo
|
||||
|
||||
|
||||
def _mock_user(
|
||||
personal_name: str | None = "Test User",
|
||||
created_at: datetime.datetime | None = None,
|
||||
updated_at: datetime.datetime | None = None,
|
||||
) -> MagicMock:
|
||||
user = MagicMock()
|
||||
user.id = uuid4()
|
||||
user.email = "test@example.com"
|
||||
user.role = UserRole.BASIC
|
||||
user.is_active = True
|
||||
user.password_configured = True
|
||||
user.personal_name = personal_name
|
||||
user.created_at = created_at or datetime.datetime(
|
||||
2025, 1, 1, tzinfo=datetime.timezone.utc
|
||||
)
|
||||
user.updated_at = updated_at or datetime.datetime(
|
||||
2025, 6, 15, tzinfo=datetime.timezone.utc
|
||||
)
|
||||
return user
|
||||
|
||||
|
||||
def test_from_user_model_includes_new_fields() -> None:
|
||||
user = _mock_user(personal_name="Alice")
|
||||
groups = [UserGroupInfo(id=1, name="Engineering")]
|
||||
|
||||
snapshot = FullUserSnapshot.from_user_model(user, groups=groups)
|
||||
|
||||
assert snapshot.personal_name == "Alice"
|
||||
assert snapshot.created_at == user.created_at
|
||||
assert snapshot.updated_at == user.updated_at
|
||||
assert snapshot.groups == groups
|
||||
|
||||
|
||||
def test_from_user_model_defaults_groups_to_empty() -> None:
|
||||
user = _mock_user()
|
||||
snapshot = FullUserSnapshot.from_user_model(user)
|
||||
|
||||
assert snapshot.groups == []
|
||||
|
||||
|
||||
def test_from_user_model_personal_name_none() -> None:
|
||||
user = _mock_user(personal_name=None)
|
||||
snapshot = FullUserSnapshot.from_user_model(user)
|
||||
|
||||
assert snapshot.personal_name is None
|
||||
@@ -14,7 +14,6 @@ from onyx.utils.url import _is_ip_private_or_reserved
|
||||
from onyx.utils.url import _validate_and_resolve_url
|
||||
from onyx.utils.url import ssrf_safe_get
|
||||
from onyx.utils.url import SSRFException
|
||||
from onyx.utils.url import validate_outbound_http_url
|
||||
|
||||
|
||||
class TestIsIpPrivateOrReserved:
|
||||
@@ -306,22 +305,3 @@ class TestSsrfSafeGet:
|
||||
|
||||
call_args = mock_get.call_args
|
||||
assert call_args[1]["timeout"] == (5, 15)
|
||||
|
||||
|
||||
class TestValidateOutboundHttpUrl:
|
||||
def test_rejects_private_ip_by_default(self) -> None:
|
||||
with pytest.raises(SSRFException, match="internal/private IP"):
|
||||
validate_outbound_http_url("http://127.0.0.1:8000")
|
||||
|
||||
def test_allows_private_ip_when_explicitly_enabled(self) -> None:
|
||||
validated_url = validate_outbound_http_url(
|
||||
"http://127.0.0.1:8000", allow_private_network=True
|
||||
)
|
||||
assert validated_url == "http://127.0.0.1:8000"
|
||||
|
||||
def test_blocks_metadata_hostname_when_private_is_enabled(self) -> None:
|
||||
with pytest.raises(SSRFException, match="not allowed"):
|
||||
validate_outbound_http_url(
|
||||
"http://metadata.google.internal/latest",
|
||||
allow_private_network=True,
|
||||
)
|
||||
|
||||
@@ -1,30 +0,0 @@
|
||||
import pytest
|
||||
|
||||
from onyx.voice.providers.azure import AzureVoiceProvider
|
||||
|
||||
|
||||
def test_azure_provider_extracts_region_from_target_uri() -> None:
|
||||
provider = AzureVoiceProvider(
|
||||
api_key="key",
|
||||
api_base="https://westus.api.cognitive.microsoft.com/",
|
||||
custom_config={},
|
||||
)
|
||||
assert provider.speech_region == "westus"
|
||||
|
||||
|
||||
def test_azure_provider_normalizes_uppercase_region() -> None:
|
||||
provider = AzureVoiceProvider(
|
||||
api_key="key",
|
||||
api_base=None,
|
||||
custom_config={"speech_region": "WestUS2"},
|
||||
)
|
||||
assert provider.speech_region == "westus2"
|
||||
|
||||
|
||||
def test_azure_provider_rejects_invalid_speech_region() -> None:
|
||||
with pytest.raises(ValueError, match="Invalid Azure speech_region"):
|
||||
AzureVoiceProvider(
|
||||
api_key="key",
|
||||
api_base=None,
|
||||
custom_config={"speech_region": "westus/../../etc"},
|
||||
)
|
||||
@@ -1,194 +0,0 @@
|
||||
import io
|
||||
import struct
|
||||
import wave
|
||||
|
||||
import pytest
|
||||
|
||||
from onyx.voice.providers.azure import AzureVoiceProvider
|
||||
|
||||
|
||||
# --- _is_azure_cloud_url ---
|
||||
|
||||
|
||||
def test_is_azure_cloud_url_speech_microsoft() -> None:
|
||||
assert AzureVoiceProvider._is_azure_cloud_url(
|
||||
"https://eastus.tts.speech.microsoft.com/cognitiveservices/v1"
|
||||
)
|
||||
|
||||
|
||||
def test_is_azure_cloud_url_cognitive_microsoft() -> None:
|
||||
assert AzureVoiceProvider._is_azure_cloud_url(
|
||||
"https://westus.api.cognitive.microsoft.com/"
|
||||
)
|
||||
|
||||
|
||||
def test_is_azure_cloud_url_rejects_custom_host() -> None:
|
||||
assert not AzureVoiceProvider._is_azure_cloud_url("https://my-custom-host.com/")
|
||||
|
||||
|
||||
def test_is_azure_cloud_url_rejects_none() -> None:
|
||||
assert not AzureVoiceProvider._is_azure_cloud_url(None)
|
||||
|
||||
|
||||
# --- _extract_speech_region_from_uri ---
|
||||
|
||||
|
||||
def test_extract_region_from_tts_url() -> None:
|
||||
assert (
|
||||
AzureVoiceProvider._extract_speech_region_from_uri(
|
||||
"https://eastus.tts.speech.microsoft.com/cognitiveservices/v1"
|
||||
)
|
||||
== "eastus"
|
||||
)
|
||||
|
||||
|
||||
def test_extract_region_from_cognitive_api_url() -> None:
|
||||
assert (
|
||||
AzureVoiceProvider._extract_speech_region_from_uri(
|
||||
"https://eastus.api.cognitive.microsoft.com/"
|
||||
)
|
||||
== "eastus"
|
||||
)
|
||||
|
||||
|
||||
def test_extract_region_returns_none_for_custom_domain() -> None:
|
||||
"""Custom domains use resource name, not region — must use speech_region config."""
|
||||
assert (
|
||||
AzureVoiceProvider._extract_speech_region_from_uri(
|
||||
"https://myresource.cognitiveservices.azure.com/"
|
||||
)
|
||||
is None
|
||||
)
|
||||
|
||||
|
||||
def test_extract_region_returns_none_for_none() -> None:
|
||||
assert AzureVoiceProvider._extract_speech_region_from_uri(None) is None
|
||||
|
||||
|
||||
# --- _validate_speech_region ---
|
||||
|
||||
|
||||
def test_validate_region_normalizes_to_lowercase() -> None:
|
||||
assert AzureVoiceProvider._validate_speech_region("WestUS2") == "westus2"
|
||||
|
||||
|
||||
def test_validate_region_accepts_hyphens() -> None:
|
||||
assert AzureVoiceProvider._validate_speech_region("us-east-1") == "us-east-1"
|
||||
|
||||
|
||||
def test_validate_region_rejects_path_traversal() -> None:
|
||||
with pytest.raises(ValueError, match="Invalid Azure speech_region"):
|
||||
AzureVoiceProvider._validate_speech_region("westus/../../etc")
|
||||
|
||||
|
||||
def test_validate_region_rejects_dots() -> None:
|
||||
with pytest.raises(ValueError, match="Invalid Azure speech_region"):
|
||||
AzureVoiceProvider._validate_speech_region("west.us")
|
||||
|
||||
|
||||
# --- _pcm16_to_wav ---
|
||||
|
||||
|
||||
def test_pcm16_to_wav_produces_valid_wav() -> None:
|
||||
samples = [32767, -32768, 0, 1234]
|
||||
pcm_data = struct.pack(f"<{len(samples)}h", *samples)
|
||||
wav_bytes = AzureVoiceProvider._pcm16_to_wav(pcm_data, sample_rate=16000)
|
||||
|
||||
with wave.open(io.BytesIO(wav_bytes), "rb") as wav_file:
|
||||
assert wav_file.getnchannels() == 1
|
||||
assert wav_file.getsampwidth() == 2
|
||||
assert wav_file.getframerate() == 16000
|
||||
frames = wav_file.readframes(4)
|
||||
recovered = struct.unpack(f"<{len(samples)}h", frames)
|
||||
assert list(recovered) == samples
|
||||
|
||||
|
||||
# --- URL Construction ---
|
||||
|
||||
|
||||
def test_get_tts_url_cloud() -> None:
|
||||
provider = AzureVoiceProvider(
|
||||
api_key="key", api_base=None, custom_config={"speech_region": "eastus"}
|
||||
)
|
||||
assert (
|
||||
provider._get_tts_url()
|
||||
== "https://eastus.tts.speech.microsoft.com/cognitiveservices/v1"
|
||||
)
|
||||
|
||||
|
||||
def test_get_stt_url_cloud() -> None:
|
||||
provider = AzureVoiceProvider(
|
||||
api_key="key", api_base=None, custom_config={"speech_region": "westus2"}
|
||||
)
|
||||
assert "westus2.stt.speech.microsoft.com" in provider._get_stt_url()
|
||||
|
||||
|
||||
def test_get_tts_url_self_hosted() -> None:
|
||||
provider = AzureVoiceProvider(
|
||||
api_key="key", api_base="http://localhost:5000", custom_config={}
|
||||
)
|
||||
assert provider._get_tts_url() == "http://localhost:5000/cognitiveservices/v1"
|
||||
|
||||
|
||||
def test_get_tts_url_self_hosted_strips_trailing_slash() -> None:
|
||||
provider = AzureVoiceProvider(
|
||||
api_key="key", api_base="http://localhost:5000/", custom_config={}
|
||||
)
|
||||
assert provider._get_tts_url() == "http://localhost:5000/cognitiveservices/v1"
|
||||
|
||||
|
||||
# --- _is_self_hosted ---
|
||||
|
||||
|
||||
def test_is_self_hosted_true_for_custom_endpoint() -> None:
|
||||
provider = AzureVoiceProvider(
|
||||
api_key="key", api_base="http://localhost:5000", custom_config={}
|
||||
)
|
||||
assert provider._is_self_hosted() is True
|
||||
|
||||
|
||||
def test_is_self_hosted_false_for_azure_cloud() -> None:
|
||||
provider = AzureVoiceProvider(
|
||||
api_key="key",
|
||||
api_base="https://eastus.api.cognitive.microsoft.com/",
|
||||
custom_config={},
|
||||
)
|
||||
assert provider._is_self_hosted() is False
|
||||
|
||||
|
||||
# --- Resampling ---
|
||||
|
||||
|
||||
def test_resample_pcm16_passthrough() -> None:
|
||||
from onyx.voice.providers.azure import AzureStreamingTranscriber
|
||||
|
||||
t = AzureStreamingTranscriber.__new__(AzureStreamingTranscriber)
|
||||
t.input_sample_rate = 16000
|
||||
t.target_sample_rate = 16000
|
||||
|
||||
data = struct.pack("<4h", 100, 200, 300, 400)
|
||||
assert t._resample_pcm16(data) == data
|
||||
|
||||
|
||||
def test_resample_pcm16_downsamples() -> None:
|
||||
from onyx.voice.providers.azure import AzureStreamingTranscriber
|
||||
|
||||
t = AzureStreamingTranscriber.__new__(AzureStreamingTranscriber)
|
||||
t.input_sample_rate = 24000
|
||||
t.target_sample_rate = 16000
|
||||
|
||||
input_samples = [1000, 2000, 3000, 4000, 5000, 6000]
|
||||
data = struct.pack(f"<{len(input_samples)}h", *input_samples)
|
||||
|
||||
result = t._resample_pcm16(data)
|
||||
assert len(result) // 2 == 4
|
||||
|
||||
|
||||
def test_resample_pcm16_empty_data() -> None:
|
||||
from onyx.voice.providers.azure import AzureStreamingTranscriber
|
||||
|
||||
t = AzureStreamingTranscriber.__new__(AzureStreamingTranscriber)
|
||||
t.input_sample_rate = 24000
|
||||
t.target_sample_rate = 16000
|
||||
|
||||
assert t._resample_pcm16(b"") == b""
|
||||
@@ -1,117 +0,0 @@
|
||||
import struct
|
||||
|
||||
from onyx.voice.providers.elevenlabs import _http_to_ws_url
|
||||
from onyx.voice.providers.elevenlabs import DEFAULT_ELEVENLABS_API_BASE
|
||||
from onyx.voice.providers.elevenlabs import ElevenLabsSTTMessageType
|
||||
from onyx.voice.providers.elevenlabs import ElevenLabsVoiceProvider
|
||||
|
||||
|
||||
# --- _http_to_ws_url ---
|
||||
|
||||
|
||||
def test_http_to_ws_url_converts_https_to_wss() -> None:
|
||||
assert _http_to_ws_url("https://api.elevenlabs.io") == "wss://api.elevenlabs.io"
|
||||
|
||||
|
||||
def test_http_to_ws_url_converts_http_to_ws() -> None:
|
||||
assert _http_to_ws_url("http://localhost:8080") == "ws://localhost:8080"
|
||||
|
||||
|
||||
def test_http_to_ws_url_passes_through_other_schemes() -> None:
|
||||
assert _http_to_ws_url("wss://already.ws") == "wss://already.ws"
|
||||
|
||||
|
||||
def test_http_to_ws_url_preserves_path() -> None:
|
||||
assert (
|
||||
_http_to_ws_url("https://api.elevenlabs.io/v1/tts")
|
||||
== "wss://api.elevenlabs.io/v1/tts"
|
||||
)
|
||||
|
||||
|
||||
# --- StrEnum comparison ---
|
||||
|
||||
|
||||
def test_stt_message_type_compares_as_string() -> None:
|
||||
"""StrEnum members should work in string comparisons (e.g. from JSON)."""
|
||||
assert str(ElevenLabsSTTMessageType.COMMITTED_TRANSCRIPT) == "committed_transcript"
|
||||
assert isinstance(ElevenLabsSTTMessageType.ERROR, str)
|
||||
|
||||
|
||||
# --- Resampling ---
|
||||
|
||||
|
||||
def test_resample_pcm16_passthrough_when_same_rate() -> None:
|
||||
from onyx.voice.providers.elevenlabs import ElevenLabsStreamingTranscriber
|
||||
|
||||
t = ElevenLabsStreamingTranscriber.__new__(ElevenLabsStreamingTranscriber)
|
||||
t.input_sample_rate = 16000
|
||||
t.target_sample_rate = 16000
|
||||
|
||||
data = struct.pack("<4h", 100, 200, 300, 400)
|
||||
assert t._resample_pcm16(data) == data
|
||||
|
||||
|
||||
def test_resample_pcm16_downsamples() -> None:
|
||||
"""24kHz -> 16kHz should produce fewer samples (ratio 3:2)."""
|
||||
from onyx.voice.providers.elevenlabs import ElevenLabsStreamingTranscriber
|
||||
|
||||
t = ElevenLabsStreamingTranscriber.__new__(ElevenLabsStreamingTranscriber)
|
||||
t.input_sample_rate = 24000
|
||||
t.target_sample_rate = 16000
|
||||
|
||||
input_samples = [1000, 2000, 3000, 4000, 5000, 6000]
|
||||
data = struct.pack(f"<{len(input_samples)}h", *input_samples)
|
||||
|
||||
result = t._resample_pcm16(data)
|
||||
output_samples = struct.unpack(f"<{len(result) // 2}h", result)
|
||||
|
||||
assert len(output_samples) == 4
|
||||
|
||||
|
||||
def test_resample_pcm16_clamps_to_int16_range() -> None:
|
||||
from onyx.voice.providers.elevenlabs import ElevenLabsStreamingTranscriber
|
||||
|
||||
t = ElevenLabsStreamingTranscriber.__new__(ElevenLabsStreamingTranscriber)
|
||||
t.input_sample_rate = 24000
|
||||
t.target_sample_rate = 16000
|
||||
|
||||
input_samples = [32767, -32768, 32767, -32768, 32767, -32768]
|
||||
data = struct.pack(f"<{len(input_samples)}h", *input_samples)
|
||||
|
||||
result = t._resample_pcm16(data)
|
||||
output_samples = struct.unpack(f"<{len(result) // 2}h", result)
|
||||
for s in output_samples:
|
||||
assert -32768 <= s <= 32767
|
||||
|
||||
|
||||
# --- Provider Model Defaulting ---
|
||||
|
||||
|
||||
def test_provider_defaults_invalid_stt_model() -> None:
|
||||
provider = ElevenLabsVoiceProvider(api_key="test", stt_model="invalid_model")
|
||||
assert provider.stt_model == "scribe_v1"
|
||||
|
||||
|
||||
def test_provider_defaults_invalid_tts_model() -> None:
|
||||
provider = ElevenLabsVoiceProvider(api_key="test", tts_model="invalid_model")
|
||||
assert provider.tts_model == "eleven_multilingual_v2"
|
||||
|
||||
|
||||
def test_provider_accepts_valid_models() -> None:
|
||||
provider = ElevenLabsVoiceProvider(
|
||||
api_key="test", stt_model="scribe_v2_realtime", tts_model="eleven_turbo_v2_5"
|
||||
)
|
||||
assert provider.stt_model == "scribe_v2_realtime"
|
||||
assert provider.tts_model == "eleven_turbo_v2_5"
|
||||
|
||||
|
||||
def test_provider_defaults_api_base() -> None:
|
||||
provider = ElevenLabsVoiceProvider(api_key="test")
|
||||
assert provider.api_base == DEFAULT_ELEVENLABS_API_BASE
|
||||
|
||||
|
||||
def test_provider_get_available_voices_returns_copy() -> None:
|
||||
provider = ElevenLabsVoiceProvider(api_key="test")
|
||||
voices = provider.get_available_voices()
|
||||
voices.clear()
|
||||
assert len(provider.get_available_voices()) > 0
|
||||
@@ -1,97 +0,0 @@
|
||||
import io
|
||||
import struct
|
||||
import wave
|
||||
|
||||
from onyx.voice.providers.openai import _create_wav_header
|
||||
from onyx.voice.providers.openai import _http_to_ws_url
|
||||
from onyx.voice.providers.openai import OpenAIRealtimeMessageType
|
||||
from onyx.voice.providers.openai import OpenAIVoiceProvider
|
||||
|
||||
|
||||
# --- _http_to_ws_url ---
|
||||
|
||||
|
||||
def test_http_to_ws_url_converts_https_to_wss() -> None:
|
||||
assert _http_to_ws_url("https://api.openai.com") == "wss://api.openai.com"
|
||||
|
||||
|
||||
def test_http_to_ws_url_converts_http_to_ws() -> None:
|
||||
assert _http_to_ws_url("http://localhost:9090") == "ws://localhost:9090"
|
||||
|
||||
|
||||
def test_http_to_ws_url_passes_through_ws() -> None:
|
||||
assert _http_to_ws_url("wss://already.ws") == "wss://already.ws"
|
||||
|
||||
|
||||
# --- StrEnum comparison ---
|
||||
|
||||
|
||||
def test_realtime_message_type_compares_as_string() -> None:
|
||||
assert str(OpenAIRealtimeMessageType.ERROR) == "error"
|
||||
assert (
|
||||
str(OpenAIRealtimeMessageType.TRANSCRIPTION_DELTA)
|
||||
== "conversation.item.input_audio_transcription.delta"
|
||||
)
|
||||
assert isinstance(OpenAIRealtimeMessageType.ERROR, str)
|
||||
|
||||
|
||||
# --- _create_wav_header ---
|
||||
|
||||
|
||||
def test_wav_header_is_44_bytes() -> None:
|
||||
assert len(_create_wav_header(1000)) == 44
|
||||
|
||||
|
||||
def test_wav_header_chunk_size_matches_data_length() -> None:
|
||||
data_length = 2000
|
||||
header = _create_wav_header(data_length)
|
||||
chunk_size = struct.unpack_from("<I", header, 4)[0]
|
||||
assert chunk_size == 36 + data_length
|
||||
|
||||
|
||||
def test_wav_header_byte_rate() -> None:
|
||||
header = _create_wav_header(100, sample_rate=24000, channels=1, bits_per_sample=16)
|
||||
byte_rate = struct.unpack_from("<I", header, 28)[0]
|
||||
assert byte_rate == 24000 * 1 * 16 // 8
|
||||
|
||||
|
||||
def test_wav_header_produces_valid_wav() -> None:
|
||||
"""Header + PCM data should parse as valid WAV."""
|
||||
data_length = 100
|
||||
pcm_data = b"\x00" * data_length
|
||||
header = _create_wav_header(data_length, sample_rate=24000)
|
||||
|
||||
with wave.open(io.BytesIO(header + pcm_data), "rb") as wav_file:
|
||||
assert wav_file.getnchannels() == 1
|
||||
assert wav_file.getsampwidth() == 2
|
||||
assert wav_file.getframerate() == 24000
|
||||
assert wav_file.getnframes() == data_length // 2
|
||||
|
||||
|
||||
# --- Provider Defaults ---
|
||||
|
||||
|
||||
def test_provider_default_models() -> None:
|
||||
provider = OpenAIVoiceProvider(api_key="test")
|
||||
assert provider.stt_model == "whisper-1"
|
||||
assert provider.tts_model == "tts-1"
|
||||
assert provider.default_voice == "alloy"
|
||||
|
||||
|
||||
def test_provider_custom_models() -> None:
|
||||
provider = OpenAIVoiceProvider(
|
||||
api_key="test",
|
||||
stt_model="gpt-4o-transcribe",
|
||||
tts_model="tts-1-hd",
|
||||
default_voice="nova",
|
||||
)
|
||||
assert provider.stt_model == "gpt-4o-transcribe"
|
||||
assert provider.tts_model == "tts-1-hd"
|
||||
assert provider.default_voice == "nova"
|
||||
|
||||
|
||||
def test_provider_get_available_voices_returns_copy() -> None:
|
||||
provider = OpenAIVoiceProvider(api_key="test")
|
||||
voices = provider.get_available_voices()
|
||||
voices.clear()
|
||||
assert len(provider.get_available_voices()) > 0
|
||||
@@ -35,7 +35,6 @@ backend = [
|
||||
"alembic==1.10.4",
|
||||
"asyncpg==0.30.0",
|
||||
"atlassian-python-api==3.41.16",
|
||||
"azure-cognitiveservices-speech==1.38.0",
|
||||
"beautifulsoup4==4.12.3",
|
||||
"boto3==1.39.11",
|
||||
"boto3-stubs[s3]==1.39.11",
|
||||
@@ -144,7 +143,7 @@ dev = [
|
||||
"matplotlib==3.10.8",
|
||||
"mypy-extensions==1.0.0",
|
||||
"mypy==1.13.0",
|
||||
"onyx-devtools==0.6.3",
|
||||
"onyx-devtools==0.7.0",
|
||||
"openapi-generator-cli==7.17.0",
|
||||
"pandas-stubs~=2.3.3",
|
||||
"pre-commit==3.2.2",
|
||||
|
||||
@@ -25,6 +25,9 @@ Some commands require external tools to be installed and configured:
|
||||
- **Docker** - Required for `compose`, `logs`, and `pull` commands
|
||||
- Install from [docker.com](https://docs.docker.com/get-docker/)
|
||||
|
||||
- **uv** - Required for `backend` commands
|
||||
- Install from [docs.astral.sh/uv](https://docs.astral.sh/uv/)
|
||||
|
||||
- **GitHub CLI** (`gh`) - Required for `run-ci` and `cherry-pick` commands
|
||||
- Install from [cli.github.com](https://cli.github.com/)
|
||||
- Authenticate with `gh auth login`
|
||||
@@ -170,6 +173,53 @@ ods pull
|
||||
ods pull --tag edge
|
||||
```
|
||||
|
||||
### `backend` - Run Backend Services
|
||||
|
||||
Run backend services (API server, model server) with environment loaded from
|
||||
`.vscode/.env`. On first run, copies `.vscode/env_template.txt` to `.vscode/.env`
|
||||
if the `.env` file does not already exist.
|
||||
|
||||
Enterprise Edition features are enabled by default with license enforcement
|
||||
disabled, matching the `compose` command behavior.
|
||||
|
||||
```shell
|
||||
ods backend <subcommand>
|
||||
```
|
||||
|
||||
**Subcommands:**
|
||||
|
||||
- `api` - Start the FastAPI backend server (`uvicorn onyx.main:app --reload`)
|
||||
- `model_server` - Start the model server (`uvicorn model_server.main:app --reload`)
|
||||
|
||||
**Flags:**
|
||||
|
||||
| Flag | Default | Description |
|
||||
|------|---------|-------------|
|
||||
| `--no-ee` | `false` | Disable Enterprise Edition features (enabled by default) |
|
||||
| `--port` | `8080` (api) / `9000` (model_server) | Port to listen on |
|
||||
|
||||
Shell environment takes precedence over `.env` file values, so inline overrides
|
||||
work as expected (e.g. `S3_ENDPOINT_URL=foo ods backend api`).
|
||||
|
||||
**Examples:**
|
||||
|
||||
```shell
|
||||
# Start the API server
|
||||
ods backend api
|
||||
|
||||
# Start the API server on a custom port
|
||||
ods backend api --port 9090
|
||||
|
||||
# Start without Enterprise Edition
|
||||
ods backend api --no-ee
|
||||
|
||||
# Start the model server
|
||||
ods backend model_server
|
||||
|
||||
# Start the model server on a custom port
|
||||
ods backend model_server --port 9001
|
||||
```
|
||||
|
||||
### `web` - Run Frontend Scripts
|
||||
|
||||
Run npm scripts from `web/package.json` without manually changing directories.
|
||||
|
||||
242
tools/ods/cmd/backend.go
Normal file
242
tools/ods/cmd/backend.go
Normal file
@@ -0,0 +1,242 @@
|
||||
package cmd
|
||||
|
||||
import (
|
||||
"bufio"
|
||||
"errors"
|
||||
"fmt"
|
||||
"os"
|
||||
"os/exec"
|
||||
"path/filepath"
|
||||
"strings"
|
||||
|
||||
log "github.com/sirupsen/logrus"
|
||||
"github.com/spf13/cobra"
|
||||
|
||||
"github.com/onyx-dot-app/onyx/tools/ods/internal/paths"
|
||||
)
|
||||
|
||||
// NewBackendCommand creates the parent "backend" command with subcommands for
|
||||
// running backend services.
|
||||
// BackendOptions holds options shared across backend subcommands.
|
||||
type BackendOptions struct {
|
||||
NoEE bool
|
||||
}
|
||||
|
||||
func NewBackendCommand() *cobra.Command {
|
||||
opts := &BackendOptions{}
|
||||
|
||||
cmd := &cobra.Command{
|
||||
Use: "backend",
|
||||
Short: "Run backend services (api, model_server)",
|
||||
Long: `Run backend services with environment from .vscode/.env.
|
||||
|
||||
On first run, copies .vscode/env_template.txt to .vscode/.env if the
|
||||
.env file does not already exist.
|
||||
|
||||
Enterprise Edition features are enabled by default for development,
|
||||
with license enforcement disabled.
|
||||
|
||||
Available subcommands:
|
||||
api Start the FastAPI backend server
|
||||
model_server Start the model server`,
|
||||
}
|
||||
|
||||
cmd.PersistentFlags().BoolVar(&opts.NoEE, "no-ee", false, "Disable Enterprise Edition features (enabled by default)")
|
||||
|
||||
cmd.AddCommand(newBackendAPICommand(opts))
|
||||
cmd.AddCommand(newBackendModelServerCommand(opts))
|
||||
|
||||
return cmd
|
||||
}
|
||||
|
||||
func newBackendAPICommand(opts *BackendOptions) *cobra.Command {
|
||||
var port string
|
||||
|
||||
cmd := &cobra.Command{
|
||||
Use: "api",
|
||||
Short: "Start the backend API server (uvicorn with hot-reload)",
|
||||
Long: `Start the backend API server using uvicorn with hot-reload.
|
||||
|
||||
Examples:
|
||||
ods backend api
|
||||
ods backend api --port 9090
|
||||
ods backend api --no-ee`,
|
||||
Run: func(cmd *cobra.Command, args []string) {
|
||||
runBackendService("api", "onyx.main:app", port, opts)
|
||||
},
|
||||
}
|
||||
|
||||
cmd.Flags().StringVar(&port, "port", "8080", "Port to listen on")
|
||||
|
||||
return cmd
|
||||
}
|
||||
|
||||
func newBackendModelServerCommand(opts *BackendOptions) *cobra.Command {
|
||||
var port string
|
||||
|
||||
cmd := &cobra.Command{
|
||||
Use: "model_server",
|
||||
Short: "Start the model server (uvicorn with hot-reload)",
|
||||
Long: `Start the model server using uvicorn with hot-reload.
|
||||
|
||||
Examples:
|
||||
ods backend model_server
|
||||
ods backend model_server --port 9001`,
|
||||
Run: func(cmd *cobra.Command, args []string) {
|
||||
runBackendService("model_server", "model_server.main:app", port, opts)
|
||||
},
|
||||
}
|
||||
|
||||
cmd.Flags().StringVar(&port, "port", "9000", "Port to listen on")
|
||||
|
||||
return cmd
|
||||
}
|
||||
|
||||
func runBackendService(name, module, port string, opts *BackendOptions) {
|
||||
root, err := paths.GitRoot()
|
||||
if err != nil {
|
||||
log.Fatalf("Failed to find git root: %v", err)
|
||||
}
|
||||
|
||||
envFile := ensureBackendEnvFile(root)
|
||||
fileVars := loadBackendEnvFile(envFile)
|
||||
|
||||
eeDefaults := eeEnvDefaults(opts.NoEE)
|
||||
fileVars = append(fileVars, eeDefaults...)
|
||||
|
||||
backendDir := filepath.Join(root, "backend")
|
||||
|
||||
uvicornArgs := []string{
|
||||
"run", "uvicorn", module,
|
||||
"--reload",
|
||||
"--port", port,
|
||||
}
|
||||
log.Infof("Starting %s on port %s...", name, port)
|
||||
if !opts.NoEE {
|
||||
log.Info("Enterprise Edition enabled (use --no-ee to disable)")
|
||||
}
|
||||
log.Debugf("Running in %s: uv %v", backendDir, uvicornArgs)
|
||||
|
||||
mergedEnv := mergeEnv(os.Environ(), fileVars)
|
||||
log.Debugf("Applied %d env vars from %s (shell takes precedence)", len(fileVars), envFile)
|
||||
|
||||
svcCmd := exec.Command("uv", uvicornArgs...)
|
||||
svcCmd.Dir = backendDir
|
||||
svcCmd.Stdout = os.Stdout
|
||||
svcCmd.Stderr = os.Stderr
|
||||
svcCmd.Stdin = os.Stdin
|
||||
svcCmd.Env = mergedEnv
|
||||
|
||||
if err := svcCmd.Run(); err != nil {
|
||||
var exitErr *exec.ExitError
|
||||
if errors.As(err, &exitErr) {
|
||||
if code := exitErr.ExitCode(); code != -1 {
|
||||
os.Exit(code)
|
||||
}
|
||||
}
|
||||
log.Fatalf("Failed to run %s: %v", name, err)
|
||||
}
|
||||
}
|
||||
|
||||
// eeEnvDefaults returns env entries for EE and license enforcement settings.
|
||||
// These are appended to the file vars so they act as defaults — shell env
|
||||
// and .env file values still take precedence via mergeEnv.
|
||||
func eeEnvDefaults(noEE bool) []string {
|
||||
if noEE {
|
||||
return []string{
|
||||
"ENABLE_PAID_ENTERPRISE_EDITION_FEATURES=false",
|
||||
}
|
||||
}
|
||||
return []string{
|
||||
"ENABLE_PAID_ENTERPRISE_EDITION_FEATURES=true",
|
||||
"LICENSE_ENFORCEMENT_ENABLED=false",
|
||||
}
|
||||
}
|
||||
|
||||
// ensureBackendEnvFile copies env_template.txt to .env if .env doesn't exist.
|
||||
func ensureBackendEnvFile(root string) string {
|
||||
vscodeDir := filepath.Join(root, ".vscode")
|
||||
envFile := filepath.Join(vscodeDir, ".env")
|
||||
templateFile := filepath.Join(vscodeDir, "env_template.txt")
|
||||
|
||||
if _, err := os.Stat(envFile); err != nil {
|
||||
if !errors.Is(err, os.ErrNotExist) {
|
||||
log.Fatalf("Failed to stat env file %s: %v", envFile, err)
|
||||
}
|
||||
} else {
|
||||
log.Debugf("Using existing env file: %s", envFile)
|
||||
return envFile
|
||||
}
|
||||
|
||||
templateData, err := os.ReadFile(templateFile)
|
||||
if err != nil {
|
||||
log.Fatalf("Failed to read env template %s: %v", templateFile, err)
|
||||
}
|
||||
|
||||
if err := os.MkdirAll(vscodeDir, 0755); err != nil {
|
||||
log.Fatalf("Failed to create .vscode directory: %v", err)
|
||||
}
|
||||
|
||||
if err := os.WriteFile(envFile, templateData, 0644); err != nil {
|
||||
log.Fatalf("Failed to write env file %s: %v", envFile, err)
|
||||
}
|
||||
|
||||
log.Infof("Created %s from template (review and fill in <REPLACE THIS> values)", envFile)
|
||||
return envFile
|
||||
}
|
||||
|
||||
// mergeEnv combines shell environment with file-based defaults. Shell values
|
||||
// take precedence — file entries are only added for keys not already present.
|
||||
func mergeEnv(shellEnv, fileVars []string) []string {
|
||||
existing := make(map[string]bool, len(shellEnv))
|
||||
for _, entry := range shellEnv {
|
||||
if idx := strings.Index(entry, "="); idx > 0 {
|
||||
existing[entry[:idx]] = true
|
||||
}
|
||||
}
|
||||
|
||||
merged := make([]string, len(shellEnv))
|
||||
copy(merged, shellEnv)
|
||||
for _, entry := range fileVars {
|
||||
if idx := strings.Index(entry, "="); idx > 0 {
|
||||
key := entry[:idx]
|
||||
if !existing[key] {
|
||||
merged = append(merged, entry)
|
||||
} else {
|
||||
log.Debugf("Env var %s already set in shell, skipping .env value", key)
|
||||
}
|
||||
}
|
||||
}
|
||||
return merged
|
||||
}
|
||||
|
||||
// loadBackendEnvFile parses a .env file into KEY=VALUE entries suitable for
|
||||
// appending to os.Environ(). Blank lines and comments are skipped.
|
||||
func loadBackendEnvFile(path string) []string {
|
||||
f, err := os.Open(path)
|
||||
if err != nil {
|
||||
log.Fatalf("Failed to open env file %s: %v", path, err)
|
||||
}
|
||||
defer func() { _ = f.Close() }()
|
||||
|
||||
var envVars []string
|
||||
scanner := bufio.NewScanner(f)
|
||||
for scanner.Scan() {
|
||||
line := strings.TrimSpace(scanner.Text())
|
||||
if line == "" || strings.HasPrefix(line, "#") {
|
||||
continue
|
||||
}
|
||||
if idx := strings.Index(line, "="); idx > 0 {
|
||||
key := strings.TrimSpace(line[:idx])
|
||||
value := strings.TrimSpace(line[idx+1:])
|
||||
value = strings.Trim(value, `"'`)
|
||||
envVars = append(envVars, fmt.Sprintf("%s=%s", key, value))
|
||||
}
|
||||
}
|
||||
|
||||
if err := scanner.Err(); err != nil {
|
||||
log.Fatalf("Failed to read env file %s: %v", path, err)
|
||||
}
|
||||
|
||||
return envVars
|
||||
}
|
||||
@@ -41,6 +41,7 @@ func NewRootCommand() *cobra.Command {
|
||||
cmd.PersistentFlags().BoolVar(&opts.Debug, "debug", false, "run in debug mode")
|
||||
|
||||
// Add subcommands
|
||||
cmd.AddCommand(NewBackendCommand())
|
||||
cmd.AddCommand(NewCheckLazyImportsCommand())
|
||||
cmd.AddCommand(NewCherryPickCommand())
|
||||
cmd.AddCommand(NewDBCommand())
|
||||
|
||||
56
uv.lock
generated
56
uv.lock
generated
@@ -463,19 +463,6 @@ wheels = [
|
||||
{ url = "https://files.pythonhosted.org/packages/f8/00/3ed12264094ec91f534fae429945efbaa9f8c666f3aa7061cc3b2a26a0cd/authlib-1.6.7-py2.py3-none-any.whl", hash = "sha256:c637340d9a02789d2efa1d003a7437d10d3e565237bcb5fcbc6c134c7b95bab0", size = 244115, upload-time = "2026-02-06T14:04:12.141Z" },
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "azure-cognitiveservices-speech"
|
||||
version = "1.38.0"
|
||||
source = { registry = "https://pypi.org/simple" }
|
||||
wheels = [
|
||||
{ url = "https://files.pythonhosted.org/packages/85/f4/4571c42cb00f8af317d5431f594b4ece1fbe59ab59f106947fea8e90cf89/azure_cognitiveservices_speech-1.38.0-py3-none-macosx_10_14_x86_64.whl", hash = "sha256:18dce915ab032711f687abb3297dd19176b9cbea562b322ee6fa7365ef4a5091", size = 6775838, upload-time = "2024-06-11T03:08:35.202Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/86/22/0ca2c59a573119950cad1f53531fec9872fc38810c405a4e1827f3d13a8e/azure_cognitiveservices_speech-1.38.0-py3-none-macosx_11_0_arm64.whl", hash = "sha256:9dd0800fbc4a8438c6dfd5747a658251914fe2d205a29e9b46158cadac6ab381", size = 6687975, upload-time = "2024-06-11T03:08:38.797Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/4d/96/5436c09de3af3a9aefaa8cc00533c3a0f5d17aef5bbe017c17f0a30ad66e/azure_cognitiveservices_speech-1.38.0-py3-none-manylinux1_x86_64.whl", hash = "sha256:1c344e8a6faadb063cea451f0301e13b44d9724e1242337039bff601e81e6f86", size = 40022287, upload-time = "2024-06-11T03:08:16.777Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/a9/2d/ba20d05ff77ec9870cd489e6e7a474ba7fe820524bcf6fd202025e0c11cf/azure_cognitiveservices_speech-1.38.0-py3-none-manylinux2014_aarch64.whl", hash = "sha256:1e002595a749471efeac3a54c80097946570b76c13049760b97a4b881d9d24af", size = 39788653, upload-time = "2024-06-11T03:08:30.405Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/0c/21/25f8c37fb6868db4346ca977c287ede9e87f609885d932653243c9ed5f63/azure_cognitiveservices_speech-1.38.0-py3-none-win32.whl", hash = "sha256:16a530e6c646eb49ea0bc05cb45a9d28b99e4b67613f6c3a6c54e26e6bf65241", size = 1428364, upload-time = "2024-06-11T03:08:03.965Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/14/05/a6414a3481c5ee30c4f32742abe055e5f3ce4ff69e936089d86ece354067/azure_cognitiveservices_speech-1.38.0-py3-none-win_amd64.whl", hash = "sha256:1d38d8c056fb3f513a9ff27ab4e77fd08ca487f8788cc7a6df772c1ab2c97b54", size = 1539297, upload-time = "2024-06-11T03:08:01.304Z" },
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "babel"
|
||||
version = "2.17.0"
|
||||
@@ -4240,7 +4227,6 @@ backend = [
|
||||
{ name = "asana" },
|
||||
{ name = "asyncpg" },
|
||||
{ name = "atlassian-python-api" },
|
||||
{ name = "azure-cognitiveservices-speech" },
|
||||
{ name = "beautifulsoup4" },
|
||||
{ name = "boto3" },
|
||||
{ name = "boto3-stubs", extra = ["s3"] },
|
||||
@@ -4395,7 +4381,6 @@ requires-dist = [
|
||||
{ name = "asana", marker = "extra == 'backend'", specifier = "==5.0.8" },
|
||||
{ name = "asyncpg", marker = "extra == 'backend'", specifier = "==0.30.0" },
|
||||
{ name = "atlassian-python-api", marker = "extra == 'backend'", specifier = "==3.41.16" },
|
||||
{ name = "azure-cognitiveservices-speech", marker = "extra == 'backend'", specifier = "==1.38.0" },
|
||||
{ name = "beautifulsoup4", marker = "extra == 'backend'", specifier = "==4.12.3" },
|
||||
{ name = "black", marker = "extra == 'dev'", specifier = "==25.1.0" },
|
||||
{ name = "boto3", marker = "extra == 'backend'", specifier = "==1.39.11" },
|
||||
@@ -4458,7 +4443,7 @@ requires-dist = [
|
||||
{ name = "numpy", marker = "extra == 'model-server'", specifier = "==2.4.1" },
|
||||
{ name = "oauthlib", marker = "extra == 'backend'", specifier = "==3.2.2" },
|
||||
{ name = "office365-rest-python-client", marker = "extra == 'backend'", specifier = "==2.6.2" },
|
||||
{ name = "onyx-devtools", marker = "extra == 'dev'", specifier = "==0.6.3" },
|
||||
{ name = "onyx-devtools", marker = "extra == 'dev'", specifier = "==0.7.0" },
|
||||
{ name = "openai", specifier = "==2.14.0" },
|
||||
{ name = "openapi-generator-cli", marker = "extra == 'dev'", specifier = "==7.17.0" },
|
||||
{ name = "openinference-instrumentation", marker = "extra == 'backend'", specifier = "==0.1.42" },
|
||||
@@ -4563,20 +4548,19 @@ requires-dist = [{ name = "onyx", extras = ["backend", "dev", "ee"], editable =
|
||||
|
||||
[[package]]
|
||||
name = "onyx-devtools"
|
||||
version = "0.6.3"
|
||||
version = "0.7.0"
|
||||
source = { registry = "https://pypi.org/simple" }
|
||||
dependencies = [
|
||||
{ name = "fastapi" },
|
||||
{ name = "openapi-generator-cli" },
|
||||
]
|
||||
wheels = [
|
||||
{ url = "https://files.pythonhosted.org/packages/84/e2/e7619722c3ccd18eb38100f776fb3dd6b4ae0fbbee09fca5af7c69a279b5/onyx_devtools-0.6.3-py3-none-any.whl", hash = "sha256:d3a5422945d9da12cafc185f64b39f6e727ee4cc92b37427deb7a38f9aad4966", size = 3945381, upload-time = "2026-03-05T20:39:25.896Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/f2/09/513d2dabedc1e54ad4376830fc9b34a3d9c164bdbcdedfcdbb8b8154dc5a/onyx_devtools-0.6.3-py3-none-macosx_10_12_x86_64.whl", hash = "sha256:efe300e9f3a2e7ae75f88a4f9e0a5c4c471478296cb1615b6a1f03d247582e13", size = 3978761, upload-time = "2026-03-05T20:39:28.822Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/39/41/e757602a0de032d74ed01c7ee57f30e57728fb9cd4f922f50d2affda3889/onyx_devtools-0.6.3-py3-none-macosx_11_0_arm64.whl", hash = "sha256:594066eed3f917cfab5a8c7eac3d4a210df30259f2049f664787749709345e19", size = 3665378, upload-time = "2026-03-05T20:44:22.696Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/33/1c/c93b65d0b32e202596a2647922a75c7011cb982f899ddfcfd171f792c58f/onyx_devtools-0.6.3-py3-none-manylinux_2_17_aarch64.whl", hash = "sha256:384ef66030b55c0fd68b3898782b5b4b868ff3de119569dfc8544e2ce534b98a", size = 3540890, upload-time = "2026-03-05T20:39:28.886Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/f4/33/760eb656013f7f0cdff24570480d3dc4e52bbd8e6147ea1e8cf6fad7554f/onyx_devtools-0.6.3-py3-none-manylinux_2_17_x86_64.whl", hash = "sha256:82e218f3a49f64910c2c4c34d5dc12d1ea1520a27e0b0f6e4c0949ff9abaf0e1", size = 3945396, upload-time = "2026-03-05T20:39:34.323Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/1a/eb/f54b3675c464df8a51194ff75afc97c2417659e3a209dc46948b47c28860/onyx_devtools-0.6.3-py3-none-win_amd64.whl", hash = "sha256:8af614ae7229290ef2417cb85270184a1e826ed9a3a34658da93851edb36df57", size = 4045936, upload-time = "2026-03-05T20:39:28.375Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/04/b8/5bee38e748f3d4b8ec935766224db1bbc1214c91092e5822c080fccd9130/onyx_devtools-0.6.3-py3-none-win_arm64.whl", hash = "sha256:717589db4b42528d33ae96f8006ee6aad3555034dcfee724705b6576be6a6ec4", size = 3608268, upload-time = "2026-03-05T20:39:28.731Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/22/9e/6957b11555da57d9e97092f4cd8ac09a86666264b0c9491838f4b27db5dc/onyx_devtools-0.7.0-py3-none-macosx_10_12_x86_64.whl", hash = "sha256:ad962a168d46ea11dcde9fa3b37e4f12ec520b4a4cb4d49d8732de110d46c4b6", size = 3998057, upload-time = "2026-03-12T03:09:11.585Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/cd/90/c72f3d06ba677012d77c77de36195b6a32a15c755c79ba0282be74e3c366/onyx_devtools-0.7.0-py3-none-macosx_11_0_arm64.whl", hash = "sha256:e46d252e2b048ff053b03519c3a875998780738d7c334eaa1c9a32ff445e3e1a", size = 3687753, upload-time = "2026-03-12T03:09:11.742Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/10/42/4e9fe36eccf9f76d67ba8f4ff6539196a09cd60351fb63f5865e1544cbfa/onyx_devtools-0.7.0-py3-none-manylinux_2_17_aarch64.whl", hash = "sha256:f280bc9320e1cc310e7d753a371009bfaab02cc0e0cfd78559663b15655b5a50", size = 3560144, upload-time = "2026-03-12T03:12:24.02Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/76/40/36dc12d99760b358c7f39b27361cb18fa9681ffe194107f982d0e1a74016/onyx_devtools-0.7.0-py3-none-manylinux_2_17_x86_64.whl", hash = "sha256:e31df751c7540ae7e70a7fe8e1153c79c31c2254af6aa4c72c0dd54fa381d2ab", size = 3964387, upload-time = "2026-03-12T03:09:11.356Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/34/18/74744230c3820a5a7687335507ca5f1dbebab2c5325805041c1cd5703e6a/onyx_devtools-0.7.0-py3-none-win_amd64.whl", hash = "sha256:541bfd347c2d5b11e7f63ab5001d2594df91d215ad9d07b1562f5e715700f7e6", size = 4068030, upload-time = "2026-03-12T03:09:12.98Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/8c/78/1320436607d3ffcb321ba7b064556c020ea15843a7e7d903fbb7529a71f5/onyx_devtools-0.7.0-py3-none-win_arm64.whl", hash = "sha256:83016330a9d39712431916cc25b2fb2cfcaa0112a55cc4f919d545da3a8974f9", size = 3626409, upload-time = "2026-03-12T03:09:10.222Z" },
|
||||
]
|
||||
|
||||
[[package]]
|
||||
@@ -7248,21 +7232,19 @@ wheels = [
|
||||
|
||||
[[package]]
|
||||
name = "tornado"
|
||||
version = "6.5.2"
|
||||
version = "6.5.5"
|
||||
source = { registry = "https://pypi.org/simple" }
|
||||
sdist = { url = "https://files.pythonhosted.org/packages/09/ce/1eb500eae19f4648281bb2186927bb062d2438c2e5093d1360391afd2f90/tornado-6.5.2.tar.gz", hash = "sha256:ab53c8f9a0fa351e2c0741284e06c7a45da86afb544133201c5cc8578eb076a0", size = 510821, upload-time = "2025-08-08T18:27:00.78Z" }
|
||||
sdist = { url = "https://files.pythonhosted.org/packages/f8/f1/3173dfa4a18db4a9b03e5d55325559dab51ee653763bb8745a75af491286/tornado-6.5.5.tar.gz", hash = "sha256:192b8f3ea91bd7f1f50c06955416ed76c6b72f96779b962f07f911b91e8d30e9", size = 516006, upload-time = "2026-03-10T21:31:02.067Z" }
|
||||
wheels = [
|
||||
{ url = "https://files.pythonhosted.org/packages/f6/48/6a7529df2c9cc12efd2e8f5dd219516184d703b34c06786809670df5b3bd/tornado-6.5.2-cp39-abi3-macosx_10_9_universal2.whl", hash = "sha256:2436822940d37cde62771cff8774f4f00b3c8024fe482e16ca8387b8a2724db6", size = 442563, upload-time = "2025-08-08T18:26:42.945Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/f2/b5/9b575a0ed3e50b00c40b08cbce82eb618229091d09f6d14bce80fc01cb0b/tornado-6.5.2-cp39-abi3-macosx_10_9_x86_64.whl", hash = "sha256:583a52c7aa94ee046854ba81d9ebb6c81ec0fd30386d96f7640c96dad45a03ef", size = 440729, upload-time = "2025-08-08T18:26:44.473Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/1b/4e/619174f52b120efcf23633c817fd3fed867c30bff785e2cd5a53a70e483c/tornado-6.5.2-cp39-abi3-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:b0fe179f28d597deab2842b86ed4060deec7388f1fd9c1b4a41adf8af058907e", size = 444295, upload-time = "2025-08-08T18:26:46.021Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/95/fa/87b41709552bbd393c85dd18e4e3499dcd8983f66e7972926db8d96aa065/tornado-6.5.2-cp39-abi3-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:b186e85d1e3536d69583d2298423744740986018e393d0321df7340e71898882", size = 443644, upload-time = "2025-08-08T18:26:47.625Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/f9/41/fb15f06e33d7430ca89420283a8762a4e6b8025b800ea51796ab5e6d9559/tornado-6.5.2-cp39-abi3-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:e792706668c87709709c18b353da1f7662317b563ff69f00bab83595940c7108", size = 443878, upload-time = "2025-08-08T18:26:50.599Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/11/92/fe6d57da897776ad2e01e279170ea8ae726755b045fe5ac73b75357a5a3f/tornado-6.5.2-cp39-abi3-musllinux_1_2_aarch64.whl", hash = "sha256:06ceb1300fd70cb20e43b1ad8aaee0266e69e7ced38fa910ad2e03285009ce7c", size = 444549, upload-time = "2025-08-08T18:26:51.864Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/9b/02/c8f4f6c9204526daf3d760f4aa555a7a33ad0e60843eac025ccfd6ff4a93/tornado-6.5.2-cp39-abi3-musllinux_1_2_i686.whl", hash = "sha256:74db443e0f5251be86cbf37929f84d8c20c27a355dd452a5cfa2aada0d001ec4", size = 443973, upload-time = "2025-08-08T18:26:53.625Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/ae/2d/f5f5707b655ce2317190183868cd0f6822a1121b4baeae509ceb9590d0bd/tornado-6.5.2-cp39-abi3-musllinux_1_2_x86_64.whl", hash = "sha256:b5e735ab2889d7ed33b32a459cac490eda71a1ba6857b0118de476ab6c366c04", size = 443954, upload-time = "2025-08-08T18:26:55.072Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/e8/59/593bd0f40f7355806bf6573b47b8c22f8e1374c9b6fd03114bd6b7a3dcfd/tornado-6.5.2-cp39-abi3-win32.whl", hash = "sha256:c6f29e94d9b37a95013bb669616352ddb82e3bfe8326fccee50583caebc8a5f0", size = 445023, upload-time = "2025-08-08T18:26:56.677Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/c7/2a/f609b420c2f564a748a2d80ebfb2ee02a73ca80223af712fca591386cafb/tornado-6.5.2-cp39-abi3-win_amd64.whl", hash = "sha256:e56a5af51cc30dd2cae649429af65ca2f6571da29504a07995175df14c18f35f", size = 445427, upload-time = "2025-08-08T18:26:57.91Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/5e/4f/e1f65e8f8c76d73658b33d33b81eed4322fb5085350e4328d5c956f0c8f9/tornado-6.5.2-cp39-abi3-win_arm64.whl", hash = "sha256:d6c33dc3672e3a1f3618eb63b7ef4683a7688e7b9e6e8f0d9aa5726360a004af", size = 444456, upload-time = "2025-08-08T18:26:59.207Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/59/8c/77f5097695f4dd8255ecbd08b2a1ed8ba8b953d337804dd7080f199e12bf/tornado-6.5.5-cp39-abi3-macosx_10_9_universal2.whl", hash = "sha256:487dc9cc380e29f58c7ab88f9e27cdeef04b2140862e5076a66fb6bb68bb1bfa", size = 445983, upload-time = "2026-03-10T21:30:44.28Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/ab/5e/7625b76cd10f98f1516c36ce0346de62061156352353ef2da44e5c21523c/tornado-6.5.5-cp39-abi3-macosx_10_9_x86_64.whl", hash = "sha256:65a7f1d46d4bb41df1ac99f5fcb685fb25c7e61613742d5108b010975a9a6521", size = 444246, upload-time = "2026-03-10T21:30:46.571Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/b2/04/7b5705d5b3c0fab088f434f9c83edac1573830ca49ccf29fb83bf7178eec/tornado-6.5.5-cp39-abi3-manylinux1_x86_64.manylinux_2_28_x86_64.manylinux_2_5_x86_64.whl", hash = "sha256:e74c92e8e65086b338fd56333fb9a68b9f6f2fe7ad532645a290a464bcf46be5", size = 447229, upload-time = "2026-03-10T21:30:48.273Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/34/01/74e034a30ef59afb4097ef8659515e96a39d910b712a89af76f5e4e1f93c/tornado-6.5.5-cp39-abi3-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:435319e9e340276428bbdb4e7fa732c2d399386d1de5686cb331ec8eee754f07", size = 448192, upload-time = "2026-03-10T21:30:51.22Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/be/00/fe9e02c5a96429fce1a1d15a517f5d8444f9c412e0bb9eadfbe3b0fc55bf/tornado-6.5.5-cp39-abi3-musllinux_1_2_aarch64.whl", hash = "sha256:3f54aa540bdbfee7b9eb268ead60e7d199de5021facd276819c193c0fb28ea4e", size = 448039, upload-time = "2026-03-10T21:30:53.52Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/82/9e/656ee4cec0398b1d18d0f1eb6372c41c6b889722641d84948351ae19556d/tornado-6.5.5-cp39-abi3-musllinux_1_2_x86_64.whl", hash = "sha256:36abed1754faeb80fbd6e64db2758091e1320f6bba74a4cf8c09cd18ccce8aca", size = 447445, upload-time = "2026-03-10T21:30:55.541Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/5a/76/4921c00511f88af86a33de770d64141170f1cfd9c00311aea689949e274e/tornado-6.5.5-cp39-abi3-win32.whl", hash = "sha256:dd3eafaaeec1c7f2f8fdcd5f964e8907ad788fe8a5a32c4426fbbdda621223b7", size = 448582, upload-time = "2026-03-10T21:30:57.142Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/2c/23/f6c6112a04d28eed765e374435fb1a9198f73e1ec4b4024184f21faeb1ad/tornado-6.5.5-cp39-abi3-win_amd64.whl", hash = "sha256:6443a794ba961a9f619b1ae926a2e900ac20c34483eea67be4ed8f1e58d3ef7b", size = 448990, upload-time = "2026-03-10T21:30:58.857Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/b7/c8/876602cbc96469911f0939f703453c1157b0c826ecb05bdd32e023397d4e/tornado-6.5.5-cp39-abi3-win_arm64.whl", hash = "sha256:2c9a876e094109333f888539ddb2de4361743e5d21eece20688e3e351e4990a6", size = 448016, upload-time = "2026-03-10T21:31:00.43Z" },
|
||||
]
|
||||
|
||||
[[package]]
|
||||
|
||||
@@ -143,6 +143,7 @@ module.exports = {
|
||||
"**/src/app/**/utils/*.test.ts",
|
||||
"**/src/app/**/hooks/*.test.ts", // Pure packet processor tests
|
||||
"**/src/refresh-components/**/*.test.ts",
|
||||
"**/src/refresh-pages/**/*.test.ts",
|
||||
"**/src/sections/**/*.test.ts",
|
||||
"**/src/components/**/*.test.ts",
|
||||
// Add more patterns here as you add more unit tests
|
||||
|
||||
@@ -1,20 +0,0 @@
|
||||
import type { IconProps } from "@opal/types";
|
||||
const SvgAudio = ({ size, ...props }: IconProps) => (
|
||||
<svg
|
||||
width={size}
|
||||
height={size}
|
||||
viewBox="0 0 16 16"
|
||||
fill="none"
|
||||
xmlns="http://www.w3.org/2000/svg"
|
||||
stroke="currentColor"
|
||||
{...props}
|
||||
>
|
||||
<path
|
||||
d="M2 10V6M5 14V2M11 11V5M14 9V7M8 10V6"
|
||||
strokeWidth={1.5}
|
||||
strokeLinecap="round"
|
||||
strokeLinejoin="round"
|
||||
/>
|
||||
</svg>
|
||||
);
|
||||
export default SvgAudio;
|
||||
@@ -17,7 +17,6 @@ export { default as SvgArrowUpDown } from "@opal/icons/arrow-up-down";
|
||||
export { default as SvgArrowUpDot } from "@opal/icons/arrow-up-dot";
|
||||
export { default as SvgArrowUpRight } from "@opal/icons/arrow-up-right";
|
||||
export { default as SvgArrowWallRight } from "@opal/icons/arrow-wall-right";
|
||||
export { default as SvgAudio } from "@opal/icons/audio";
|
||||
export { default as SvgAudioEqSmall } from "@opal/icons/audio-eq-small";
|
||||
export { default as SvgAws } from "@opal/icons/aws";
|
||||
export { default as SvgAzure } from "@opal/icons/azure";
|
||||
@@ -107,8 +106,6 @@ export { default as SvgLogOut } from "@opal/icons/log-out";
|
||||
export { default as SvgMaximize2 } from "@opal/icons/maximize-2";
|
||||
export { default as SvgMcp } from "@opal/icons/mcp";
|
||||
export { default as SvgMenu } from "@opal/icons/menu";
|
||||
export { default as SvgMicrophone } from "@opal/icons/microphone";
|
||||
export { default as SvgMicrophoneOff } from "@opal/icons/microphone-off";
|
||||
export { default as SvgMinus } from "@opal/icons/minus";
|
||||
export { default as SvgMinusCircle } from "@opal/icons/minus-circle";
|
||||
export { default as SvgMoon } from "@opal/icons/moon";
|
||||
@@ -179,8 +176,6 @@ export { default as SvgUserManage } from "@opal/icons/user-manage";
|
||||
export { default as SvgUserPlus } from "@opal/icons/user-plus";
|
||||
export { default as SvgUserSync } from "@opal/icons/user-sync";
|
||||
export { default as SvgUsers } from "@opal/icons/users";
|
||||
export { default as SvgVolume } from "@opal/icons/volume";
|
||||
export { default as SvgVolumeOff } from "@opal/icons/volume-off";
|
||||
export { default as SvgWallet } from "@opal/icons/wallet";
|
||||
export { default as SvgWorkflow } from "@opal/icons/workflow";
|
||||
export { default as SvgX } from "@opal/icons/x";
|
||||
|
||||
@@ -1,29 +0,0 @@
|
||||
import type { IconProps } from "@opal/types";
|
||||
|
||||
const SvgMicrophoneOff = ({ size, ...props }: IconProps) => (
|
||||
<svg
|
||||
width={size}
|
||||
height={size}
|
||||
viewBox="0 0 16 16"
|
||||
fill="none"
|
||||
xmlns="http://www.w3.org/2000/svg"
|
||||
stroke="currentColor"
|
||||
{...props}
|
||||
>
|
||||
{/* Microphone body */}
|
||||
<path
|
||||
d="M12.5 7V7.5C12.5 9.98528 10.4853 12 8 12M3.5 7V7.5C3.5 9.98528 5.51472 12 8 12M8 12V14.5M8 14.5H5M8 14.5H11M8 9.5C6.89543 9.5 6 8.60457 6 7.5V3.5C6 2.39543 6.89543 1.5 8 1.5C9.10457 1.5 10 2.39543 10 3.5V7.5C10 8.60457 9.10457 9.5 8 9.5Z"
|
||||
strokeWidth={1.5}
|
||||
strokeLinecap="round"
|
||||
strokeLinejoin="round"
|
||||
/>
|
||||
{/* Diagonal slash */}
|
||||
<path
|
||||
d="M2 2L14 14"
|
||||
strokeWidth={1.5}
|
||||
strokeLinecap="round"
|
||||
strokeLinejoin="round"
|
||||
/>
|
||||
</svg>
|
||||
);
|
||||
export default SvgMicrophoneOff;
|
||||
@@ -1,21 +0,0 @@
|
||||
import type { IconProps } from "@opal/types";
|
||||
|
||||
const SvgMicrophone = ({ size, ...props }: IconProps) => (
|
||||
<svg
|
||||
width={size}
|
||||
height={size}
|
||||
viewBox="0 0 16 16"
|
||||
fill="none"
|
||||
xmlns="http://www.w3.org/2000/svg"
|
||||
stroke="currentColor"
|
||||
{...props}
|
||||
>
|
||||
<path
|
||||
d="M12.5 7V7.5C12.5 9.98528 10.4853 12 8 12M3.5 7V7.5C3.5 9.98528 5.51472 12 8 12M8 12V14.5M8 14.5H5M8 14.5H11M8 9.5C6.89543 9.5 6 8.60457 6 7.5V3.5C6 2.39543 6.89543 1.5 8 1.5C9.10457 1.5 10 2.39543 10 3.5V7.5C10 8.60457 9.10457 9.5 8 9.5Z"
|
||||
strokeWidth={1.5}
|
||||
strokeLinecap="round"
|
||||
strokeLinejoin="round"
|
||||
/>
|
||||
</svg>
|
||||
);
|
||||
export default SvgMicrophone;
|
||||
@@ -1,26 +0,0 @@
|
||||
import type { IconProps } from "@opal/types";
|
||||
const SvgVolumeOff = ({ size, ...props }: IconProps) => (
|
||||
<svg
|
||||
width={size}
|
||||
height={size}
|
||||
viewBox="0 0 16 16"
|
||||
fill="none"
|
||||
xmlns="http://www.w3.org/2000/svg"
|
||||
stroke="currentColor"
|
||||
{...props}
|
||||
>
|
||||
<path
|
||||
d="M2 6V10H5L9 13V3L5 6H2Z"
|
||||
strokeWidth={1.5}
|
||||
strokeLinecap="round"
|
||||
strokeLinejoin="round"
|
||||
/>
|
||||
<path
|
||||
d="M14 6L11 9M11 6L14 9"
|
||||
strokeWidth={1.5}
|
||||
strokeLinecap="round"
|
||||
strokeLinejoin="round"
|
||||
/>
|
||||
</svg>
|
||||
);
|
||||
export default SvgVolumeOff;
|
||||
@@ -1,26 +0,0 @@
|
||||
import type { IconProps } from "@opal/types";
|
||||
const SvgVolume = ({ size, ...props }: IconProps) => (
|
||||
<svg
|
||||
width={size}
|
||||
height={size}
|
||||
viewBox="0 0 16 16"
|
||||
fill="none"
|
||||
xmlns="http://www.w3.org/2000/svg"
|
||||
stroke="currentColor"
|
||||
{...props}
|
||||
>
|
||||
<path
|
||||
d="M2 6V10H5L9 13V3L5 6H2Z"
|
||||
strokeWidth={1.5}
|
||||
strokeLinecap="round"
|
||||
strokeLinejoin="round"
|
||||
/>
|
||||
<path
|
||||
d="M11.5 5.5C12.3 6.3 12.8 7.4 12.8 8.5C12.8 9.6 12.3 10.7 11.5 11.5"
|
||||
strokeWidth={1.5}
|
||||
strokeLinecap="round"
|
||||
strokeLinejoin="round"
|
||||
/>
|
||||
</svg>
|
||||
);
|
||||
export default SvgVolume;
|
||||
@@ -59,7 +59,7 @@ const nextConfig = {
|
||||
{
|
||||
key: "Permissions-Policy",
|
||||
value:
|
||||
"accelerometer=(), ambient-light-sensor=(), autoplay=(), battery=(), camera=(), cross-origin-isolated=(), display-capture=(), document-domain=(), encrypted-media=(), execution-while-not-rendered=(), execution-while-out-of-viewport=(), fullscreen=(), geolocation=(), gyroscope=(), keyboard-map=(), magnetometer=(), microphone=(self), midi=(), navigation-override=(), payment=(), picture-in-picture=(), publickey-credentials-get=(), screen-wake-lock=(), sync-xhr=(), usb=(), web-share=(), xr-spatial-tracking=()",
|
||||
"accelerometer=(), ambient-light-sensor=(), autoplay=(), battery=(), camera=(), cross-origin-isolated=(), display-capture=(), document-domain=(), encrypted-media=(), execution-while-not-rendered=(), execution-while-out-of-viewport=(), fullscreen=(), geolocation=(), gyroscope=(), keyboard-map=(), magnetometer=(), microphone=(), midi=(), navigation-override=(), payment=(), picture-in-picture=(), publickey-credentials-get=(), screen-wake-lock=(), sync-xhr=(), usb=(), web-share=(), xr-spatial-tracking=()",
|
||||
},
|
||||
],
|
||||
},
|
||||
|
||||
@@ -1,4 +0,0 @@
|
||||
<svg width="16" height="16" viewBox="0 0 16 16" fill="none" xmlns="http://www.w3.org/2000/svg">
|
||||
<path d="M10.5 2H13V14H10.5V2Z" fill="currentColor"/>
|
||||
<path d="M3 2H5.5V14H3V2Z" fill="currentColor"/>
|
||||
</svg>
|
||||
|
Before Width: | Height: | Size: 206 B |
@@ -1,4 +0,0 @@
|
||||
<svg width="16" height="16" viewBox="0 0 16 16" fill="none" xmlns="http://www.w3.org/2000/svg">
|
||||
<path d="M10.5 2H13V14H10.5V2Z" fill="white"/>
|
||||
<path d="M3 2H5.5V14H3V2Z" fill="white"/>
|
||||
</svg>
|
||||
|
Before Width: | Height: | Size: 192 B |
@@ -1,507 +0,0 @@
|
||||
"use client";
|
||||
|
||||
import Image from "next/image";
|
||||
import { FunctionComponent, useState, useEffect } from "react";
|
||||
import {
|
||||
AzureIcon,
|
||||
ElevenLabsIcon,
|
||||
OpenAIIcon,
|
||||
} from "@/components/icons/icons";
|
||||
import Modal from "@/refresh-components/Modal";
|
||||
import Button from "@/refresh-components/buttons/Button";
|
||||
import InputTypeIn from "@/refresh-components/inputs/InputTypeIn";
|
||||
import InputSelect from "@/refresh-components/inputs/InputSelect";
|
||||
import InputComboBox from "@/refresh-components/inputs/InputComboBox";
|
||||
import { Vertical, Horizontal } from "@/layouts/input-layouts";
|
||||
import { toast } from "@/hooks/useToast";
|
||||
import { Section } from "@/layouts/general-layouts";
|
||||
import { SvgArrowExchange, SvgOnyxLogo } from "@opal/icons";
|
||||
import type { IconProps } from "@opal/types";
|
||||
import { VoiceProviderView } from "@/hooks/useVoiceProviders";
|
||||
import {
|
||||
testVoiceProvider,
|
||||
upsertVoiceProvider,
|
||||
fetchVoicesByType,
|
||||
fetchLLMProviders,
|
||||
} from "@/lib/admin/voice/svc";
|
||||
|
||||
interface VoiceOption {
|
||||
value: string;
|
||||
label: string;
|
||||
description?: string;
|
||||
}
|
||||
|
||||
interface LLMProviderView {
|
||||
id: number;
|
||||
name: string;
|
||||
provider: string;
|
||||
api_key: string | null;
|
||||
}
|
||||
|
||||
interface ApiKeyOption {
|
||||
value: string;
|
||||
label: string;
|
||||
description?: string;
|
||||
}
|
||||
|
||||
interface VoiceProviderSetupModalProps {
|
||||
providerType: string;
|
||||
existingProvider: VoiceProviderView | null;
|
||||
mode: "stt" | "tts";
|
||||
defaultModelId?: string | null;
|
||||
onClose: () => void;
|
||||
onSuccess: () => void;
|
||||
}
|
||||
|
||||
const PROVIDER_LABELS: Record<string, string> = {
|
||||
openai: "OpenAI",
|
||||
azure: "Azure Speech Services",
|
||||
elevenlabs: "ElevenLabs",
|
||||
};
|
||||
|
||||
const PROVIDER_API_KEY_URLS: Record<string, string> = {
|
||||
openai: "https://platform.openai.com/api-keys",
|
||||
azure: "https://portal.azure.com/",
|
||||
elevenlabs: "https://elevenlabs.io/app/settings/api-keys",
|
||||
};
|
||||
|
||||
const PROVIDER_LOGO_URLS: Record<string, string> = {
|
||||
openai: "/Openai.svg",
|
||||
azure: "/Azure.png",
|
||||
elevenlabs: "/ElevenLabs.svg",
|
||||
};
|
||||
|
||||
const PROVIDER_DOCS_URLS: Record<string, string> = {
|
||||
openai: "https://platform.openai.com/docs/guides/text-to-speech",
|
||||
azure: "https://learn.microsoft.com/en-us/azure/ai-services/speech-service/",
|
||||
elevenlabs: "https://elevenlabs.io/docs",
|
||||
};
|
||||
|
||||
const PROVIDER_VOICE_DOCS_URLS: Record<string, { url: string; label: string }> =
|
||||
{
|
||||
openai: {
|
||||
url: "https://platform.openai.com/docs/guides/text-to-speech#voice-options",
|
||||
label: "OpenAI",
|
||||
},
|
||||
azure: {
|
||||
url: "https://learn.microsoft.com/en-us/azure/ai-services/speech-service/language-support?tabs=tts",
|
||||
label: "Azure",
|
||||
},
|
||||
elevenlabs: {
|
||||
url: "https://elevenlabs.io/docs/voices/premade-voices",
|
||||
label: "ElevenLabs",
|
||||
},
|
||||
};
|
||||
|
||||
const OPENAI_STT_MODELS = [{ id: "whisper-1", name: "Whisper v1" }];
|
||||
|
||||
const OPENAI_TTS_MODELS = [
|
||||
{ id: "tts-1", name: "TTS-1" },
|
||||
{ id: "tts-1-hd", name: "TTS-1 HD" },
|
||||
];
|
||||
|
||||
// Map model IDs from cards to actual API model IDs
|
||||
const MODEL_ID_MAP: Record<string, string> = {
|
||||
"tts-1": "tts-1",
|
||||
"tts-1-hd": "tts-1-hd",
|
||||
whisper: "whisper-1",
|
||||
};
|
||||
|
||||
export default function VoiceProviderSetupModal({
|
||||
providerType,
|
||||
existingProvider,
|
||||
mode,
|
||||
defaultModelId,
|
||||
onClose,
|
||||
onSuccess,
|
||||
}: VoiceProviderSetupModalProps) {
|
||||
// Map the card model ID to the actual API model ID
|
||||
// Prioritize defaultModelId (from the clicked card) over stored value
|
||||
const initialTtsModel = defaultModelId
|
||||
? MODEL_ID_MAP[defaultModelId] ?? "tts-1"
|
||||
: existingProvider?.tts_model ?? "tts-1";
|
||||
|
||||
const [apiKey, setApiKey] = useState("");
|
||||
const [apiKeyChanged, setApiKeyChanged] = useState(false);
|
||||
const [targetUri, setTargetUri] = useState(
|
||||
existingProvider?.target_uri ?? ""
|
||||
);
|
||||
const [selectedLlmProviderId, setSelectedLlmProviderId] = useState<
|
||||
number | null
|
||||
>(null);
|
||||
const [sttModel, setSttModel] = useState(
|
||||
existingProvider?.stt_model ?? "whisper-1"
|
||||
);
|
||||
const [ttsModel, setTtsModel] = useState(initialTtsModel);
|
||||
const [defaultVoice, setDefaultVoice] = useState(
|
||||
existingProvider?.default_voice ?? ""
|
||||
);
|
||||
const [isSubmitting, setIsSubmitting] = useState(false);
|
||||
|
||||
// Dynamic voices fetched from backend
|
||||
const [voiceOptions, setVoiceOptions] = useState<VoiceOption[]>([]);
|
||||
const [isLoadingVoices, setIsLoadingVoices] = useState(false);
|
||||
|
||||
// Existing OpenAI LLM providers for API key reuse
|
||||
const [existingApiKeyOptions, setExistingApiKeyOptions] = useState<
|
||||
ApiKeyOption[]
|
||||
>([]);
|
||||
const [llmProviderMap, setLlmProviderMap] = useState<Map<string, number>>(
|
||||
new Map()
|
||||
);
|
||||
|
||||
// Fetch existing OpenAI LLM providers (for API key reuse)
|
||||
useEffect(() => {
|
||||
if (providerType !== "openai") return;
|
||||
|
||||
fetchLLMProviders()
|
||||
.then((res) => res.json())
|
||||
.then((data: LLMProviderView[]) => {
|
||||
const openaiProviders = data.filter(
|
||||
(p) => p.provider === "openai" && p.api_key
|
||||
);
|
||||
const options: ApiKeyOption[] = openaiProviders.map((p) => ({
|
||||
value: p.api_key!,
|
||||
label: p.api_key!,
|
||||
description: `Used for LLM provider ${p.name}`,
|
||||
}));
|
||||
setExistingApiKeyOptions(options);
|
||||
|
||||
// Map masked API keys to provider IDs for lookup on selection
|
||||
const providerMap = new Map<string, number>();
|
||||
openaiProviders.forEach((p) => {
|
||||
if (p.api_key) {
|
||||
providerMap.set(p.api_key, p.id);
|
||||
}
|
||||
});
|
||||
setLlmProviderMap(providerMap);
|
||||
})
|
||||
.catch(() => {
|
||||
setExistingApiKeyOptions([]);
|
||||
});
|
||||
}, [providerType]);
|
||||
|
||||
// Fetch voices on mount (works without API key for ElevenLabs/OpenAI)
|
||||
useEffect(() => {
|
||||
setIsLoadingVoices(true);
|
||||
fetchVoicesByType(providerType)
|
||||
.then((res) => res.json())
|
||||
.then((data: Array<{ id: string; name: string }>) => {
|
||||
const options = data.map((v) => ({
|
||||
value: v.id,
|
||||
label: v.name,
|
||||
description: v.id,
|
||||
}));
|
||||
setVoiceOptions(options);
|
||||
// Set default voice to first option if not already set,
|
||||
// or if current value doesn't exist in the new options
|
||||
setDefaultVoice((prev) => {
|
||||
if (!prev) return options[0]?.value ?? "";
|
||||
const existsInOptions = options.some((opt) => opt.value === prev);
|
||||
return existsInOptions ? prev : options[0]?.value ?? "";
|
||||
});
|
||||
})
|
||||
.catch(() => {
|
||||
setVoiceOptions([]);
|
||||
})
|
||||
.finally(() => {
|
||||
setIsLoadingVoices(false);
|
||||
});
|
||||
}, [providerType]);
|
||||
|
||||
const isEditing = !!existingProvider;
|
||||
const label = PROVIDER_LABELS[providerType] ?? providerType;
|
||||
|
||||
// Logo arrangement component for the modal header
|
||||
// No useMemo needed - providerType and label are stable props
|
||||
const LogoArrangement: FunctionComponent<IconProps> = () => (
|
||||
<div className="flex items-center gap-2">
|
||||
<div className="flex items-center justify-center size-7 shrink-0 overflow-clip">
|
||||
{providerType === "openai" ? (
|
||||
<OpenAIIcon size={24} />
|
||||
) : providerType === "azure" ? (
|
||||
<AzureIcon size={24} />
|
||||
) : providerType === "elevenlabs" ? (
|
||||
<ElevenLabsIcon size={24} />
|
||||
) : (
|
||||
<Image
|
||||
src={PROVIDER_LOGO_URLS[providerType] ?? "/Openai.svg"}
|
||||
alt={`${label} logo`}
|
||||
width={24}
|
||||
height={24}
|
||||
className="object-contain"
|
||||
/>
|
||||
)}
|
||||
</div>
|
||||
<div className="flex items-center justify-center size-4 shrink-0">
|
||||
<SvgArrowExchange className="size-3 text-text-04" />
|
||||
</div>
|
||||
<div className="flex items-center justify-center size-7 p-0.5 shrink-0 overflow-clip">
|
||||
<SvgOnyxLogo size={24} className="text-text-04 shrink-0" />
|
||||
</div>
|
||||
</div>
|
||||
);
|
||||
|
||||
const handleSubmit = async () => {
|
||||
// API key required for new providers, or when explicitly changed during edit
|
||||
if (!selectedLlmProviderId) {
|
||||
if (!isEditing && !apiKey) {
|
||||
toast.error("API key is required");
|
||||
return;
|
||||
}
|
||||
if (isEditing && apiKeyChanged && !apiKey) {
|
||||
toast.error(
|
||||
"API key cannot be empty. Leave blank to keep existing key."
|
||||
);
|
||||
return;
|
||||
}
|
||||
}
|
||||
|
||||
if (providerType === "azure" && !isEditing && !targetUri) {
|
||||
toast.error("Target URI is required");
|
||||
return;
|
||||
}
|
||||
|
||||
setIsSubmitting(true);
|
||||
try {
|
||||
// Test the connection first (skip if reusing LLM provider key - it's already validated)
|
||||
if (!selectedLlmProviderId) {
|
||||
const testResponse = await testVoiceProvider({
|
||||
provider_type: providerType,
|
||||
api_key: apiKeyChanged ? apiKey : undefined,
|
||||
target_uri: targetUri || undefined,
|
||||
use_stored_key: isEditing && !apiKeyChanged,
|
||||
});
|
||||
|
||||
if (!testResponse.ok) {
|
||||
const data = await testResponse.json();
|
||||
toast.error(data.detail || "Connection test failed");
|
||||
setIsSubmitting(false);
|
||||
return;
|
||||
}
|
||||
}
|
||||
|
||||
// Save the provider
|
||||
const response = await upsertVoiceProvider({
|
||||
id: existingProvider?.id,
|
||||
name: label,
|
||||
provider_type: providerType,
|
||||
api_key: selectedLlmProviderId
|
||||
? undefined
|
||||
: apiKeyChanged
|
||||
? apiKey
|
||||
: undefined,
|
||||
api_key_changed: selectedLlmProviderId ? false : apiKeyChanged,
|
||||
target_uri: targetUri || undefined,
|
||||
llm_provider_id: selectedLlmProviderId,
|
||||
stt_model: sttModel,
|
||||
tts_model: ttsModel,
|
||||
default_voice: defaultVoice,
|
||||
activate_stt: mode === "stt",
|
||||
activate_tts: mode === "tts",
|
||||
});
|
||||
|
||||
if (response.ok) {
|
||||
toast.success(isEditing ? "Provider updated" : "Provider connected");
|
||||
onSuccess();
|
||||
} else {
|
||||
const data = await response.json();
|
||||
toast.error(data.detail || "Failed to save provider");
|
||||
}
|
||||
} catch {
|
||||
toast.error("Failed to save provider");
|
||||
} finally {
|
||||
setIsSubmitting(false);
|
||||
}
|
||||
};
|
||||
|
||||
return (
|
||||
<Modal open onOpenChange={(isOpen) => !isOpen && onClose()}>
|
||||
<Modal.Content width="sm">
|
||||
<Modal.Header
|
||||
icon={LogoArrangement}
|
||||
title={isEditing ? `Edit ${label}` : `Set up ${label}`}
|
||||
description={`Connect to ${label} and set up your voice models.`}
|
||||
onClose={onClose}
|
||||
/>
|
||||
<Modal.Body>
|
||||
<Section gap={1} alignItems="stretch">
|
||||
<Vertical
|
||||
title="API Key"
|
||||
subDescription={
|
||||
isEditing ? (
|
||||
"Leave blank to keep existing key"
|
||||
) : (
|
||||
<>
|
||||
Paste your{" "}
|
||||
<a
|
||||
href={PROVIDER_API_KEY_URLS[providerType]}
|
||||
target="_blank"
|
||||
rel="noopener noreferrer"
|
||||
className="underline"
|
||||
>
|
||||
API key
|
||||
</a>{" "}
|
||||
from {label} to access your models.
|
||||
</>
|
||||
)
|
||||
}
|
||||
nonInteractive
|
||||
>
|
||||
{providerType === "openai" && existingApiKeyOptions.length > 0 ? (
|
||||
<InputComboBox
|
||||
placeholder={isEditing ? "••••••••" : "Enter API key"}
|
||||
value={apiKey}
|
||||
onChange={(e) => {
|
||||
setApiKey(e.target.value);
|
||||
setApiKeyChanged(true);
|
||||
setSelectedLlmProviderId(null);
|
||||
}}
|
||||
onValueChange={(value) => {
|
||||
setApiKey(value);
|
||||
// Check if this is an existing key
|
||||
const llmProviderId = llmProviderMap.get(value);
|
||||
if (llmProviderId) {
|
||||
setSelectedLlmProviderId(llmProviderId);
|
||||
setApiKeyChanged(false);
|
||||
} else {
|
||||
setSelectedLlmProviderId(null);
|
||||
setApiKeyChanged(true);
|
||||
}
|
||||
}}
|
||||
options={existingApiKeyOptions}
|
||||
separatorLabel="Reuse OpenAI API Keys"
|
||||
strict={false}
|
||||
showAddPrefix
|
||||
/>
|
||||
) : (
|
||||
<InputTypeIn
|
||||
type="password"
|
||||
placeholder={isEditing ? "••••••••" : "Enter API key"}
|
||||
value={apiKey}
|
||||
onChange={(e) => {
|
||||
setApiKey(e.target.value);
|
||||
setApiKeyChanged(true);
|
||||
}}
|
||||
/>
|
||||
)}
|
||||
</Vertical>
|
||||
|
||||
{providerType === "azure" && (
|
||||
<Vertical
|
||||
title="Target URI"
|
||||
subDescription={
|
||||
<>
|
||||
Paste the endpoint shown in{" "}
|
||||
<a
|
||||
href="https://portal.azure.com/"
|
||||
target="_blank"
|
||||
rel="noopener noreferrer"
|
||||
className="underline"
|
||||
>
|
||||
Azure Portal (Keys and Endpoint)
|
||||
</a>
|
||||
. Onyx extracts the speech region from this URL. Examples:
|
||||
https://westus.api.cognitive.microsoft.com/ or
|
||||
https://westus.tts.speech.microsoft.com/.
|
||||
</>
|
||||
}
|
||||
nonInteractive
|
||||
>
|
||||
<InputTypeIn
|
||||
placeholder={
|
||||
isEditing
|
||||
? "Leave blank to keep existing"
|
||||
: "https://<region>.api.cognitive.microsoft.com/"
|
||||
}
|
||||
value={targetUri}
|
||||
onChange={(e) => setTargetUri(e.target.value)}
|
||||
/>
|
||||
</Vertical>
|
||||
)}
|
||||
|
||||
{providerType === "openai" && mode === "stt" && (
|
||||
<Horizontal title="STT Model" center nonInteractive>
|
||||
<InputSelect value={sttModel} onValueChange={setSttModel}>
|
||||
<InputSelect.Trigger />
|
||||
<InputSelect.Content>
|
||||
{OPENAI_STT_MODELS.map((model) => (
|
||||
<InputSelect.Item key={model.id} value={model.id}>
|
||||
{model.name}
|
||||
</InputSelect.Item>
|
||||
))}
|
||||
</InputSelect.Content>
|
||||
</InputSelect>
|
||||
</Horizontal>
|
||||
)}
|
||||
|
||||
{providerType === "openai" && mode === "tts" && (
|
||||
<Vertical
|
||||
title="Default Model"
|
||||
subDescription="This model will be used by Onyx by default for text-to-speech."
|
||||
nonInteractive
|
||||
>
|
||||
<InputSelect value={ttsModel} onValueChange={setTtsModel}>
|
||||
<InputSelect.Trigger />
|
||||
<InputSelect.Content>
|
||||
{OPENAI_TTS_MODELS.map((model) => (
|
||||
<InputSelect.Item key={model.id} value={model.id}>
|
||||
{model.name}
|
||||
</InputSelect.Item>
|
||||
))}
|
||||
</InputSelect.Content>
|
||||
</InputSelect>
|
||||
</Vertical>
|
||||
)}
|
||||
|
||||
{mode === "tts" && (
|
||||
<Vertical
|
||||
title="Voice"
|
||||
subDescription={
|
||||
<>
|
||||
This voice will be used for spoken responses. See full list
|
||||
of supported languages and voices at{" "}
|
||||
<a
|
||||
href={
|
||||
PROVIDER_VOICE_DOCS_URLS[providerType]?.url ??
|
||||
PROVIDER_DOCS_URLS[providerType]
|
||||
}
|
||||
target="_blank"
|
||||
rel="noopener noreferrer"
|
||||
className="underline"
|
||||
>
|
||||
{PROVIDER_VOICE_DOCS_URLS[providerType]?.label ?? label}
|
||||
</a>
|
||||
.
|
||||
</>
|
||||
}
|
||||
nonInteractive
|
||||
>
|
||||
<InputComboBox
|
||||
value={defaultVoice}
|
||||
onValueChange={setDefaultVoice}
|
||||
options={voiceOptions}
|
||||
placeholder={
|
||||
isLoadingVoices
|
||||
? "Loading voices..."
|
||||
: "Select a voice or enter voice ID"
|
||||
}
|
||||
disabled={isLoadingVoices}
|
||||
strict={false}
|
||||
/>
|
||||
</Vertical>
|
||||
)}
|
||||
</Section>
|
||||
</Modal.Body>
|
||||
<Modal.Footer>
|
||||
<Button secondary onClick={onClose}>
|
||||
Cancel
|
||||
</Button>
|
||||
<Button onClick={handleSubmit} disabled={isSubmitting}>
|
||||
{isSubmitting ? "Connecting..." : isEditing ? "Save" : "Connect"}
|
||||
</Button>
|
||||
</Modal.Footer>
|
||||
</Modal.Content>
|
||||
</Modal>
|
||||
);
|
||||
}
|
||||
@@ -1,630 +0,0 @@
|
||||
"use client";
|
||||
|
||||
import Image from "next/image";
|
||||
import { useMemo, useState } from "react";
|
||||
import { AdminPageTitle } from "@/components/admin/Title";
|
||||
import {
|
||||
AzureIcon,
|
||||
ElevenLabsIcon,
|
||||
InfoIcon,
|
||||
OpenAIIcon,
|
||||
} from "@/components/icons/icons";
|
||||
import Text from "@/refresh-components/texts/Text";
|
||||
import Separator from "@/refresh-components/Separator";
|
||||
import { FetchError } from "@/lib/fetcher";
|
||||
import {
|
||||
useVoiceProviders,
|
||||
VoiceProviderView,
|
||||
} from "@/hooks/useVoiceProviders";
|
||||
import {
|
||||
activateVoiceProvider,
|
||||
deactivateVoiceProvider,
|
||||
} from "@/lib/admin/voice/svc";
|
||||
import { ThreeDotsLoader } from "@/components/Loading";
|
||||
import { Callout } from "@/components/ui/callout";
|
||||
import Button from "@/refresh-components/buttons/Button";
|
||||
import { Button as OpalButton } from "@opal/components";
|
||||
import { cn } from "@/lib/utils";
|
||||
import {
|
||||
SvgArrowExchange,
|
||||
SvgArrowRightCircle,
|
||||
SvgAudio,
|
||||
SvgCheckSquare,
|
||||
SvgEdit,
|
||||
SvgMicrophone,
|
||||
SvgX,
|
||||
} from "@opal/icons";
|
||||
import VoiceProviderSetupModal from "./VoiceProviderSetupModal";
|
||||
|
||||
interface ModelDetails {
|
||||
id: string;
|
||||
label: string;
|
||||
subtitle: string;
|
||||
logoSrc?: string;
|
||||
providerType: string;
|
||||
}
|
||||
|
||||
interface ProviderGroup {
|
||||
providerType: string;
|
||||
providerLabel: string;
|
||||
logoSrc?: string;
|
||||
models: ModelDetails[];
|
||||
}
|
||||
|
||||
// STT Models - individual cards
|
||||
const STT_MODELS: ModelDetails[] = [
|
||||
{
|
||||
id: "whisper",
|
||||
label: "Whisper",
|
||||
subtitle: "OpenAI's general purpose speech recognition model.",
|
||||
logoSrc: "/Openai.svg",
|
||||
providerType: "openai",
|
||||
},
|
||||
{
|
||||
id: "azure-speech-stt",
|
||||
label: "Azure Speech",
|
||||
subtitle: "Speech to text in Microsoft Foundry Tools.",
|
||||
logoSrc: "/Azure.png",
|
||||
providerType: "azure",
|
||||
},
|
||||
{
|
||||
id: "elevenlabs-stt",
|
||||
label: "ElevenAPI",
|
||||
subtitle: "ElevenLabs Speech to Text API.",
|
||||
logoSrc: "/ElevenLabs.svg",
|
||||
providerType: "elevenlabs",
|
||||
},
|
||||
];
|
||||
|
||||
// TTS Models - grouped by provider
|
||||
const TTS_PROVIDER_GROUPS: ProviderGroup[] = [
|
||||
{
|
||||
providerType: "openai",
|
||||
providerLabel: "OpenAI",
|
||||
logoSrc: "/Openai.svg",
|
||||
models: [
|
||||
{
|
||||
id: "tts-1",
|
||||
label: "TTS-1",
|
||||
subtitle: "OpenAI's text-to-speech model optimized for speed.",
|
||||
logoSrc: "/Openai.svg",
|
||||
providerType: "openai",
|
||||
},
|
||||
{
|
||||
id: "tts-1-hd",
|
||||
label: "TTS-1 HD",
|
||||
subtitle: "OpenAI's text-to-speech model optimized for quality.",
|
||||
logoSrc: "/Openai.svg",
|
||||
providerType: "openai",
|
||||
},
|
||||
],
|
||||
},
|
||||
{
|
||||
providerType: "azure",
|
||||
providerLabel: "Azure",
|
||||
logoSrc: "/Azure.png",
|
||||
models: [
|
||||
{
|
||||
id: "azure-speech-tts",
|
||||
label: "Azure Speech",
|
||||
subtitle: "Text to speech in Microsoft Foundry Tools.",
|
||||
logoSrc: "/Azure.png",
|
||||
providerType: "azure",
|
||||
},
|
||||
],
|
||||
},
|
||||
{
|
||||
providerType: "elevenlabs",
|
||||
providerLabel: "ElevenLabs",
|
||||
logoSrc: "/ElevenLabs.svg",
|
||||
models: [
|
||||
{
|
||||
id: "elevenlabs-tts",
|
||||
label: "ElevenAPI",
|
||||
subtitle: "ElevenLabs Text to Speech API.",
|
||||
logoSrc: "/ElevenLabs.svg",
|
||||
providerType: "elevenlabs",
|
||||
},
|
||||
],
|
||||
},
|
||||
];
|
||||
|
||||
interface HoverIconButtonProps extends React.ComponentProps<typeof Button> {
|
||||
isHovered: boolean;
|
||||
onMouseEnter: () => void;
|
||||
onMouseLeave: () => void;
|
||||
children: React.ReactNode;
|
||||
}
|
||||
|
||||
function HoverIconButton({
|
||||
isHovered,
|
||||
onMouseEnter,
|
||||
onMouseLeave,
|
||||
children,
|
||||
...buttonProps
|
||||
}: HoverIconButtonProps) {
|
||||
return (
|
||||
<div onMouseEnter={onMouseEnter} onMouseLeave={onMouseLeave}>
|
||||
<Button {...buttonProps} rightIcon={isHovered ? SvgX : SvgCheckSquare}>
|
||||
{children}
|
||||
</Button>
|
||||
</div>
|
||||
);
|
||||
}
|
||||
|
||||
type ProviderMode = "stt" | "tts";
|
||||
|
||||
export default function VoiceConfigurationPage() {
|
||||
const [modalOpen, setModalOpen] = useState(false);
|
||||
const [selectedProvider, setSelectedProvider] = useState<string | null>(null);
|
||||
const [editingProvider, setEditingProvider] =
|
||||
useState<VoiceProviderView | null>(null);
|
||||
const [modalMode, setModalMode] = useState<ProviderMode>("stt");
|
||||
const [selectedModelId, setSelectedModelId] = useState<string | null>(null);
|
||||
const [sttActivationError, setSTTActivationError] = useState<string | null>(
|
||||
null
|
||||
);
|
||||
const [ttsActivationError, setTTSActivationError] = useState<string | null>(
|
||||
null
|
||||
);
|
||||
const [hoveredButtonKey, setHoveredButtonKey] = useState<string | null>(null);
|
||||
|
||||
const { providers, error, isLoading, refresh: mutate } = useVoiceProviders();
|
||||
|
||||
const handleConnect = (
|
||||
providerType: string,
|
||||
mode: ProviderMode,
|
||||
modelId?: string
|
||||
) => {
|
||||
setSelectedProvider(providerType);
|
||||
setEditingProvider(null);
|
||||
setModalMode(mode);
|
||||
setSelectedModelId(modelId ?? null);
|
||||
setModalOpen(true);
|
||||
setSTTActivationError(null);
|
||||
setTTSActivationError(null);
|
||||
};
|
||||
|
||||
const handleEdit = (
|
||||
provider: VoiceProviderView,
|
||||
mode: ProviderMode,
|
||||
modelId?: string
|
||||
) => {
|
||||
setSelectedProvider(provider.provider_type);
|
||||
setEditingProvider(provider);
|
||||
setModalMode(mode);
|
||||
setSelectedModelId(modelId ?? null);
|
||||
setModalOpen(true);
|
||||
};
|
||||
|
||||
const handleSetDefault = async (
|
||||
providerId: number,
|
||||
mode: ProviderMode,
|
||||
modelId?: string
|
||||
) => {
|
||||
const setError =
|
||||
mode === "stt" ? setSTTActivationError : setTTSActivationError;
|
||||
setError(null);
|
||||
try {
|
||||
const response = await activateVoiceProvider(providerId, mode, modelId);
|
||||
if (!response.ok) {
|
||||
const errorBody = await response.json().catch(() => ({}));
|
||||
throw new Error(
|
||||
typeof errorBody?.detail === "string"
|
||||
? errorBody.detail
|
||||
: `Failed to set provider as default ${mode.toUpperCase()}.`
|
||||
);
|
||||
}
|
||||
await mutate();
|
||||
} catch (err) {
|
||||
const message =
|
||||
err instanceof Error ? err.message : "Unexpected error occurred.";
|
||||
setError(message);
|
||||
}
|
||||
};
|
||||
|
||||
const handleDeactivate = async (providerId: number, mode: ProviderMode) => {
|
||||
const setError =
|
||||
mode === "stt" ? setSTTActivationError : setTTSActivationError;
|
||||
setError(null);
|
||||
try {
|
||||
const response = await deactivateVoiceProvider(providerId, mode);
|
||||
if (!response.ok) {
|
||||
const errorBody = await response.json().catch(() => ({}));
|
||||
throw new Error(
|
||||
typeof errorBody?.detail === "string"
|
||||
? errorBody.detail
|
||||
: `Failed to deactivate ${mode.toUpperCase()} provider.`
|
||||
);
|
||||
}
|
||||
await mutate();
|
||||
} catch (err) {
|
||||
const message =
|
||||
err instanceof Error ? err.message : "Unexpected error occurred.";
|
||||
setError(message);
|
||||
}
|
||||
};
|
||||
|
||||
const handleModalClose = () => {
|
||||
setModalOpen(false);
|
||||
setSelectedProvider(null);
|
||||
setEditingProvider(null);
|
||||
setSelectedModelId(null);
|
||||
};
|
||||
|
||||
const handleModalSuccess = () => {
|
||||
mutate();
|
||||
handleModalClose();
|
||||
};
|
||||
|
||||
const isProviderConfigured = (provider?: VoiceProviderView): boolean => {
|
||||
return !!provider?.has_api_key;
|
||||
};
|
||||
|
||||
// Map provider types to their configured provider data
|
||||
const providersByType = useMemo(() => {
|
||||
return new Map((providers ?? []).map((p) => [p.provider_type, p] as const));
|
||||
}, [providers]);
|
||||
|
||||
const hasActiveSTTProvider =
|
||||
providers?.some((p) => p.is_default_stt) ?? false;
|
||||
const hasActiveTTSProvider =
|
||||
providers?.some((p) => p.is_default_tts) ?? false;
|
||||
|
||||
const renderLogo = ({
|
||||
logoSrc,
|
||||
providerType,
|
||||
alt,
|
||||
size = 16,
|
||||
}: {
|
||||
logoSrc?: string;
|
||||
providerType: string;
|
||||
alt: string;
|
||||
size?: number;
|
||||
}) => {
|
||||
const containerSizeClass = size === 24 ? "size-7" : "size-5";
|
||||
|
||||
return (
|
||||
<div
|
||||
className={cn(
|
||||
"flex items-center justify-center px-0.5 py-0 shrink-0 overflow-clip",
|
||||
containerSizeClass
|
||||
)}
|
||||
>
|
||||
{providerType === "openai" ? (
|
||||
<OpenAIIcon size={size} />
|
||||
) : providerType === "azure" ? (
|
||||
<AzureIcon size={size} />
|
||||
) : providerType === "elevenlabs" ? (
|
||||
<ElevenLabsIcon size={size} />
|
||||
) : logoSrc ? (
|
||||
<Image
|
||||
src={logoSrc}
|
||||
alt={alt}
|
||||
width={size}
|
||||
height={size}
|
||||
className="object-contain"
|
||||
/>
|
||||
) : (
|
||||
<SvgMicrophone size={size} className="text-text-02" />
|
||||
)}
|
||||
</div>
|
||||
);
|
||||
};
|
||||
|
||||
const renderModelCard = ({
|
||||
model,
|
||||
mode,
|
||||
}: {
|
||||
model: ModelDetails;
|
||||
mode: ProviderMode;
|
||||
}) => {
|
||||
const provider = providersByType.get(model.providerType);
|
||||
const isConfigured = isProviderConfigured(provider);
|
||||
// For TTS, also check that this specific model is the default (not just the provider)
|
||||
const isActive =
|
||||
mode === "stt"
|
||||
? provider?.is_default_stt
|
||||
: provider?.is_default_tts && provider?.tts_model === model.id;
|
||||
const isHighlighted = isActive ?? false;
|
||||
const providerId = provider?.id;
|
||||
|
||||
const buttonState = (() => {
|
||||
if (!provider || !isConfigured) {
|
||||
return {
|
||||
label: "Connect",
|
||||
disabled: false,
|
||||
icon: "arrow" as const,
|
||||
onClick: () => handleConnect(model.providerType, mode, model.id),
|
||||
};
|
||||
}
|
||||
|
||||
if (isActive) {
|
||||
return {
|
||||
label: "Current Default",
|
||||
disabled: false,
|
||||
icon: "check" as const,
|
||||
onClick: providerId
|
||||
? () => handleDeactivate(providerId, mode)
|
||||
: undefined,
|
||||
};
|
||||
}
|
||||
|
||||
return {
|
||||
label: "Set as Default",
|
||||
disabled: false,
|
||||
icon: "arrow-circle" as const,
|
||||
onClick: providerId
|
||||
? () => handleSetDefault(providerId, mode, model.id)
|
||||
: undefined,
|
||||
};
|
||||
})();
|
||||
|
||||
const buttonKey = `${mode}-${model.id}`;
|
||||
const isButtonHovered = hoveredButtonKey === buttonKey;
|
||||
const isCardClickable =
|
||||
buttonState.icon === "arrow" &&
|
||||
typeof buttonState.onClick === "function" &&
|
||||
!buttonState.disabled;
|
||||
|
||||
const handleCardClick = () => {
|
||||
if (isCardClickable) {
|
||||
buttonState.onClick?.();
|
||||
}
|
||||
};
|
||||
|
||||
return (
|
||||
<div
|
||||
key={`${mode}-${model.id}`}
|
||||
onClick={isCardClickable ? handleCardClick : undefined}
|
||||
className={cn(
|
||||
"flex items-start justify-between gap-4 rounded-16 border p-2 bg-background-neutral-01",
|
||||
isHighlighted ? "border-action-link-05" : "border-border-01",
|
||||
isCardClickable &&
|
||||
"cursor-pointer hover:bg-background-tint-01 transition-colors"
|
||||
)}
|
||||
>
|
||||
<div className="flex flex-1 items-start gap-2.5 p-2">
|
||||
{renderLogo({
|
||||
logoSrc: model.logoSrc,
|
||||
providerType: model.providerType,
|
||||
alt: `${model.label} logo`,
|
||||
size: 16,
|
||||
})}
|
||||
<div className="flex flex-col gap-0.5">
|
||||
<Text as="p" mainUiAction text04>
|
||||
{model.label}
|
||||
</Text>
|
||||
<Text as="p" secondaryBody text03>
|
||||
{model.subtitle}
|
||||
</Text>
|
||||
</div>
|
||||
</div>
|
||||
<div className="flex items-center justify-end gap-1.5 self-center">
|
||||
{isConfigured && (
|
||||
<OpalButton
|
||||
icon={SvgEdit}
|
||||
tooltip="Edit"
|
||||
prominence="tertiary"
|
||||
size="sm"
|
||||
onClick={(e) => {
|
||||
e.stopPropagation();
|
||||
if (provider) handleEdit(provider, mode, model.id);
|
||||
}}
|
||||
aria-label={`Edit ${model.label}`}
|
||||
/>
|
||||
)}
|
||||
{buttonState.icon === "check" ? (
|
||||
<HoverIconButton
|
||||
isHovered={isButtonHovered}
|
||||
onMouseEnter={() => setHoveredButtonKey(buttonKey)}
|
||||
onMouseLeave={() => setHoveredButtonKey(null)}
|
||||
action={true}
|
||||
tertiary
|
||||
disabled={buttonState.disabled}
|
||||
onClick={(e) => {
|
||||
e.stopPropagation();
|
||||
buttonState.onClick?.();
|
||||
}}
|
||||
>
|
||||
{buttonState.label}
|
||||
</HoverIconButton>
|
||||
) : (
|
||||
<Button
|
||||
action={false}
|
||||
tertiary
|
||||
disabled={buttonState.disabled || !buttonState.onClick}
|
||||
onClick={(e) => {
|
||||
e.stopPropagation();
|
||||
buttonState.onClick?.();
|
||||
}}
|
||||
rightIcon={
|
||||
buttonState.icon === "arrow"
|
||||
? SvgArrowExchange
|
||||
: buttonState.icon === "arrow-circle"
|
||||
? SvgArrowRightCircle
|
||||
: undefined
|
||||
}
|
||||
>
|
||||
{buttonState.label}
|
||||
</Button>
|
||||
)}
|
||||
</div>
|
||||
</div>
|
||||
);
|
||||
};
|
||||
|
||||
if (error) {
|
||||
const message = error?.message || "Unable to load voice configuration.";
|
||||
const detail =
|
||||
error instanceof FetchError && typeof error.info?.detail === "string"
|
||||
? error.info.detail
|
||||
: undefined;
|
||||
|
||||
return (
|
||||
<>
|
||||
<AdminPageTitle
|
||||
title="Voice"
|
||||
icon={SvgMicrophone}
|
||||
includeDivider={false}
|
||||
/>
|
||||
<Callout type="danger" title="Failed to load voice settings">
|
||||
{message}
|
||||
{detail && (
|
||||
<Text as="p" className="mt-2 text-text-03" mainContentBody text03>
|
||||
{detail}
|
||||
</Text>
|
||||
)}
|
||||
</Callout>
|
||||
</>
|
||||
);
|
||||
}
|
||||
|
||||
if (isLoading) {
|
||||
return (
|
||||
<>
|
||||
<AdminPageTitle
|
||||
title="Voice"
|
||||
icon={SvgMicrophone}
|
||||
includeDivider={false}
|
||||
/>
|
||||
<div className="mt-8">
|
||||
<ThreeDotsLoader />
|
||||
</div>
|
||||
</>
|
||||
);
|
||||
}
|
||||
|
||||
return (
|
||||
<>
|
||||
<AdminPageTitle icon={SvgAudio} title="Voice" />
|
||||
<div className="pt-4 pb-4">
|
||||
<Text as="p" secondaryBody text03>
|
||||
Speech to text (STT) and text to speech (TTS) capabilities.
|
||||
</Text>
|
||||
</div>
|
||||
|
||||
<Separator />
|
||||
|
||||
<div className="flex w-full flex-col gap-8 pb-6">
|
||||
{/* Speech-to-Text Section */}
|
||||
<div className="flex w-full max-w-[960px] flex-col gap-3">
|
||||
<div className="flex flex-col">
|
||||
<Text as="p" mainContentEmphasis text04>
|
||||
Speech to Text
|
||||
</Text>
|
||||
<Text as="p" secondaryBody text03>
|
||||
Select a model to transcribe speech to text in chats.
|
||||
</Text>
|
||||
</div>
|
||||
|
||||
{sttActivationError && (
|
||||
<Callout type="danger" title="Unable to update STT provider">
|
||||
{sttActivationError}
|
||||
</Callout>
|
||||
)}
|
||||
|
||||
{!hasActiveSTTProvider && (
|
||||
<div
|
||||
className="flex items-start rounded-16 border p-2"
|
||||
style={{
|
||||
backgroundColor: "var(--status-info-00)",
|
||||
borderColor: "var(--status-info-02)",
|
||||
}}
|
||||
>
|
||||
<div className="flex items-start gap-1 p-2">
|
||||
<div
|
||||
className="flex size-5 items-center justify-center rounded-full p-0.5"
|
||||
style={{
|
||||
backgroundColor: "var(--status-info-01)",
|
||||
}}
|
||||
>
|
||||
<div style={{ color: "var(--status-text-info-05)" }}>
|
||||
<InfoIcon size={16} />
|
||||
</div>
|
||||
</div>
|
||||
<Text as="p" className="flex-1 px-0.5" mainUiBody text04>
|
||||
Connect a speech to text provider to use in chat.
|
||||
</Text>
|
||||
</div>
|
||||
</div>
|
||||
)}
|
||||
|
||||
<div className="flex flex-col gap-2">
|
||||
{STT_MODELS.map((model) => renderModelCard({ model, mode: "stt" }))}
|
||||
</div>
|
||||
</div>
|
||||
|
||||
{/* Text-to-Speech Section */}
|
||||
<div className="flex w-full max-w-[960px] flex-col gap-3">
|
||||
<div className="flex flex-col">
|
||||
<Text as="p" mainContentEmphasis text04>
|
||||
Text to Speech
|
||||
</Text>
|
||||
<Text as="p" secondaryBody text03>
|
||||
Select a model to speak out chat responses.
|
||||
</Text>
|
||||
</div>
|
||||
|
||||
{ttsActivationError && (
|
||||
<Callout type="danger" title="Unable to update TTS provider">
|
||||
{ttsActivationError}
|
||||
</Callout>
|
||||
)}
|
||||
|
||||
{!hasActiveTTSProvider && (
|
||||
<div
|
||||
className="flex items-start rounded-16 border p-2"
|
||||
style={{
|
||||
backgroundColor: "var(--status-info-00)",
|
||||
borderColor: "var(--status-info-02)",
|
||||
}}
|
||||
>
|
||||
<div className="flex items-start gap-1 p-2">
|
||||
<div
|
||||
className="flex size-5 items-center justify-center rounded-full p-0.5"
|
||||
style={{
|
||||
backgroundColor: "var(--status-info-01)",
|
||||
}}
|
||||
>
|
||||
<div style={{ color: "var(--status-text-info-05)" }}>
|
||||
<InfoIcon size={16} />
|
||||
</div>
|
||||
</div>
|
||||
<Text as="p" className="flex-1 px-0.5" mainUiBody text04>
|
||||
Connect a text to speech provider to use in chat.
|
||||
</Text>
|
||||
</div>
|
||||
</div>
|
||||
)}
|
||||
|
||||
<div className="flex flex-col gap-4">
|
||||
{TTS_PROVIDER_GROUPS.map((group) => (
|
||||
<div key={group.providerType} className="flex flex-col gap-2">
|
||||
<Text as="p" secondaryBody text03 className="px-0.5">
|
||||
{group.providerLabel}
|
||||
</Text>
|
||||
<div className="flex flex-col gap-2">
|
||||
{group.models.map((model) =>
|
||||
renderModelCard({ model, mode: "tts" })
|
||||
)}
|
||||
</div>
|
||||
</div>
|
||||
))}
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
|
||||
{modalOpen && selectedProvider && (
|
||||
<VoiceProviderSetupModal
|
||||
providerType={selectedProvider}
|
||||
existingProvider={editingProvider}
|
||||
mode={modalMode}
|
||||
defaultModelId={selectedModelId}
|
||||
onClose={handleModalClose}
|
||||
onSuccess={handleModalSuccess}
|
||||
/>
|
||||
)}
|
||||
</>
|
||||
);
|
||||
}
|
||||
@@ -3,7 +3,6 @@ import type { Route } from "next";
|
||||
import { unstable_noStore as noStore } from "next/cache";
|
||||
import { requireAuth } from "@/lib/auth/requireAuth";
|
||||
import { ProjectsProvider } from "@/providers/ProjectsContext";
|
||||
import { VoiceModeProvider } from "@/providers/VoiceModeProvider";
|
||||
import AppSidebar from "@/sections/sidebar/AppSidebar";
|
||||
|
||||
export interface LayoutProps {
|
||||
@@ -22,15 +21,10 @@ export default async function Layout({ children }: LayoutProps) {
|
||||
|
||||
return (
|
||||
<ProjectsProvider>
|
||||
{/* VoiceModeProvider wraps the full app layout so TTS playback state
|
||||
persists across page navigations (e.g., sidebar clicks during playback).
|
||||
It only activates WebSocket connections when TTS is actually triggered. */}
|
||||
<VoiceModeProvider>
|
||||
<div className="flex flex-row w-full h-full">
|
||||
<AppSidebar />
|
||||
{children}
|
||||
</div>
|
||||
</VoiceModeProvider>
|
||||
<div className="flex flex-row w-full h-full">
|
||||
<AppSidebar />
|
||||
{children}
|
||||
</div>
|
||||
</ProjectsProvider>
|
||||
);
|
||||
}
|
||||
|
||||
@@ -1,12 +1,6 @@
|
||||
"use client";
|
||||
|
||||
import React, {
|
||||
useRef,
|
||||
RefObject,
|
||||
useMemo,
|
||||
useEffect,
|
||||
useLayoutEffect,
|
||||
} from "react";
|
||||
import React, { useRef, RefObject, useMemo } from "react";
|
||||
import { Packet, StopReason } from "@/app/app/services/streamingModels";
|
||||
import CustomToolAuthCard from "@/app/app/message/messageComponents/CustomToolAuthCard";
|
||||
import { FullChatState } from "@/app/app/message/messageComponents/interfaces";
|
||||
@@ -22,9 +16,6 @@ import { LlmDescriptor, LlmManager } from "@/lib/hooks";
|
||||
import { Message } from "@/app/app/interfaces";
|
||||
import Text from "@/refresh-components/texts/Text";
|
||||
import { AgentTimeline } from "@/app/app/message/messageComponents/timeline/AgentTimeline";
|
||||
import { useVoiceMode } from "@/providers/VoiceModeProvider";
|
||||
import { getTextContent } from "@/app/app/services/packetUtils";
|
||||
import { removeThinkingTokens } from "@/app/app/services/thinkingTokens";
|
||||
|
||||
// Type for the regeneration factory function passed from ChatUI
|
||||
export type RegenerationFactory = (regenerationRequest: {
|
||||
@@ -84,7 +75,6 @@ function arePropsEqual(
|
||||
|
||||
const AgentMessage = React.memo(function AgentMessage({
|
||||
rawPackets,
|
||||
packetCount,
|
||||
chatState,
|
||||
nodeId,
|
||||
messageId,
|
||||
@@ -172,59 +162,6 @@ const AgentMessage = React.memo(function AgentMessage({
|
||||
onMessageSelection,
|
||||
});
|
||||
|
||||
// Streaming TTS integration
|
||||
const { streamTTS, resetTTS, stopTTS } = useVoiceMode();
|
||||
const ttsCompletedRef = useRef(false);
|
||||
const streamTTSRef = useRef(streamTTS);
|
||||
|
||||
// Keep streamTTS ref in sync without triggering effect re-runs
|
||||
useEffect(() => {
|
||||
streamTTSRef.current = streamTTS;
|
||||
}, [streamTTS]);
|
||||
|
||||
// Stream TTS as text content arrives - only for messages still streaming
|
||||
// Uses ref for streamTTS to avoid re-triggering when its identity changes
|
||||
// Note: packetCount is used instead of rawPackets because the array is mutated in place
|
||||
useLayoutEffect(() => {
|
||||
// Skip if we've already finished TTS for this message
|
||||
if (ttsCompletedRef.current) return;
|
||||
|
||||
// If user cancelled generation, do not send more text to TTS.
|
||||
if (stopPacketSeen && stopReason === StopReason.USER_CANCELLED) {
|
||||
ttsCompletedRef.current = true;
|
||||
return;
|
||||
}
|
||||
|
||||
const textContent = removeThinkingTokens(getTextContent(rawPackets));
|
||||
if (typeof textContent === "string" && textContent.length > 0) {
|
||||
streamTTSRef.current(textContent, isComplete, nodeId);
|
||||
|
||||
// Mark as completed once the message is done streaming
|
||||
if (isComplete) {
|
||||
ttsCompletedRef.current = true;
|
||||
}
|
||||
}
|
||||
}, [packetCount, isComplete, rawPackets, nodeId, stopPacketSeen, stopReason]); // packetCount triggers on new packets since rawPackets is mutated in place
|
||||
|
||||
// Stop TTS immediately when user cancels generation.
|
||||
useEffect(() => {
|
||||
if (stopPacketSeen && stopReason === StopReason.USER_CANCELLED) {
|
||||
stopTTS({ manual: true });
|
||||
}
|
||||
}, [stopPacketSeen, stopReason, stopTTS]);
|
||||
|
||||
// Reset TTS completed flag when nodeId changes (new message)
|
||||
useEffect(() => {
|
||||
ttsCompletedRef.current = false;
|
||||
}, [nodeId]);
|
||||
|
||||
// Reset TTS when component unmounts or nodeId changes
|
||||
useEffect(() => {
|
||||
return () => {
|
||||
resetTTS();
|
||||
};
|
||||
}, [nodeId, resetTTS]);
|
||||
|
||||
return (
|
||||
<div
|
||||
className="flex flex-col gap-3"
|
||||
@@ -271,8 +208,6 @@ const AgentMessage = React.memo(function AgentMessage({
|
||||
key={`${displayGroup.turn_index}-${displayGroup.tab_index}`}
|
||||
packets={displayGroup.packets}
|
||||
chatState={effectiveChatState}
|
||||
messageNodeId={nodeId}
|
||||
hasTimelineThinking={pacedTurnGroups.length > 0 || hasSteps}
|
||||
onComplete={() => {
|
||||
// Only mark complete on the last display group
|
||||
// Hook handles the finalAnswerComing check internally
|
||||
|
||||
@@ -29,9 +29,6 @@ import FeedbackModal, {
|
||||
FeedbackModalProps,
|
||||
} from "@/sections/modals/FeedbackModal";
|
||||
import { Button, SelectButton } from "@opal/components";
|
||||
import TTSButton from "./TTSButton";
|
||||
import { useVoiceMode } from "@/providers/VoiceModeProvider";
|
||||
import { useVoiceStatus } from "@/hooks/useVoiceStatus";
|
||||
|
||||
// Wrapper component for SourceTag in toolbar to handle memoization
|
||||
const SourcesTagWrapper = React.memo(function SourcesTagWrapper({
|
||||
@@ -147,14 +144,6 @@ export default function MessageToolbar({
|
||||
(state) => state.updateCurrentSelectedNodeForDocDisplay
|
||||
);
|
||||
|
||||
// Voice mode - hide toolbar during TTS playback for this message
|
||||
const { isTTSPlaying, activeMessageNodeId, isAwaitingAutoPlaybackStart } =
|
||||
useVoiceMode();
|
||||
const { ttsEnabled } = useVoiceStatus();
|
||||
const isTTSActiveForThisMessage =
|
||||
(isTTSPlaying || isAwaitingAutoPlaybackStart) &&
|
||||
activeMessageNodeId === nodeId;
|
||||
|
||||
// Feedback modal state and handlers
|
||||
const { handleFeedbackChange } = useFeedbackController();
|
||||
const modal = useCreateModal();
|
||||
@@ -215,11 +204,6 @@ export default function MessageToolbar({
|
||||
[messageId, currentFeedback, handleFeedbackChange, modal]
|
||||
);
|
||||
|
||||
// Hide toolbar while TTS is playing for this message
|
||||
if (isTTSActiveForThisMessage) {
|
||||
return null;
|
||||
}
|
||||
|
||||
return (
|
||||
<>
|
||||
<modal.Provider>
|
||||
@@ -284,13 +268,6 @@ export default function MessageToolbar({
|
||||
}
|
||||
data-testid="AgentMessage/dislike-button"
|
||||
/>
|
||||
{ttsEnabled && (
|
||||
<TTSButton
|
||||
text={
|
||||
removeThinkingTokens(getTextContent(rawPackets)) as string
|
||||
}
|
||||
/>
|
||||
)}
|
||||
|
||||
{onRegenerate &&
|
||||
messageId !== undefined &&
|
||||
|
||||
@@ -1,90 +0,0 @@
|
||||
"use client";
|
||||
|
||||
import { useCallback, useEffect } from "react";
|
||||
import { SvgPlayCircle, SvgStop } from "@opal/icons";
|
||||
import { Button } from "@opal/components";
|
||||
import { useVoicePlayback } from "@/hooks/useVoicePlayback";
|
||||
import { useVoiceMode } from "@/providers/VoiceModeProvider";
|
||||
import { toast } from "@/hooks/useToast";
|
||||
import SimpleLoader from "@/refresh-components/loaders/SimpleLoader";
|
||||
|
||||
interface TTSButtonProps {
|
||||
text: string;
|
||||
voice?: string;
|
||||
speed?: number;
|
||||
}
|
||||
|
||||
function TTSButton({ text, voice, speed }: TTSButtonProps) {
|
||||
const { isPlaying, isLoading, error, play, pause, stop } = useVoicePlayback();
|
||||
const { isTTSPlaying, isTTSLoading, isAwaitingAutoPlaybackStart, stopTTS } =
|
||||
useVoiceMode();
|
||||
|
||||
const isGlobalTTSActive =
|
||||
isTTSPlaying || isTTSLoading || isAwaitingAutoPlaybackStart;
|
||||
const isButtonPlaying = isGlobalTTSActive || isPlaying;
|
||||
const isButtonLoading = !isGlobalTTSActive && isLoading;
|
||||
|
||||
const handleClick = useCallback(async () => {
|
||||
if (isGlobalTTSActive) {
|
||||
// Stop auto-playback voice mode stream from the toolbar button.
|
||||
stopTTS({ manual: true });
|
||||
stop();
|
||||
} else if (isPlaying) {
|
||||
pause();
|
||||
} else if (isButtonLoading) {
|
||||
stop();
|
||||
} else {
|
||||
try {
|
||||
// Ensure no voice-mode stream is active before starting manual playback.
|
||||
stopTTS();
|
||||
await play(text, voice, speed);
|
||||
} catch (err) {
|
||||
console.error("TTS playback failed:", err);
|
||||
toast.error("Could not play audio");
|
||||
}
|
||||
}
|
||||
}, [
|
||||
isGlobalTTSActive,
|
||||
isPlaying,
|
||||
isButtonLoading,
|
||||
text,
|
||||
voice,
|
||||
speed,
|
||||
play,
|
||||
pause,
|
||||
stop,
|
||||
stopTTS,
|
||||
]);
|
||||
|
||||
// Surface streaming voice playback errors to the user via toast
|
||||
useEffect(() => {
|
||||
if (error) {
|
||||
console.error("Voice playback error:", error);
|
||||
toast.error(error);
|
||||
}
|
||||
}, [error]);
|
||||
|
||||
const icon = isButtonLoading
|
||||
? SimpleLoader
|
||||
: isButtonPlaying
|
||||
? SvgStop
|
||||
: SvgPlayCircle;
|
||||
|
||||
const tooltip = isButtonPlaying
|
||||
? "Stop playback"
|
||||
: isButtonLoading
|
||||
? "Loading..."
|
||||
: "Read aloud";
|
||||
|
||||
return (
|
||||
<Button
|
||||
icon={icon}
|
||||
onClick={handleClick}
|
||||
prominence="tertiary"
|
||||
tooltip={tooltip}
|
||||
data-testid="AgentMessage/tts-button"
|
||||
/>
|
||||
);
|
||||
}
|
||||
|
||||
export default TTSButton;
|
||||
@@ -67,10 +67,6 @@ export type MessageRenderer<
|
||||
> = React.ComponentType<{
|
||||
packets: T[];
|
||||
state: S;
|
||||
/** Node id for the message currently being rendered */
|
||||
messageNodeId?: number;
|
||||
/** True when timeline/thinking UI is already shown above this text block */
|
||||
hasTimelineThinking?: boolean;
|
||||
onComplete: () => void;
|
||||
renderType: RenderType;
|
||||
animate: boolean;
|
||||
|
||||
@@ -166,8 +166,6 @@ function MixedContentHandler({
|
||||
chatPackets,
|
||||
imagePackets,
|
||||
chatState,
|
||||
messageNodeId,
|
||||
hasTimelineThinking,
|
||||
onComplete,
|
||||
animate,
|
||||
stopPacketSeen,
|
||||
@@ -177,8 +175,6 @@ function MixedContentHandler({
|
||||
chatPackets: Packet[];
|
||||
imagePackets: Packet[];
|
||||
chatState: FullChatState;
|
||||
messageNodeId?: number;
|
||||
hasTimelineThinking?: boolean;
|
||||
onComplete: () => void;
|
||||
animate: boolean;
|
||||
stopPacketSeen: boolean;
|
||||
@@ -189,8 +185,6 @@ function MixedContentHandler({
|
||||
<MessageTextRenderer
|
||||
packets={chatPackets as ChatPacket[]}
|
||||
state={chatState}
|
||||
messageNodeId={messageNodeId}
|
||||
hasTimelineThinking={hasTimelineThinking}
|
||||
onComplete={() => {}}
|
||||
animate={animate}
|
||||
renderType={RenderType.FULL}
|
||||
@@ -218,8 +212,6 @@ function MixedContentHandler({
|
||||
interface RendererComponentProps {
|
||||
packets: Packet[];
|
||||
chatState: FullChatState;
|
||||
messageNodeId?: number;
|
||||
hasTimelineThinking?: boolean;
|
||||
onComplete: () => void;
|
||||
animate: boolean;
|
||||
stopPacketSeen: boolean;
|
||||
@@ -237,8 +229,7 @@ function areRendererPropsEqual(
|
||||
prev.stopPacketSeen === next.stopPacketSeen &&
|
||||
prev.stopReason === next.stopReason &&
|
||||
prev.animate === next.animate &&
|
||||
prev.chatState.agent?.id === next.chatState.agent?.id &&
|
||||
prev.messageNodeId === next.messageNodeId
|
||||
prev.chatState.agent?.id === next.chatState.agent?.id
|
||||
// Skip: onComplete, children (function refs), chatState (memoized upstream)
|
||||
);
|
||||
}
|
||||
@@ -247,8 +238,6 @@ function areRendererPropsEqual(
|
||||
export const RendererComponent = memo(function RendererComponent({
|
||||
packets,
|
||||
chatState,
|
||||
messageNodeId,
|
||||
hasTimelineThinking,
|
||||
onComplete,
|
||||
animate,
|
||||
stopPacketSeen,
|
||||
@@ -283,8 +272,6 @@ export const RendererComponent = memo(function RendererComponent({
|
||||
chatPackets={chatPackets}
|
||||
imagePackets={imagePackets}
|
||||
chatState={chatState}
|
||||
messageNodeId={messageNodeId}
|
||||
hasTimelineThinking={hasTimelineThinking}
|
||||
onComplete={onComplete}
|
||||
animate={animate}
|
||||
stopPacketSeen={stopPacketSeen}
|
||||
@@ -305,8 +292,6 @@ export const RendererComponent = memo(function RendererComponent({
|
||||
<RendererFn
|
||||
packets={packets as any}
|
||||
state={chatState}
|
||||
messageNodeId={messageNodeId}
|
||||
hasTimelineThinking={hasTimelineThinking}
|
||||
onComplete={onComplete}
|
||||
animate={animate}
|
||||
renderType={RenderType.FULL}
|
||||
|
||||
@@ -1,4 +1,4 @@
|
||||
import React, { useEffect, useMemo, useRef, useState } from "react";
|
||||
import React, { useEffect, useMemo, useState } from "react";
|
||||
import Text from "@/refresh-components/texts/Text";
|
||||
|
||||
import {
|
||||
@@ -10,55 +10,6 @@ import { MessageRenderer, FullChatState } from "../interfaces";
|
||||
import { isFinalAnswerComplete } from "../../../services/packetUtils";
|
||||
import { useMarkdownRenderer } from "../markdownUtils";
|
||||
import { BlinkingBar } from "../../BlinkingBar";
|
||||
import { useVoiceMode } from "@/providers/VoiceModeProvider";
|
||||
|
||||
/**
|
||||
* Maps a cleaned character position to the corresponding position in markdown text.
|
||||
* This allows progressive reveal to work with markdown formatting.
|
||||
*/
|
||||
function getRevealPosition(markdown: string, cleanChars: number): number {
|
||||
// Skip patterns that don't contribute to visible character count
|
||||
const skipChars = new Set(["*", "`", "#"]);
|
||||
let cleanIndex = 0;
|
||||
let mdIndex = 0;
|
||||
|
||||
while (cleanIndex < cleanChars && mdIndex < markdown.length) {
|
||||
const char = markdown[mdIndex];
|
||||
|
||||
// Skip markdown formatting characters
|
||||
if (char !== undefined && skipChars.has(char)) {
|
||||
mdIndex++;
|
||||
continue;
|
||||
}
|
||||
|
||||
// Handle link syntax [text](url) - skip the (url) part but count the text
|
||||
if (
|
||||
char === "]" &&
|
||||
mdIndex + 1 < markdown.length &&
|
||||
markdown[mdIndex + 1] === "("
|
||||
) {
|
||||
const closeIdx = markdown.indexOf(")", mdIndex + 2);
|
||||
if (closeIdx > 0) {
|
||||
mdIndex = closeIdx + 1;
|
||||
continue;
|
||||
}
|
||||
}
|
||||
|
||||
cleanIndex++;
|
||||
mdIndex++;
|
||||
}
|
||||
|
||||
// Extend to word boundary to avoid cutting mid-word
|
||||
while (
|
||||
mdIndex < markdown.length &&
|
||||
markdown[mdIndex] !== " " &&
|
||||
markdown[mdIndex] !== "\n"
|
||||
) {
|
||||
mdIndex++;
|
||||
}
|
||||
|
||||
return mdIndex;
|
||||
}
|
||||
|
||||
// Control the rate of packet streaming (packets per second)
|
||||
const PACKET_DELAY_MS = 10;
|
||||
@@ -69,8 +20,6 @@ export const MessageTextRenderer: MessageRenderer<
|
||||
> = ({
|
||||
packets,
|
||||
state,
|
||||
messageNodeId,
|
||||
hasTimelineThinking,
|
||||
onComplete,
|
||||
renderType,
|
||||
animate,
|
||||
@@ -87,17 +36,6 @@ export const MessageTextRenderer: MessageRenderer<
|
||||
|
||||
const [displayedPacketCount, setDisplayedPacketCount] =
|
||||
useState(initialPacketCount);
|
||||
const lastStableSyncedContentRef = useRef("");
|
||||
const lastVisibleContentRef = useRef("");
|
||||
|
||||
// Get voice mode context for progressive text reveal synced with audio
|
||||
const {
|
||||
revealedCharCount,
|
||||
autoPlayback,
|
||||
isAudioSyncActive,
|
||||
activeMessageNodeId,
|
||||
isAwaitingAutoPlaybackStart,
|
||||
} = useVoiceMode();
|
||||
|
||||
// Get the full content from all packets
|
||||
const fullContent = packets
|
||||
@@ -112,11 +50,6 @@ export const MessageTextRenderer: MessageRenderer<
|
||||
})
|
||||
.join("");
|
||||
|
||||
const shouldUseAutoPlaybackSync =
|
||||
autoPlayback &&
|
||||
typeof messageNodeId === "number" &&
|
||||
activeMessageNodeId === messageNodeId;
|
||||
|
||||
// Animation effect - gradually increase displayed packets at controlled rate
|
||||
useEffect(() => {
|
||||
if (!animate) {
|
||||
@@ -160,37 +93,13 @@ export const MessageTextRenderer: MessageRenderer<
|
||||
}
|
||||
}, [packets, onComplete, animate, displayedPacketCount]);
|
||||
|
||||
// Get content based on displayed packet count or audio progress
|
||||
const computedContent = useMemo(() => {
|
||||
// Hold response in "thinking" state only while autoplay startup is pending.
|
||||
if (shouldUseAutoPlaybackSync && isAwaitingAutoPlaybackStart) {
|
||||
return "";
|
||||
}
|
||||
|
||||
// Sync text with audio only for the message currently being spoken.
|
||||
if (shouldUseAutoPlaybackSync && isAudioSyncActive) {
|
||||
const MIN_REVEAL_CHARS = 12;
|
||||
if (revealedCharCount < MIN_REVEAL_CHARS) {
|
||||
return "";
|
||||
}
|
||||
|
||||
// Reveal text progressively based on audio progress
|
||||
const revealPos = getRevealPosition(fullContent, revealedCharCount);
|
||||
return fullContent.slice(0, Math.max(revealPos, 0));
|
||||
}
|
||||
|
||||
// During an active synced turn, if sync temporarily drops, keep current reveal
|
||||
// instead of jumping to full content or blanking.
|
||||
if (shouldUseAutoPlaybackSync && !stopPacketSeen) {
|
||||
return lastStableSyncedContentRef.current;
|
||||
}
|
||||
|
||||
// Standard behavior when auto-playback is off
|
||||
// Get content based on displayed packet count
|
||||
const content = useMemo(() => {
|
||||
if (!animate || displayedPacketCount === -1) {
|
||||
return fullContent; // Show all content
|
||||
}
|
||||
|
||||
// Packet-based reveal (when auto-playback is disabled)
|
||||
// Only show content from packets up to displayedPacketCount
|
||||
return packets
|
||||
.slice(0, displayedPacketCount)
|
||||
.map((packet) => {
|
||||
@@ -203,109 +112,31 @@ export const MessageTextRenderer: MessageRenderer<
|
||||
return "";
|
||||
})
|
||||
.join("");
|
||||
}, [
|
||||
animate,
|
||||
displayedPacketCount,
|
||||
fullContent,
|
||||
packets,
|
||||
revealedCharCount,
|
||||
autoPlayback,
|
||||
isAudioSyncActive,
|
||||
activeMessageNodeId,
|
||||
isAwaitingAutoPlaybackStart,
|
||||
messageNodeId,
|
||||
shouldUseAutoPlaybackSync,
|
||||
stopPacketSeen,
|
||||
]);
|
||||
|
||||
// Keep synced text monotonic: once visible, never regress or disappear between chunks.
|
||||
const content = useMemo(() => {
|
||||
const wasUserCancelled = stopReason === StopReason.USER_CANCELLED;
|
||||
|
||||
// On user cancel, freeze at exactly what was already visible.
|
||||
if (wasUserCancelled) {
|
||||
return lastVisibleContentRef.current;
|
||||
}
|
||||
|
||||
if (!shouldUseAutoPlaybackSync) {
|
||||
return computedContent;
|
||||
}
|
||||
|
||||
if (computedContent.length === 0) {
|
||||
return lastStableSyncedContentRef.current;
|
||||
}
|
||||
|
||||
const last = lastStableSyncedContentRef.current;
|
||||
if (computedContent.startsWith(last)) {
|
||||
return computedContent;
|
||||
}
|
||||
|
||||
// If content shape changed unexpectedly mid-stream, prefer the stable version
|
||||
// to avoid flicker/dumps.
|
||||
if (!stopPacketSeen || wasUserCancelled) {
|
||||
return last;
|
||||
}
|
||||
|
||||
// For normal completed responses, allow final full content.
|
||||
return computedContent;
|
||||
}, [computedContent, shouldUseAutoPlaybackSync, stopPacketSeen, stopReason]);
|
||||
|
||||
// Sync the stable ref outside of useMemo to avoid side effects during render.
|
||||
useEffect(() => {
|
||||
if (stopReason === StopReason.USER_CANCELLED) {
|
||||
return;
|
||||
}
|
||||
if (!shouldUseAutoPlaybackSync) {
|
||||
lastStableSyncedContentRef.current = "";
|
||||
} else if (content.length > 0) {
|
||||
lastStableSyncedContentRef.current = content;
|
||||
}
|
||||
}, [content, shouldUseAutoPlaybackSync, stopReason]);
|
||||
|
||||
// Track last actually rendered content so cancel can freeze without dumping buffered text.
|
||||
useEffect(() => {
|
||||
if (content.length > 0) {
|
||||
lastVisibleContentRef.current = content;
|
||||
}
|
||||
}, [content]);
|
||||
|
||||
const shouldShowThinkingPlaceholder =
|
||||
shouldUseAutoPlaybackSync &&
|
||||
isAwaitingAutoPlaybackStart &&
|
||||
!hasTimelineThinking &&
|
||||
!stopPacketSeen;
|
||||
|
||||
const shouldShowSpeechWarmupIndicator =
|
||||
shouldUseAutoPlaybackSync &&
|
||||
!isAwaitingAutoPlaybackStart &&
|
||||
content.length === 0 &&
|
||||
fullContent.length > 0 &&
|
||||
!hasTimelineThinking &&
|
||||
!stopPacketSeen;
|
||||
|
||||
const shouldShowCursor =
|
||||
content.length > 0 &&
|
||||
(!stopPacketSeen ||
|
||||
(shouldUseAutoPlaybackSync && content.length < fullContent.length));
|
||||
}, [animate, displayedPacketCount, fullContent, packets]);
|
||||
|
||||
const { renderedContent } = useMarkdownRenderer(
|
||||
// the [*]() is a hack to show a blinking dot when the packet is not complete
|
||||
shouldShowCursor ? content + " [*]() " : content,
|
||||
stopPacketSeen ? content : content + " [*]() ",
|
||||
state,
|
||||
"font-main-content-body"
|
||||
);
|
||||
|
||||
const wasUserCancelled = stopReason === StopReason.USER_CANCELLED;
|
||||
|
||||
return children([
|
||||
{
|
||||
icon: null,
|
||||
status: null,
|
||||
content:
|
||||
shouldShowThinkingPlaceholder || shouldShowSpeechWarmupIndicator ? (
|
||||
<Text as="span" secondaryBody text04 className="italic">
|
||||
Thinking
|
||||
</Text>
|
||||
) : content.length > 0 ? (
|
||||
<>{renderedContent}</>
|
||||
content.length > 0 || packets.length > 0 ? (
|
||||
<>
|
||||
{renderedContent}
|
||||
{wasUserCancelled && (
|
||||
<Text as="p" secondaryBody text04>
|
||||
User has stopped generation
|
||||
</Text>
|
||||
)}
|
||||
</>
|
||||
) : (
|
||||
<BlinkingBar addMargin />
|
||||
),
|
||||
|
||||
@@ -566,21 +566,6 @@ textarea {
|
||||
animation: fadeIn 0.2s ease-out forwards;
|
||||
}
|
||||
|
||||
/* Recording waveform animation */
|
||||
@keyframes waveform {
|
||||
0%,
|
||||
100% {
|
||||
transform: scaleY(0.3);
|
||||
}
|
||||
50% {
|
||||
transform: scaleY(1);
|
||||
}
|
||||
}
|
||||
|
||||
.animate-waveform {
|
||||
animation: waveform 0.8s ease-in-out infinite;
|
||||
}
|
||||
|
||||
.container {
|
||||
margin-bottom: 1rem;
|
||||
}
|
||||
|
||||
@@ -1,5 +1,4 @@
|
||||
import { ProjectsProvider } from "@/providers/ProjectsContext";
|
||||
import { VoiceModeProvider } from "@/providers/VoiceModeProvider";
|
||||
|
||||
export interface LayoutProps {
|
||||
children: React.ReactNode;
|
||||
@@ -12,9 +11,5 @@ export interface LayoutProps {
|
||||
* Sidebar and chrome are handled by sub-layouts / individual pages.
|
||||
*/
|
||||
export default function Layout({ children }: LayoutProps) {
|
||||
return (
|
||||
<ProjectsProvider>
|
||||
<VoiceModeProvider>{children}</VoiceModeProvider>
|
||||
</ProjectsProvider>
|
||||
);
|
||||
return <ProjectsProvider>{children}</ProjectsProvider>;
|
||||
}
|
||||
|
||||
@@ -39,8 +39,6 @@ import document360Icon from "@public/Document360.png";
|
||||
import dropboxIcon from "@public/Dropbox.png";
|
||||
import drupalwikiIcon from "@public/DrupalWiki.png";
|
||||
import egnyteIcon from "@public/Egnyte.png";
|
||||
import elevenLabsDarkSVG from "@public/ElevenLabsDark.svg";
|
||||
import elevenLabsSVG from "@public/ElevenLabs.svg";
|
||||
import firefliesIcon from "@public/Fireflies.png";
|
||||
import freshdeskIcon from "@public/Freshdesk.png";
|
||||
import geminiSVG from "@public/Gemini.svg";
|
||||
@@ -845,9 +843,6 @@ export const Document360Icon = createLogoIcon(document360Icon);
|
||||
export const DropboxIcon = createLogoIcon(dropboxIcon);
|
||||
export const DrupalWikiIcon = createLogoIcon(drupalwikiIcon);
|
||||
export const EgnyteIcon = createLogoIcon(egnyteIcon);
|
||||
export const ElevenLabsIcon = createLogoIcon(elevenLabsSVG, {
|
||||
darkSrc: elevenLabsDarkSVG,
|
||||
});
|
||||
export const FirefliesIcon = createLogoIcon(firefliesIcon);
|
||||
export const FreshdeskIcon = createLogoIcon(freshdeskIcon);
|
||||
export const GeminiIcon = createLogoIcon(geminiSVG);
|
||||
|
||||
@@ -1,206 +0,0 @@
|
||||
"use client";
|
||||
|
||||
import { useEffect, useState, useMemo, useRef } from "react";
|
||||
import { cn } from "@/lib/utils";
|
||||
import { formatElapsedTime } from "@/lib/dateUtils";
|
||||
import { Button } from "@opal/components";
|
||||
import {
|
||||
SvgMicrophone,
|
||||
SvgMicrophoneOff,
|
||||
SvgVolume,
|
||||
SvgVolumeOff,
|
||||
} from "@opal/icons";
|
||||
|
||||
// Recording waveform constants
|
||||
const RECORDING_BAR_COUNT = 120;
|
||||
const MIN_BAR_HEIGHT = 2;
|
||||
const MAX_BAR_HEIGHT = 16;
|
||||
|
||||
// Speaking waveform constants
|
||||
const SPEAKING_BAR_COUNT = 28;
|
||||
|
||||
interface WaveformProps {
|
||||
/** Visual style and behavior variant */
|
||||
variant: "speaking" | "recording";
|
||||
/** Whether the waveform is actively animating */
|
||||
isActive: boolean;
|
||||
/** Whether audio is muted */
|
||||
isMuted?: boolean;
|
||||
/** Current microphone audio level (0-1), only used for recording variant */
|
||||
audioLevel?: number;
|
||||
/** Callback when mute button is clicked */
|
||||
onMuteToggle?: () => void;
|
||||
}
|
||||
|
||||
function Waveform({
|
||||
variant,
|
||||
isActive,
|
||||
isMuted = false,
|
||||
audioLevel = 0,
|
||||
onMuteToggle,
|
||||
}: WaveformProps) {
|
||||
// ─── Recording variant state ───────────────────────────────────────────────
|
||||
const [elapsedSeconds, setElapsedSeconds] = useState(0);
|
||||
const [barHeights, setBarHeights] = useState<number[]>(
|
||||
() => new Array(RECORDING_BAR_COUNT).fill(MIN_BAR_HEIGHT) as number[]
|
||||
);
|
||||
const animationRef = useRef<number | null>(null);
|
||||
const lastPushTimeRef = useRef(0);
|
||||
const audioLevelRef = useRef(audioLevel);
|
||||
audioLevelRef.current = audioLevel;
|
||||
|
||||
// ─── Speaking variant bars ─────────────────────────────────────────────────
|
||||
const speakingBars = useMemo(() => {
|
||||
return Array.from({ length: SPEAKING_BAR_COUNT }, (_, i) => ({
|
||||
id: i,
|
||||
// Create a natural wave pattern with height variation
|
||||
baseHeight: Math.sin(i * 0.4) * 5 + 8,
|
||||
delay: i * 0.025,
|
||||
}));
|
||||
}, []);
|
||||
|
||||
// ─── Recording: Timer effect ───────────────────────────────────────────────
|
||||
useEffect(() => {
|
||||
if (variant !== "recording") return;
|
||||
|
||||
if (!isActive) {
|
||||
setElapsedSeconds(0);
|
||||
return;
|
||||
}
|
||||
|
||||
const interval = setInterval(() => {
|
||||
setElapsedSeconds((prev) => prev + 1);
|
||||
}, 1000);
|
||||
|
||||
return () => clearInterval(interval);
|
||||
}, [variant, isActive]);
|
||||
|
||||
// ─── Recording: Audio level visualization effect ───────────────────────────
|
||||
useEffect(() => {
|
||||
if (variant !== "recording") return;
|
||||
|
||||
if (!isActive) {
|
||||
setBarHeights(
|
||||
new Array(RECORDING_BAR_COUNT).fill(MIN_BAR_HEIGHT) as number[]
|
||||
);
|
||||
lastPushTimeRef.current = 0;
|
||||
return;
|
||||
}
|
||||
|
||||
const updateBars = (timestamp: number) => {
|
||||
// Push a new bar roughly every 50ms (~20fps scrolling)
|
||||
if (timestamp - lastPushTimeRef.current >= 50) {
|
||||
lastPushTimeRef.current = timestamp;
|
||||
const level = isMuted ? 0 : audioLevelRef.current;
|
||||
const height =
|
||||
MIN_BAR_HEIGHT + level * (MAX_BAR_HEIGHT - MIN_BAR_HEIGHT);
|
||||
|
||||
setBarHeights((prev) => {
|
||||
const next = prev.slice(1);
|
||||
next.push(height);
|
||||
return next;
|
||||
});
|
||||
}
|
||||
|
||||
animationRef.current = requestAnimationFrame(updateBars);
|
||||
};
|
||||
|
||||
animationRef.current = requestAnimationFrame(updateBars);
|
||||
|
||||
return () => {
|
||||
if (animationRef.current) {
|
||||
cancelAnimationFrame(animationRef.current);
|
||||
animationRef.current = null;
|
||||
}
|
||||
};
|
||||
}, [variant, isActive, isMuted]);
|
||||
|
||||
const formattedTime = useMemo(
|
||||
() => formatElapsedTime(elapsedSeconds),
|
||||
[elapsedSeconds]
|
||||
);
|
||||
|
||||
if (!isActive) {
|
||||
return null;
|
||||
}
|
||||
|
||||
// ─── Speaking variant render ───────────────────────────────────────────────
|
||||
if (variant === "speaking") {
|
||||
return (
|
||||
<div className="flex items-center gap-0.5 p-1.5 bg-background-tint-00 rounded-16 shadow-01">
|
||||
{/* Waveform container */}
|
||||
<div className="flex items-center p-1 bg-background-tint-00 rounded-12 max-w-[144px] min-h-[32px]">
|
||||
<div className="flex items-center p-1">
|
||||
{/* Waveform bars */}
|
||||
<div className="flex items-center justify-center gap-[2px] h-4 w-[120px] overflow-hidden">
|
||||
{speakingBars.map((bar) => (
|
||||
<div
|
||||
key={bar.id}
|
||||
className={cn(
|
||||
"w-[3px] rounded-full",
|
||||
isMuted ? "bg-text-03" : "bg-theme-blue-05",
|
||||
!isMuted && "animate-waveform"
|
||||
)}
|
||||
style={{
|
||||
height: isMuted ? "2px" : `${bar.baseHeight}px`,
|
||||
animationDelay: isMuted ? undefined : `${bar.delay}s`,
|
||||
}}
|
||||
/>
|
||||
))}
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
|
||||
{/* Divider */}
|
||||
<div className="w-0.5 self-stretch bg-border-02" />
|
||||
|
||||
{/* Volume button */}
|
||||
{onMuteToggle && (
|
||||
<div className="flex items-center p-1 bg-background-tint-00 rounded-12">
|
||||
<Button
|
||||
icon={isMuted ? SvgVolumeOff : SvgVolume}
|
||||
onClick={onMuteToggle}
|
||||
prominence="tertiary"
|
||||
size="sm"
|
||||
tooltip={isMuted ? "Unmute" : "Mute"}
|
||||
/>
|
||||
</div>
|
||||
)}
|
||||
</div>
|
||||
);
|
||||
}
|
||||
|
||||
// ─── Recording variant render ──────────────────────────────────────────────
|
||||
return (
|
||||
<div className="flex items-center gap-3 px-3 py-2 bg-background-tint-00 rounded-12 min-h-[32px]">
|
||||
{/* Waveform visualization driven by real audio levels */}
|
||||
<div className="flex-1 flex items-center justify-between h-4 overflow-hidden">
|
||||
{barHeights.map((height, i) => (
|
||||
<div
|
||||
key={i}
|
||||
className="w-[1.5px] bg-text-03 rounded-full shrink-0 transition-[height] duration-75"
|
||||
style={{ height: `${height}px` }}
|
||||
/>
|
||||
))}
|
||||
</div>
|
||||
|
||||
{/* Timer */}
|
||||
<span className="font-mono text-xs text-text-03 tabular-nums shrink-0">
|
||||
{formattedTime}
|
||||
</span>
|
||||
|
||||
{/* Mute button */}
|
||||
{onMuteToggle && (
|
||||
<Button
|
||||
icon={isMuted ? SvgMicrophoneOff : SvgMicrophone}
|
||||
onClick={onMuteToggle}
|
||||
prominence="tertiary"
|
||||
size="sm"
|
||||
aria-label={isMuted ? "Unmute microphone" : "Mute microphone"}
|
||||
/>
|
||||
)}
|
||||
</div>
|
||||
);
|
||||
}
|
||||
|
||||
export default Waveform;
|
||||
122
web/src/hooks/useAdminUsers.ts
Normal file
122
web/src/hooks/useAdminUsers.ts
Normal file
@@ -0,0 +1,122 @@
|
||||
"use client";
|
||||
|
||||
import useSWR from "swr";
|
||||
import { errorHandlingFetcher } from "@/lib/fetcher";
|
||||
import { NEXT_PUBLIC_CLOUD_ENABLED } from "@/lib/constants";
|
||||
import { UserStatus } from "@/lib/types";
|
||||
import type { UserRole, InvitedUserSnapshot } from "@/lib/types";
|
||||
import type {
|
||||
UserRow,
|
||||
UserGroupInfo,
|
||||
} from "@/refresh-pages/admin/UsersPage/interfaces";
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Backend response shape (GET /manage/users/accepted/all)
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
interface FullUserSnapshot {
|
||||
id: string;
|
||||
email: string;
|
||||
role: UserRole;
|
||||
is_active: boolean;
|
||||
password_configured: boolean;
|
||||
personal_name: string | null;
|
||||
created_at: string;
|
||||
updated_at: string;
|
||||
groups: UserGroupInfo[];
|
||||
is_scim_synced: boolean;
|
||||
}
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Converters
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
function toUserRow(snapshot: FullUserSnapshot): UserRow {
|
||||
return {
|
||||
id: snapshot.id,
|
||||
email: snapshot.email,
|
||||
role: snapshot.role,
|
||||
status: snapshot.is_active ? UserStatus.ACTIVE : UserStatus.INACTIVE,
|
||||
is_active: snapshot.is_active,
|
||||
is_scim_synced: snapshot.is_scim_synced,
|
||||
personal_name: snapshot.personal_name,
|
||||
created_at: snapshot.created_at,
|
||||
updated_at: snapshot.updated_at,
|
||||
groups: snapshot.groups,
|
||||
};
|
||||
}
|
||||
|
||||
function emailToUserRow(
|
||||
email: string,
|
||||
status: UserStatus.INVITED | UserStatus.REQUESTED
|
||||
): UserRow {
|
||||
return {
|
||||
id: null,
|
||||
email,
|
||||
role: null,
|
||||
status,
|
||||
is_active: false,
|
||||
is_scim_synced: false,
|
||||
personal_name: null,
|
||||
created_at: null,
|
||||
updated_at: null,
|
||||
groups: [],
|
||||
};
|
||||
}
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Hook
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
export default function useAdminUsers() {
|
||||
const {
|
||||
data: acceptedData,
|
||||
isLoading: acceptedLoading,
|
||||
error: acceptedError,
|
||||
mutate: acceptedMutate,
|
||||
} = useSWR<FullUserSnapshot[]>(
|
||||
"/api/manage/users/accepted/all",
|
||||
errorHandlingFetcher
|
||||
);
|
||||
|
||||
const {
|
||||
data: invitedData,
|
||||
isLoading: invitedLoading,
|
||||
error: invitedError,
|
||||
mutate: invitedMutate,
|
||||
} = useSWR<InvitedUserSnapshot[]>(
|
||||
"/api/manage/users/invited",
|
||||
errorHandlingFetcher
|
||||
);
|
||||
|
||||
const {
|
||||
data: requestedData,
|
||||
isLoading: requestedLoading,
|
||||
error: requestedError,
|
||||
mutate: requestedMutate,
|
||||
} = useSWR<InvitedUserSnapshot[]>(
|
||||
NEXT_PUBLIC_CLOUD_ENABLED ? "/api/tenants/users/pending" : null,
|
||||
errorHandlingFetcher
|
||||
);
|
||||
|
||||
const acceptedRows = (acceptedData ?? []).map(toUserRow);
|
||||
const invitedRows = (invitedData ?? []).map((u) =>
|
||||
emailToUserRow(u.email, UserStatus.INVITED)
|
||||
);
|
||||
const requestedRows = (requestedData ?? []).map((u) =>
|
||||
emailToUserRow(u.email, UserStatus.REQUESTED)
|
||||
);
|
||||
|
||||
const users = [...invitedRows, ...requestedRows, ...acceptedRows];
|
||||
|
||||
const isLoading = acceptedLoading || invitedLoading || requestedLoading;
|
||||
const error = acceptedError ?? invitedError ?? requestedError;
|
||||
|
||||
function refresh() {
|
||||
acceptedMutate();
|
||||
invitedMutate();
|
||||
requestedMutate();
|
||||
}
|
||||
|
||||
return { users, isLoading, error, refresh };
|
||||
}
|
||||
@@ -4,23 +4,28 @@ import useSWR from "swr";
|
||||
import { errorHandlingFetcher } from "@/lib/fetcher";
|
||||
import type { InvitedUserSnapshot } from "@/lib/types";
|
||||
import { NEXT_PUBLIC_CLOUD_ENABLED } from "@/lib/constants";
|
||||
import type { StatusCountMap } from "@/refresh-pages/admin/UsersPage/interfaces";
|
||||
|
||||
type PaginatedCountResponse = {
|
||||
total_items: number;
|
||||
type UserCountsResponse = {
|
||||
role_counts: Record<string, number>;
|
||||
status_counts: Record<string, number>;
|
||||
};
|
||||
|
||||
type UserCounts = {
|
||||
activeCount: number | null;
|
||||
invitedCount: number | null;
|
||||
pendingCount: number | null;
|
||||
roleCounts: Record<string, number>;
|
||||
statusCounts: StatusCountMap;
|
||||
refreshCounts: () => void;
|
||||
};
|
||||
|
||||
export default function useUserCounts(): UserCounts {
|
||||
// Active user count — lightweight fetch (page_size=1 to minimize payload)
|
||||
const { data: activeData } = useSWR<PaginatedCountResponse>(
|
||||
"/api/manage/users/accepted?page_num=0&page_size=1",
|
||||
errorHandlingFetcher
|
||||
);
|
||||
const { data: countsData, mutate: refreshCounts } =
|
||||
useSWR<UserCountsResponse>(
|
||||
"/api/manage/users/counts",
|
||||
errorHandlingFetcher
|
||||
);
|
||||
|
||||
const { data: invitedUsers } = useSWR<InvitedUserSnapshot[]>(
|
||||
"/api/manage/users/invited",
|
||||
@@ -32,9 +37,20 @@ export default function useUserCounts(): UserCounts {
|
||||
errorHandlingFetcher
|
||||
);
|
||||
|
||||
const activeCount = countsData?.status_counts?.active ?? null;
|
||||
const inactiveCount = countsData?.status_counts?.inactive ?? null;
|
||||
|
||||
return {
|
||||
activeCount: activeData?.total_items ?? null,
|
||||
activeCount,
|
||||
invitedCount: invitedUsers?.length ?? null,
|
||||
pendingCount: pendingUsers?.length ?? null,
|
||||
roleCounts: countsData?.role_counts ?? {},
|
||||
statusCounts: {
|
||||
...(activeCount !== null ? { active: activeCount } : {}),
|
||||
...(inactiveCount !== null ? { inactive: inactiveCount } : {}),
|
||||
...(invitedUsers ? { invited: invitedUsers.length } : {}),
|
||||
...(pendingUsers ? { requested: pendingUsers.length } : {}),
|
||||
} satisfies StatusCountMap,
|
||||
refreshCounts,
|
||||
};
|
||||
}
|
||||
|
||||
@@ -1,107 +0,0 @@
|
||||
import { useState, useRef, useCallback, useEffect } from "react";
|
||||
import { StreamingTTSPlayer } from "@/lib/streamingTTS";
|
||||
import { useVoiceMode } from "@/providers/VoiceModeProvider";
|
||||
|
||||
export interface UseVoicePlaybackReturn {
|
||||
isPlaying: boolean;
|
||||
isLoading: boolean;
|
||||
error: string | null;
|
||||
play: (text: string, voice?: string, speed?: number) => Promise<void>;
|
||||
pause: () => void;
|
||||
stop: () => void;
|
||||
}
|
||||
|
||||
export function useVoicePlayback(): UseVoicePlaybackReturn {
|
||||
const [isPlaying, setIsPlaying] = useState(false);
|
||||
const [isLoading, setIsLoading] = useState(false);
|
||||
const [error, setError] = useState<string | null>(null);
|
||||
|
||||
const playerRef = useRef<StreamingTTSPlayer | null>(null);
|
||||
const suppressPlayerErrorsRef = useRef(false);
|
||||
const { setManualTTSPlaying, isTTSMuted, registerManualTTSMuteHandler } =
|
||||
useVoiceMode();
|
||||
|
||||
useEffect(() => {
|
||||
registerManualTTSMuteHandler((muted) => {
|
||||
playerRef.current?.setMuted(muted);
|
||||
});
|
||||
return () => {
|
||||
registerManualTTSMuteHandler(null);
|
||||
};
|
||||
}, [registerManualTTSMuteHandler]);
|
||||
|
||||
const stop = useCallback(() => {
|
||||
suppressPlayerErrorsRef.current = true;
|
||||
if (playerRef.current) {
|
||||
playerRef.current.stop();
|
||||
playerRef.current = null;
|
||||
}
|
||||
setManualTTSPlaying(false);
|
||||
setError(null);
|
||||
setIsPlaying(false);
|
||||
setIsLoading(false);
|
||||
}, [setManualTTSPlaying]);
|
||||
|
||||
const pause = useCallback(() => {
|
||||
// Streaming player currently supports stop/resume via restart, not true pause.
|
||||
stop();
|
||||
}, [stop]);
|
||||
|
||||
const play = useCallback(
|
||||
async (text: string, voice?: string, speed?: number) => {
|
||||
// Stop any existing playback
|
||||
stop();
|
||||
suppressPlayerErrorsRef.current = false;
|
||||
setError(null);
|
||||
setIsLoading(true);
|
||||
|
||||
try {
|
||||
const player = new StreamingTTSPlayer({
|
||||
onPlayingChange: (playing) => {
|
||||
setIsPlaying(playing);
|
||||
setManualTTSPlaying(playing);
|
||||
if (playing) {
|
||||
setIsLoading(false);
|
||||
}
|
||||
},
|
||||
onError: (playbackError) => {
|
||||
if (suppressPlayerErrorsRef.current) {
|
||||
return;
|
||||
}
|
||||
console.error("Voice playback error:", playbackError);
|
||||
setManualTTSPlaying(false);
|
||||
setError(playbackError);
|
||||
setIsLoading(false);
|
||||
setIsPlaying(false);
|
||||
},
|
||||
});
|
||||
playerRef.current = player;
|
||||
player.setMuted(isTTSMuted);
|
||||
|
||||
await player.speak(text, voice, speed);
|
||||
setIsLoading(false);
|
||||
} catch (err) {
|
||||
if (err instanceof Error && err.name === "AbortError") {
|
||||
// Request was cancelled, not an error
|
||||
return;
|
||||
}
|
||||
const message =
|
||||
err instanceof Error ? err.message : "Speech synthesis failed";
|
||||
setError(message);
|
||||
setIsLoading(false);
|
||||
setIsPlaying(false);
|
||||
setManualTTSPlaying(false);
|
||||
}
|
||||
},
|
||||
[isTTSMuted, setManualTTSPlaying, stop]
|
||||
);
|
||||
|
||||
return {
|
||||
isPlaying,
|
||||
isLoading,
|
||||
error,
|
||||
play,
|
||||
pause,
|
||||
stop,
|
||||
};
|
||||
}
|
||||
@@ -1,35 +0,0 @@
|
||||
import useSWR from "swr";
|
||||
import { errorHandlingFetcher } from "@/lib/fetcher";
|
||||
|
||||
export interface VoiceProviderView {
|
||||
id: number;
|
||||
name: string;
|
||||
provider_type: string;
|
||||
is_default_stt: boolean;
|
||||
is_default_tts: boolean;
|
||||
stt_model: string | null;
|
||||
tts_model: string | null;
|
||||
default_voice: string | null;
|
||||
has_api_key: boolean;
|
||||
target_uri: string | null;
|
||||
}
|
||||
|
||||
const VOICE_PROVIDERS_URL = "/api/admin/voice/providers";
|
||||
|
||||
export function useVoiceProviders() {
|
||||
const { data, error, isLoading, mutate } = useSWR<VoiceProviderView[]>(
|
||||
VOICE_PROVIDERS_URL,
|
||||
errorHandlingFetcher,
|
||||
{
|
||||
revalidateOnFocus: false,
|
||||
dedupingInterval: 60000,
|
||||
}
|
||||
);
|
||||
|
||||
return {
|
||||
providers: data ?? [],
|
||||
isLoading,
|
||||
error,
|
||||
refresh: mutate,
|
||||
};
|
||||
}
|
||||
@@ -1,500 +0,0 @@
|
||||
import { useState, useRef, useCallback, useEffect } from "react";
|
||||
|
||||
// Target format for OpenAI Realtime API
|
||||
const TARGET_SAMPLE_RATE = 24000;
|
||||
const CHUNK_INTERVAL_MS = 250;
|
||||
|
||||
interface TranscriptMessage {
|
||||
type: "transcript" | "error";
|
||||
text?: string;
|
||||
message?: string;
|
||||
is_final?: boolean;
|
||||
}
|
||||
|
||||
export interface UseVoiceRecorderOptions {
|
||||
/** Called when VAD detects silence and final transcript is received */
|
||||
onFinalTranscript?: (text: string) => void;
|
||||
/** If true, automatically stop recording when VAD detects silence */
|
||||
autoStopOnSilence?: boolean;
|
||||
}
|
||||
|
||||
export interface UseVoiceRecorderReturn {
|
||||
isRecording: boolean;
|
||||
isProcessing: boolean;
|
||||
isMuted: boolean;
|
||||
error: string | null;
|
||||
liveTranscript: string;
|
||||
/** Current microphone audio level (0-1, RMS-based) */
|
||||
audioLevel: number;
|
||||
startRecording: () => Promise<void>;
|
||||
stopRecording: () => Promise<string | null>;
|
||||
setMuted: (muted: boolean) => void;
|
||||
}
|
||||
|
||||
/**
|
||||
* Encapsulates all browser resources for a voice recording session.
|
||||
* Manages WebSocket, Web Audio API, and audio buffering.
|
||||
*/
|
||||
class VoiceRecorderSession {
|
||||
// Browser resources
|
||||
private websocket: WebSocket | null = null;
|
||||
private audioContext: AudioContext | null = null;
|
||||
private scriptNode: ScriptProcessorNode | null = null;
|
||||
private sourceNode: MediaStreamAudioSourceNode | null = null;
|
||||
private mediaStream: MediaStream | null = null;
|
||||
private sendInterval: NodeJS.Timeout | null = null;
|
||||
|
||||
// State
|
||||
private audioBuffer: Float32Array[] = [];
|
||||
private transcript = "";
|
||||
private stopResolver: ((text: string | null) => void) | null = null;
|
||||
private isActive = false;
|
||||
|
||||
// Callbacks to update React state
|
||||
private onTranscriptChange: (text: string) => void;
|
||||
private onFinalTranscript: ((text: string) => void) | null;
|
||||
private onError: (error: string) => void;
|
||||
private onAudioLevel: (level: number) => void;
|
||||
private onSilenceTimeout: (() => void) | null;
|
||||
private onVADStop: (() => void) | null;
|
||||
private autoStopOnSilence: boolean;
|
||||
|
||||
constructor(
|
||||
onTranscriptChange: (text: string) => void,
|
||||
onFinalTranscript: ((text: string) => void) | null,
|
||||
onError: (error: string) => void,
|
||||
onAudioLevel: (level: number) => void,
|
||||
onSilenceTimeout?: () => void,
|
||||
autoStopOnSilence?: boolean,
|
||||
onVADStop?: () => void
|
||||
) {
|
||||
this.onTranscriptChange = onTranscriptChange;
|
||||
this.onFinalTranscript = onFinalTranscript;
|
||||
this.onError = onError;
|
||||
this.onAudioLevel = onAudioLevel;
|
||||
this.onSilenceTimeout = onSilenceTimeout || null;
|
||||
this.autoStopOnSilence = autoStopOnSilence ?? false;
|
||||
this.onVADStop = onVADStop || null;
|
||||
}
|
||||
|
||||
get recording(): boolean {
|
||||
return this.isActive;
|
||||
}
|
||||
|
||||
get currentTranscript(): string {
|
||||
return this.transcript;
|
||||
}
|
||||
|
||||
setMuted(muted: boolean): void {
|
||||
if (this.mediaStream) {
|
||||
this.mediaStream.getAudioTracks().forEach((track) => {
|
||||
track.enabled = !muted;
|
||||
});
|
||||
}
|
||||
}
|
||||
|
||||
async start(): Promise<void> {
|
||||
if (this.isActive) return;
|
||||
|
||||
this.cleanup();
|
||||
this.transcript = "";
|
||||
this.audioBuffer = [];
|
||||
|
||||
// Get microphone
|
||||
this.mediaStream = await navigator.mediaDevices.getUserMedia({
|
||||
audio: {
|
||||
channelCount: 1,
|
||||
sampleRate: { ideal: TARGET_SAMPLE_RATE },
|
||||
echoCancellation: true,
|
||||
noiseSuppression: true,
|
||||
},
|
||||
});
|
||||
|
||||
// Get WS token and connect WebSocket
|
||||
const wsUrl = await this.getWebSocketUrl();
|
||||
this.websocket = new WebSocket(wsUrl);
|
||||
this.websocket.onmessage = this.handleMessage;
|
||||
this.websocket.onerror = () => this.onError("Connection failed");
|
||||
this.websocket.onclose = () => {
|
||||
if (this.stopResolver) {
|
||||
this.stopResolver(this.transcript || null);
|
||||
this.stopResolver = null;
|
||||
}
|
||||
};
|
||||
|
||||
await this.waitForConnection();
|
||||
|
||||
// Restore error handler after connection (waitForConnection overwrites it)
|
||||
this.websocket.onerror = () => this.onError("Connection failed");
|
||||
|
||||
// Set up audio capture
|
||||
this.audioContext = new AudioContext({ sampleRate: TARGET_SAMPLE_RATE });
|
||||
this.sourceNode = this.audioContext.createMediaStreamSource(
|
||||
this.mediaStream
|
||||
);
|
||||
this.scriptNode = this.audioContext.createScriptProcessor(4096, 1, 1);
|
||||
|
||||
this.scriptNode.onaudioprocess = (event) => {
|
||||
const inputData = event.inputBuffer.getChannelData(0);
|
||||
this.audioBuffer.push(new Float32Array(inputData));
|
||||
|
||||
// Compute RMS audio level (0-1) for waveform visualization
|
||||
let sum = 0;
|
||||
for (let i = 0; i < inputData.length; i++) {
|
||||
sum += inputData[i]! * inputData[i]!;
|
||||
}
|
||||
const rms = Math.sqrt(sum / inputData.length);
|
||||
// Scale RMS to a more visible range (raw RMS is usually very small)
|
||||
this.onAudioLevel(Math.min(1, rms * 5));
|
||||
};
|
||||
|
||||
this.sourceNode.connect(this.scriptNode);
|
||||
this.scriptNode.connect(this.audioContext.destination);
|
||||
|
||||
// Start sending audio chunks
|
||||
this.sendInterval = setInterval(
|
||||
() => this.sendAudioBuffer(),
|
||||
CHUNK_INTERVAL_MS
|
||||
);
|
||||
this.isActive = true;
|
||||
}
|
||||
|
||||
async stop(): Promise<string | null> {
|
||||
if (!this.isActive) return this.transcript || null;
|
||||
|
||||
// Stop audio capture
|
||||
if (this.sendInterval) {
|
||||
clearInterval(this.sendInterval);
|
||||
this.sendInterval = null;
|
||||
}
|
||||
if (this.scriptNode) {
|
||||
this.scriptNode.disconnect();
|
||||
this.scriptNode = null;
|
||||
}
|
||||
if (this.sourceNode) {
|
||||
this.sourceNode.disconnect();
|
||||
this.sourceNode = null;
|
||||
}
|
||||
if (this.audioContext) {
|
||||
this.audioContext.close();
|
||||
this.audioContext = null;
|
||||
}
|
||||
if (this.mediaStream) {
|
||||
this.mediaStream.getTracks().forEach((track) => track.stop());
|
||||
this.mediaStream = null;
|
||||
}
|
||||
|
||||
this.audioBuffer = [];
|
||||
this.isActive = false;
|
||||
|
||||
// Get final transcript from server
|
||||
if (this.websocket?.readyState === WebSocket.OPEN) {
|
||||
return new Promise((resolve) => {
|
||||
this.stopResolver = resolve;
|
||||
this.websocket!.send(JSON.stringify({ type: "end" }));
|
||||
|
||||
// Timeout fallback
|
||||
setTimeout(() => {
|
||||
if (this.stopResolver) {
|
||||
this.stopResolver(this.transcript || null);
|
||||
this.stopResolver = null;
|
||||
}
|
||||
}, 3000);
|
||||
});
|
||||
}
|
||||
|
||||
return this.transcript || null;
|
||||
}
|
||||
|
||||
cleanup(): void {
|
||||
if (this.sendInterval) clearInterval(this.sendInterval);
|
||||
if (this.scriptNode) this.scriptNode.disconnect();
|
||||
if (this.sourceNode) this.sourceNode.disconnect();
|
||||
if (this.audioContext) this.audioContext.close();
|
||||
if (this.mediaStream) this.mediaStream.getTracks().forEach((t) => t.stop());
|
||||
if (this.websocket) this.websocket.close();
|
||||
|
||||
this.sendInterval = null;
|
||||
this.scriptNode = null;
|
||||
this.sourceNode = null;
|
||||
this.audioContext = null;
|
||||
this.mediaStream = null;
|
||||
this.websocket = null;
|
||||
this.isActive = false;
|
||||
}
|
||||
|
||||
private async getWebSocketUrl(): Promise<string> {
|
||||
// Fetch short-lived WS token
|
||||
const tokenResponse = await fetch("/api/voice/ws-token", {
|
||||
method: "POST",
|
||||
credentials: "include",
|
||||
});
|
||||
if (!tokenResponse.ok) {
|
||||
throw new Error("Failed to get WebSocket authentication token");
|
||||
}
|
||||
const { token } = await tokenResponse.json();
|
||||
|
||||
const protocol = window.location.protocol === "https:" ? "wss:" : "ws:";
|
||||
const isDev = window.location.port === "3000";
|
||||
const host = isDev ? "localhost:8080" : window.location.host;
|
||||
const path = isDev
|
||||
? "/voice/transcribe/stream"
|
||||
: "/api/voice/transcribe/stream";
|
||||
return `${protocol}//${host}${path}?token=${encodeURIComponent(token)}`;
|
||||
}
|
||||
|
||||
private waitForConnection(): Promise<void> {
|
||||
return new Promise((resolve, reject) => {
|
||||
if (!this.websocket) return reject(new Error("No WebSocket"));
|
||||
|
||||
const timeout = setTimeout(
|
||||
() => reject(new Error("Connection timeout")),
|
||||
5000
|
||||
);
|
||||
|
||||
this.websocket.onopen = () => {
|
||||
clearTimeout(timeout);
|
||||
resolve();
|
||||
};
|
||||
this.websocket.onerror = () => {
|
||||
clearTimeout(timeout);
|
||||
reject(new Error("Connection failed"));
|
||||
};
|
||||
});
|
||||
}
|
||||
|
||||
private handleMessage = (event: MessageEvent): void => {
|
||||
try {
|
||||
const data: TranscriptMessage = JSON.parse(event.data);
|
||||
|
||||
if (data.type === "transcript") {
|
||||
if (data.text) {
|
||||
this.transcript = data.text;
|
||||
this.onTranscriptChange(data.text);
|
||||
}
|
||||
|
||||
if (data.is_final && data.text) {
|
||||
// VAD detected silence - trigger callback
|
||||
if (this.onFinalTranscript) {
|
||||
this.onFinalTranscript(data.text);
|
||||
}
|
||||
|
||||
// Auto-stop recording if enabled
|
||||
if (this.autoStopOnSilence) {
|
||||
// Trigger stop callback to update React state
|
||||
if (this.onVADStop) {
|
||||
this.onVADStop();
|
||||
}
|
||||
} else {
|
||||
// If not auto-stopping, reset for next utterance
|
||||
this.transcript = "";
|
||||
this.onTranscriptChange("");
|
||||
this.resetBackendTranscript();
|
||||
}
|
||||
|
||||
// Resolve stop promise if waiting
|
||||
if (this.stopResolver) {
|
||||
this.stopResolver(data.text);
|
||||
this.stopResolver = null;
|
||||
}
|
||||
}
|
||||
} else if (data.type === "error") {
|
||||
this.onError(data.message || "Transcription error");
|
||||
}
|
||||
} catch (e) {
|
||||
console.error("Failed to parse transcript message:", e);
|
||||
}
|
||||
};
|
||||
|
||||
private resetBackendTranscript(): void {
|
||||
if (this.websocket?.readyState === WebSocket.OPEN) {
|
||||
this.websocket.send(JSON.stringify({ type: "reset" }));
|
||||
}
|
||||
}
|
||||
|
||||
private sendAudioBuffer(): void {
|
||||
if (
|
||||
!this.websocket ||
|
||||
this.websocket.readyState !== WebSocket.OPEN ||
|
||||
!this.audioContext ||
|
||||
this.audioBuffer.length === 0
|
||||
) {
|
||||
return;
|
||||
}
|
||||
|
||||
// Concatenate buffered chunks
|
||||
const totalLength = this.audioBuffer.reduce(
|
||||
(sum, chunk) => sum + chunk.length,
|
||||
0
|
||||
);
|
||||
|
||||
// Prevent buffer overflow
|
||||
if (totalLength > this.audioContext.sampleRate * 0.5 * 2) {
|
||||
this.audioBuffer = this.audioBuffer.slice(-10);
|
||||
return;
|
||||
}
|
||||
|
||||
const concatenated = new Float32Array(totalLength);
|
||||
let offset = 0;
|
||||
for (const chunk of this.audioBuffer) {
|
||||
concatenated.set(chunk, offset);
|
||||
offset += chunk.length;
|
||||
}
|
||||
this.audioBuffer = [];
|
||||
|
||||
// Resample and convert to PCM16
|
||||
const resampled = this.resampleAudio(
|
||||
concatenated,
|
||||
this.audioContext.sampleRate
|
||||
);
|
||||
const pcm16 = this.float32ToInt16(resampled);
|
||||
|
||||
this.websocket.send(pcm16.buffer);
|
||||
}
|
||||
|
||||
private resampleAudio(input: Float32Array, inputRate: number): Float32Array {
|
||||
if (inputRate === TARGET_SAMPLE_RATE) return input;
|
||||
|
||||
const ratio = inputRate / TARGET_SAMPLE_RATE;
|
||||
const outputLength = Math.round(input.length / ratio);
|
||||
const output = new Float32Array(outputLength);
|
||||
|
||||
for (let i = 0; i < outputLength; i++) {
|
||||
const srcIndex = i * ratio;
|
||||
const floor = Math.floor(srcIndex);
|
||||
const ceil = Math.min(floor + 1, input.length - 1);
|
||||
const fraction = srcIndex - floor;
|
||||
output[i] = input[floor]! * (1 - fraction) + input[ceil]! * fraction;
|
||||
}
|
||||
|
||||
return output;
|
||||
}
|
||||
|
||||
private float32ToInt16(float32: Float32Array): Int16Array {
|
||||
const int16 = new Int16Array(float32.length);
|
||||
for (let i = 0; i < float32.length; i++) {
|
||||
const s = Math.max(-1, Math.min(1, float32[i]!));
|
||||
int16[i] = s < 0 ? s * 0x8000 : s * 0x7fff;
|
||||
}
|
||||
return int16;
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Hook for voice recording with streaming transcription.
|
||||
*/
|
||||
export function useVoiceRecorder(
|
||||
options?: UseVoiceRecorderOptions
|
||||
): UseVoiceRecorderReturn {
|
||||
const [isRecording, setIsRecording] = useState(false);
|
||||
const [isProcessing, setIsProcessing] = useState(false);
|
||||
const [isMuted, setIsMutedState] = useState(false);
|
||||
const [error, setError] = useState<string | null>(null);
|
||||
const [liveTranscript, setLiveTranscript] = useState("");
|
||||
const [audioLevel, setAudioLevel] = useState(0);
|
||||
|
||||
const sessionRef = useRef<VoiceRecorderSession | null>(null);
|
||||
const onFinalTranscriptRef = useRef(options?.onFinalTranscript);
|
||||
const autoStopOnSilenceRef = useRef(options?.autoStopOnSilence ?? true); // Default to true
|
||||
|
||||
// Keep callback ref in sync
|
||||
useEffect(() => {
|
||||
onFinalTranscriptRef.current = options?.onFinalTranscript;
|
||||
autoStopOnSilenceRef.current = options?.autoStopOnSilence ?? true;
|
||||
}, [options?.onFinalTranscript, options?.autoStopOnSilence]);
|
||||
|
||||
// Cleanup on unmount
|
||||
useEffect(() => {
|
||||
return () => {
|
||||
sessionRef.current?.cleanup();
|
||||
};
|
||||
}, []);
|
||||
|
||||
const startRecording = useCallback(async () => {
|
||||
if (sessionRef.current?.recording) return;
|
||||
|
||||
setError(null);
|
||||
setLiveTranscript("");
|
||||
|
||||
// Clear any stale, inactive session before starting a new one.
|
||||
if (sessionRef.current && !sessionRef.current.recording) {
|
||||
sessionRef.current.cleanup();
|
||||
sessionRef.current = null;
|
||||
}
|
||||
|
||||
// Create VAD stop handler that will stop the session
|
||||
const currentSession = new VoiceRecorderSession(
|
||||
setLiveTranscript,
|
||||
(text) => onFinalTranscriptRef.current?.(text),
|
||||
setError,
|
||||
setAudioLevel,
|
||||
undefined, // onSilenceTimeout
|
||||
autoStopOnSilenceRef.current,
|
||||
() => {
|
||||
// Stop only this session instance, and only clear recording state if it
|
||||
// is still the active session when stop resolves.
|
||||
currentSession.stop().then(() => {
|
||||
if (sessionRef.current === currentSession) {
|
||||
setIsRecording(false);
|
||||
setIsMutedState(false);
|
||||
sessionRef.current = null;
|
||||
}
|
||||
});
|
||||
}
|
||||
);
|
||||
sessionRef.current = currentSession;
|
||||
|
||||
try {
|
||||
await currentSession.start();
|
||||
if (sessionRef.current === currentSession) {
|
||||
setIsRecording(true);
|
||||
}
|
||||
} catch (err) {
|
||||
currentSession.cleanup();
|
||||
setError(
|
||||
err instanceof Error ? err.message : "Failed to start recording"
|
||||
);
|
||||
if (sessionRef.current === currentSession) {
|
||||
sessionRef.current = null;
|
||||
}
|
||||
throw err;
|
||||
}
|
||||
}, []);
|
||||
|
||||
const stopRecording = useCallback(async (): Promise<string | null> => {
|
||||
if (!sessionRef.current) return null;
|
||||
const currentSession = sessionRef.current;
|
||||
|
||||
setIsProcessing(true);
|
||||
|
||||
try {
|
||||
const transcript = await currentSession.stop();
|
||||
return transcript;
|
||||
} finally {
|
||||
// Only clear state if this is still the active session.
|
||||
if (sessionRef.current === currentSession) {
|
||||
setIsRecording(false);
|
||||
setIsMutedState(false); // Reset mute state when recording stops
|
||||
sessionRef.current = null;
|
||||
}
|
||||
setIsProcessing(false);
|
||||
}
|
||||
}, []);
|
||||
|
||||
const setMuted = useCallback((muted: boolean) => {
|
||||
setIsMutedState(muted);
|
||||
sessionRef.current?.setMuted(muted);
|
||||
}, []);
|
||||
|
||||
return {
|
||||
isRecording,
|
||||
isProcessing,
|
||||
isMuted,
|
||||
error,
|
||||
liveTranscript,
|
||||
audioLevel,
|
||||
startRecording,
|
||||
stopRecording,
|
||||
setMuted,
|
||||
};
|
||||
}
|
||||
@@ -1,25 +0,0 @@
|
||||
import useSWR from "swr";
|
||||
import { errorHandlingFetcher } from "@/lib/fetcher";
|
||||
|
||||
interface VoiceStatus {
|
||||
stt_enabled: boolean;
|
||||
tts_enabled: boolean;
|
||||
}
|
||||
|
||||
export function useVoiceStatus() {
|
||||
const { data, error, isLoading } = useSWR<VoiceStatus>(
|
||||
"/api/voice/status",
|
||||
errorHandlingFetcher,
|
||||
{
|
||||
revalidateOnFocus: false,
|
||||
dedupingInterval: 60000,
|
||||
}
|
||||
);
|
||||
|
||||
return {
|
||||
sttEnabled: data?.stt_enabled ?? false,
|
||||
ttsEnabled: data?.tts_enabled ?? false,
|
||||
isLoading,
|
||||
error,
|
||||
};
|
||||
}
|
||||
@@ -1,150 +0,0 @@
|
||||
import { useState, useRef, useCallback, useEffect } from "react";
|
||||
|
||||
export type WebSocketStatus =
|
||||
| "connecting"
|
||||
| "connected"
|
||||
| "disconnected"
|
||||
| "error";
|
||||
|
||||
export interface UseWebSocketOptions<T> {
|
||||
/** URL to connect to */
|
||||
url: string;
|
||||
/** Called when a message is received */
|
||||
onMessage?: (data: T) => void;
|
||||
/** Called when connection opens */
|
||||
onOpen?: () => void;
|
||||
/** Called when connection closes */
|
||||
onClose?: () => void;
|
||||
/** Called on error */
|
||||
onError?: (error: Event) => void;
|
||||
/** Auto-connect on mount */
|
||||
autoConnect?: boolean;
|
||||
}
|
||||
|
||||
export interface UseWebSocketReturn<T> {
|
||||
/** Current connection status */
|
||||
status: WebSocketStatus;
|
||||
/** Send JSON data */
|
||||
sendJson: (data: T) => void;
|
||||
/** Send binary data */
|
||||
sendBinary: (data: Blob | ArrayBuffer) => void;
|
||||
/** Connect to WebSocket */
|
||||
connect: () => Promise<void>;
|
||||
/** Disconnect from WebSocket */
|
||||
disconnect: () => void;
|
||||
}
|
||||
|
||||
export function useWebSocket<TReceive = unknown, TSend = unknown>({
|
||||
url,
|
||||
onMessage,
|
||||
onOpen,
|
||||
onClose,
|
||||
onError,
|
||||
autoConnect = false,
|
||||
}: UseWebSocketOptions<TReceive>): UseWebSocketReturn<TSend> {
|
||||
const [status, setStatus] = useState<WebSocketStatus>("disconnected");
|
||||
const wsRef = useRef<WebSocket | null>(null);
|
||||
const onMessageRef = useRef(onMessage);
|
||||
const onOpenRef = useRef(onOpen);
|
||||
const onCloseRef = useRef(onClose);
|
||||
const onErrorRef = useRef(onError);
|
||||
|
||||
// Keep refs updated
|
||||
useEffect(() => {
|
||||
onMessageRef.current = onMessage;
|
||||
onOpenRef.current = onOpen;
|
||||
onCloseRef.current = onClose;
|
||||
onErrorRef.current = onError;
|
||||
}, [onMessage, onOpen, onClose, onError]);
|
||||
|
||||
const connect = useCallback(async (): Promise<void> => {
|
||||
if (
|
||||
wsRef.current?.readyState === WebSocket.OPEN ||
|
||||
wsRef.current?.readyState === WebSocket.CONNECTING
|
||||
) {
|
||||
return;
|
||||
}
|
||||
|
||||
setStatus("connecting");
|
||||
|
||||
return new Promise((resolve, reject) => {
|
||||
const ws = new WebSocket(url);
|
||||
wsRef.current = ws;
|
||||
|
||||
const timeout = setTimeout(() => {
|
||||
ws.close();
|
||||
reject(new Error("WebSocket connection timeout"));
|
||||
}, 10000);
|
||||
|
||||
ws.onopen = () => {
|
||||
clearTimeout(timeout);
|
||||
setStatus("connected");
|
||||
onOpenRef.current?.();
|
||||
resolve();
|
||||
};
|
||||
|
||||
ws.onmessage = (event) => {
|
||||
try {
|
||||
const data = JSON.parse(event.data) as TReceive;
|
||||
onMessageRef.current?.(data);
|
||||
} catch {
|
||||
// Non-JSON message, ignore or handle differently
|
||||
}
|
||||
};
|
||||
|
||||
ws.onclose = () => {
|
||||
clearTimeout(timeout);
|
||||
setStatus("disconnected");
|
||||
onCloseRef.current?.();
|
||||
wsRef.current = null;
|
||||
};
|
||||
|
||||
ws.onerror = (error) => {
|
||||
clearTimeout(timeout);
|
||||
setStatus("error");
|
||||
onErrorRef.current?.(error);
|
||||
reject(new Error("WebSocket connection failed"));
|
||||
};
|
||||
});
|
||||
}, [url]);
|
||||
|
||||
const disconnect = useCallback(() => {
|
||||
if (wsRef.current) {
|
||||
wsRef.current.close();
|
||||
wsRef.current = null;
|
||||
}
|
||||
setStatus("disconnected");
|
||||
}, []);
|
||||
|
||||
const sendJson = useCallback((data: TSend) => {
|
||||
if (wsRef.current?.readyState === WebSocket.OPEN) {
|
||||
wsRef.current.send(JSON.stringify(data));
|
||||
}
|
||||
}, []);
|
||||
|
||||
const sendBinary = useCallback((data: Blob | ArrayBuffer) => {
|
||||
if (wsRef.current?.readyState === WebSocket.OPEN) {
|
||||
wsRef.current.send(data);
|
||||
}
|
||||
}, []);
|
||||
|
||||
// Auto-connect if enabled
|
||||
useEffect(() => {
|
||||
if (autoConnect) {
|
||||
connect().catch(() => {
|
||||
// Error handled via onError callback
|
||||
});
|
||||
}
|
||||
return () => {
|
||||
disconnect();
|
||||
};
|
||||
}, [autoConnect, connect, disconnect]);
|
||||
|
||||
return {
|
||||
status,
|
||||
sendJson,
|
||||
sendBinary,
|
||||
connect,
|
||||
disconnect,
|
||||
};
|
||||
}
|
||||
@@ -1,58 +0,0 @@
|
||||
const VOICE_PROVIDERS_URL = "/api/admin/voice/providers";
|
||||
|
||||
export async function activateVoiceProvider(
|
||||
providerId: number,
|
||||
mode: "stt" | "tts",
|
||||
ttsModel?: string
|
||||
): Promise<Response> {
|
||||
const url = new URL(
|
||||
`${VOICE_PROVIDERS_URL}/${providerId}/activate-${mode}`,
|
||||
window.location.origin
|
||||
);
|
||||
if (mode === "tts" && ttsModel) {
|
||||
url.searchParams.set("tts_model", ttsModel);
|
||||
}
|
||||
return fetch(url.toString(), { method: "POST" });
|
||||
}
|
||||
|
||||
export async function deactivateVoiceProvider(
|
||||
providerId: number,
|
||||
mode: "stt" | "tts"
|
||||
): Promise<Response> {
|
||||
return fetch(`${VOICE_PROVIDERS_URL}/${providerId}/deactivate-${mode}`, {
|
||||
method: "POST",
|
||||
});
|
||||
}
|
||||
|
||||
export async function testVoiceProvider(request: {
|
||||
provider_type: string;
|
||||
api_key?: string;
|
||||
target_uri?: string;
|
||||
use_stored_key?: boolean;
|
||||
}): Promise<Response> {
|
||||
return fetch(`${VOICE_PROVIDERS_URL}/test`, {
|
||||
method: "POST",
|
||||
headers: { "Content-Type": "application/json" },
|
||||
body: JSON.stringify(request),
|
||||
});
|
||||
}
|
||||
|
||||
export async function upsertVoiceProvider(
|
||||
request: Record<string, unknown>
|
||||
): Promise<Response> {
|
||||
return fetch(VOICE_PROVIDERS_URL, {
|
||||
method: "POST",
|
||||
headers: { "Content-Type": "application/json" },
|
||||
body: JSON.stringify(request),
|
||||
});
|
||||
}
|
||||
|
||||
export async function fetchVoicesByType(
|
||||
providerType: string
|
||||
): Promise<Response> {
|
||||
return fetch(`/api/admin/voice/voices?provider_type=${providerType}`);
|
||||
}
|
||||
|
||||
export async function fetchLLMProviders(): Promise<Response> {
|
||||
return fetch("/api/admin/llm/provider");
|
||||
}
|
||||
@@ -151,17 +151,6 @@ export function formatMmDdYyyy(d: string): string {
|
||||
return `${date.getMonth() + 1}/${date.getDate()}/${date.getFullYear()}`;
|
||||
}
|
||||
|
||||
/**
|
||||
* Format a duration in seconds as MM:SS (e.g. 65 → "01:05").
|
||||
*/
|
||||
export function formatElapsedTime(totalSeconds: number): string {
|
||||
const minutes = Math.floor(totalSeconds / 60);
|
||||
const seconds = totalSeconds % 60;
|
||||
return `${minutes.toString().padStart(2, "0")}:${seconds
|
||||
.toString()
|
||||
.padStart(2, "0")}`;
|
||||
}
|
||||
|
||||
export const getFormattedDateTime = (date: Date | null) => {
|
||||
if (!date) return null;
|
||||
|
||||
|
||||
@@ -1,614 +0,0 @@
|
||||
/**
|
||||
* Real-time streaming TTS using HTTP streaming with MediaSource Extensions.
|
||||
* Plays audio chunks as they arrive for smooth, low-latency playback.
|
||||
*/
|
||||
|
||||
/**
|
||||
* HTTPStreamingTTSPlayer - Uses HTTP streaming with MediaSource Extensions
|
||||
* for smooth, gapless audio playback. This is the recommended approach for
|
||||
* real-time TTS as it properly handles MP3 frame boundaries.
|
||||
*/
|
||||
export class HTTPStreamingTTSPlayer {
|
||||
private mediaSource: MediaSource | null = null;
|
||||
private mediaSourceUrl: string | null = null;
|
||||
private sourceBuffer: SourceBuffer | null = null;
|
||||
private audioElement: HTMLAudioElement | null = null;
|
||||
private pendingChunks: Uint8Array[] = [];
|
||||
private isAppending: boolean = false;
|
||||
private isPlaying: boolean = false;
|
||||
private streamComplete: boolean = false;
|
||||
private onPlayingChange?: (playing: boolean) => void;
|
||||
private onError?: (error: string) => void;
|
||||
private abortController: AbortController | null = null;
|
||||
private isMuted: boolean = false;
|
||||
|
||||
constructor(options?: {
|
||||
onPlayingChange?: (playing: boolean) => void;
|
||||
onError?: (error: string) => void;
|
||||
}) {
|
||||
this.onPlayingChange = options?.onPlayingChange;
|
||||
this.onError = options?.onError;
|
||||
}
|
||||
|
||||
private getAPIUrl(): string {
|
||||
// Always go through the frontend proxy to ensure cookies are sent correctly
|
||||
// The Next.js proxy at /api/* forwards to the backend
|
||||
return "/api/voice/synthesize";
|
||||
}
|
||||
|
||||
/**
|
||||
* Speak text using HTTP streaming with real-time playback.
|
||||
* Audio begins playing as soon as the first chunks arrive.
|
||||
*/
|
||||
async speak(
|
||||
text: string,
|
||||
voice?: string,
|
||||
speed: number = 1.0
|
||||
): Promise<void> {
|
||||
// Cleanup any previous playback
|
||||
this.cleanup();
|
||||
|
||||
// Create abort controller for this request
|
||||
this.abortController = new AbortController();
|
||||
|
||||
// Build URL with query params
|
||||
const params = new URLSearchParams();
|
||||
params.set("text", text);
|
||||
if (voice) params.set("voice", voice);
|
||||
params.set("speed", speed.toString());
|
||||
|
||||
const url = `${this.getAPIUrl()}?${params}`;
|
||||
|
||||
// Check if MediaSource is supported
|
||||
if (!window.MediaSource || !MediaSource.isTypeSupported("audio/mpeg")) {
|
||||
// Fallback to simple buffered playback
|
||||
return this.fallbackSpeak(url);
|
||||
}
|
||||
|
||||
// Create MediaSource and audio element
|
||||
this.mediaSource = new MediaSource();
|
||||
this.audioElement = new Audio();
|
||||
this.mediaSourceUrl = URL.createObjectURL(this.mediaSource);
|
||||
this.audioElement.src = this.mediaSourceUrl;
|
||||
this.audioElement.muted = this.isMuted;
|
||||
|
||||
// Set up audio element event handlers
|
||||
this.audioElement.onplay = () => {
|
||||
if (!this.isPlaying) {
|
||||
this.isPlaying = true;
|
||||
this.onPlayingChange?.(true);
|
||||
}
|
||||
};
|
||||
|
||||
this.audioElement.onended = () => {
|
||||
this.isPlaying = false;
|
||||
this.onPlayingChange?.(false);
|
||||
};
|
||||
|
||||
this.audioElement.onerror = () => {
|
||||
this.onError?.("Audio playback error");
|
||||
this.isPlaying = false;
|
||||
this.onPlayingChange?.(false);
|
||||
};
|
||||
|
||||
// Wait for MediaSource to be ready
|
||||
await new Promise<void>((resolve, reject) => {
|
||||
if (!this.mediaSource) {
|
||||
reject(new Error("MediaSource not initialized"));
|
||||
return;
|
||||
}
|
||||
|
||||
this.mediaSource.onsourceopen = () => {
|
||||
try {
|
||||
// Create SourceBuffer for MP3
|
||||
this.sourceBuffer = this.mediaSource!.addSourceBuffer("audio/mpeg");
|
||||
this.sourceBuffer.mode = "sequence";
|
||||
|
||||
this.sourceBuffer.onupdateend = () => {
|
||||
this.isAppending = false;
|
||||
this.processNextChunk();
|
||||
};
|
||||
|
||||
resolve();
|
||||
} catch (err) {
|
||||
reject(err);
|
||||
}
|
||||
};
|
||||
|
||||
// MediaSource doesn't have onerror in all browsers, use onsourceclose as fallback
|
||||
this.mediaSource.onsourceclose = () => {
|
||||
if (this.mediaSource?.readyState === "closed") {
|
||||
reject(new Error("MediaSource closed unexpectedly"));
|
||||
}
|
||||
};
|
||||
});
|
||||
|
||||
// Start fetching and streaming audio
|
||||
try {
|
||||
const response = await fetch(url, {
|
||||
method: "POST",
|
||||
signal: this.abortController.signal,
|
||||
credentials: "include", // Include cookies for authentication
|
||||
});
|
||||
|
||||
if (!response.ok) {
|
||||
const errorText = await response.text();
|
||||
throw new Error(
|
||||
`TTS request failed: ${response.status} - ${errorText}`
|
||||
);
|
||||
}
|
||||
|
||||
const reader = response.body?.getReader();
|
||||
if (!reader) {
|
||||
throw new Error("No response body");
|
||||
}
|
||||
|
||||
// Start playback as soon as we have some data
|
||||
let firstChunk = true;
|
||||
|
||||
while (true) {
|
||||
const { done, value } = await reader.read();
|
||||
|
||||
if (done) {
|
||||
this.streamComplete = true;
|
||||
// End the stream when all chunks are appended
|
||||
this.finalizeStream();
|
||||
break;
|
||||
}
|
||||
|
||||
if (value) {
|
||||
this.pendingChunks.push(value);
|
||||
this.processNextChunk();
|
||||
|
||||
// Start playback after first chunk
|
||||
if (firstChunk && this.audioElement) {
|
||||
firstChunk = false;
|
||||
// Small delay to buffer a bit before starting
|
||||
setTimeout(() => {
|
||||
this.audioElement?.play().catch(() => {
|
||||
// Ignore playback start errors
|
||||
});
|
||||
}, 100);
|
||||
}
|
||||
}
|
||||
}
|
||||
} catch (err) {
|
||||
if (err instanceof Error && err.name === "AbortError") {
|
||||
return;
|
||||
}
|
||||
this.onError?.(err instanceof Error ? err.message : "TTS error");
|
||||
throw err;
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Process next chunk from the queue.
|
||||
*/
|
||||
private processNextChunk(): void {
|
||||
if (
|
||||
this.isAppending ||
|
||||
this.pendingChunks.length === 0 ||
|
||||
!this.sourceBuffer ||
|
||||
this.sourceBuffer.updating
|
||||
) {
|
||||
return;
|
||||
}
|
||||
|
||||
const chunk = this.pendingChunks.shift();
|
||||
if (chunk) {
|
||||
this.isAppending = true;
|
||||
try {
|
||||
// Use ArrayBuffer directly for better TypeScript compatibility
|
||||
const buffer = chunk.buffer.slice(
|
||||
chunk.byteOffset,
|
||||
chunk.byteOffset + chunk.byteLength
|
||||
) as ArrayBuffer;
|
||||
this.sourceBuffer.appendBuffer(buffer);
|
||||
} catch {
|
||||
this.isAppending = false;
|
||||
// Try next chunk
|
||||
this.processNextChunk();
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Finalize the stream when all data has been received.
|
||||
*/
|
||||
private finalizeStream(): void {
|
||||
if (this.pendingChunks.length > 0 || this.isAppending) {
|
||||
// Wait for remaining chunks to be appended
|
||||
setTimeout(() => this.finalizeStream(), 50);
|
||||
return;
|
||||
}
|
||||
|
||||
if (
|
||||
this.mediaSource &&
|
||||
this.mediaSource.readyState === "open" &&
|
||||
this.sourceBuffer &&
|
||||
!this.sourceBuffer.updating
|
||||
) {
|
||||
try {
|
||||
this.mediaSource.endOfStream();
|
||||
} catch {
|
||||
// Ignore errors when ending stream
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Fallback for browsers that don't support MediaSource Extensions.
|
||||
* Buffers all audio before playing.
|
||||
*/
|
||||
private async fallbackSpeak(url: string): Promise<void> {
|
||||
const response = await fetch(url, {
|
||||
method: "POST",
|
||||
signal: this.abortController?.signal,
|
||||
credentials: "include", // Include cookies for authentication
|
||||
});
|
||||
|
||||
if (!response.ok) {
|
||||
const errorText = await response.text();
|
||||
throw new Error(`TTS request failed: ${response.status} - ${errorText}`);
|
||||
}
|
||||
|
||||
const audioData = await response.arrayBuffer();
|
||||
|
||||
const blob = new Blob([audioData], { type: "audio/mpeg" });
|
||||
const audioUrl = URL.createObjectURL(blob);
|
||||
|
||||
this.audioElement = new Audio(audioUrl);
|
||||
this.audioElement.muted = this.isMuted;
|
||||
|
||||
this.audioElement.onplay = () => {
|
||||
this.isPlaying = true;
|
||||
this.onPlayingChange?.(true);
|
||||
};
|
||||
|
||||
this.audioElement.onended = () => {
|
||||
this.isPlaying = false;
|
||||
this.onPlayingChange?.(false);
|
||||
URL.revokeObjectURL(audioUrl);
|
||||
};
|
||||
|
||||
this.audioElement.onerror = () => {
|
||||
this.onError?.("Audio playback error");
|
||||
};
|
||||
|
||||
await this.audioElement.play();
|
||||
}
|
||||
|
||||
/**
|
||||
* Stop playback and cleanup resources.
|
||||
*/
|
||||
stop(): void {
|
||||
// Abort any ongoing request
|
||||
if (this.abortController) {
|
||||
this.abortController.abort();
|
||||
this.abortController = null;
|
||||
}
|
||||
|
||||
this.cleanup();
|
||||
}
|
||||
|
||||
setMuted(muted: boolean): void {
|
||||
this.isMuted = muted;
|
||||
if (this.audioElement) {
|
||||
this.audioElement.muted = muted;
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Cleanup all resources.
|
||||
*/
|
||||
private cleanup(): void {
|
||||
// Revoke Object URL to prevent memory leak
|
||||
if (this.mediaSourceUrl) {
|
||||
URL.revokeObjectURL(this.mediaSourceUrl);
|
||||
this.mediaSourceUrl = null;
|
||||
}
|
||||
|
||||
// Stop and cleanup audio element
|
||||
if (this.audioElement) {
|
||||
this.audioElement.pause();
|
||||
this.audioElement.src = "";
|
||||
this.audioElement = null;
|
||||
}
|
||||
|
||||
// Cleanup MediaSource
|
||||
if (this.mediaSource && this.mediaSource.readyState === "open") {
|
||||
try {
|
||||
if (this.sourceBuffer) {
|
||||
this.mediaSource.removeSourceBuffer(this.sourceBuffer);
|
||||
}
|
||||
this.mediaSource.endOfStream();
|
||||
} catch {
|
||||
// Ignore cleanup errors
|
||||
}
|
||||
}
|
||||
|
||||
this.mediaSource = null;
|
||||
this.sourceBuffer = null;
|
||||
this.pendingChunks = [];
|
||||
this.isAppending = false;
|
||||
this.streamComplete = false;
|
||||
|
||||
if (this.isPlaying) {
|
||||
this.isPlaying = false;
|
||||
this.onPlayingChange?.(false);
|
||||
}
|
||||
}
|
||||
|
||||
get playing(): boolean {
|
||||
return this.isPlaying;
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* WebSocketStreamingTTSPlayer - Uses WebSocket for bidirectional streaming.
|
||||
* Useful for scenarios where you want to stream text in and get audio out
|
||||
* incrementally (e.g., as LLM generates text).
|
||||
*/
|
||||
export class WebSocketStreamingTTSPlayer {
|
||||
private websocket: WebSocket | null = null;
|
||||
private mediaSource: MediaSource | null = null;
|
||||
private mediaSourceUrl: string | null = null;
|
||||
private sourceBuffer: SourceBuffer | null = null;
|
||||
private audioElement: HTMLAudioElement | null = null;
|
||||
private pendingChunks: Uint8Array[] = [];
|
||||
private isAppending: boolean = false;
|
||||
private isPlaying: boolean = false;
|
||||
private onPlayingChange?: (playing: boolean) => void;
|
||||
private onError?: (error: string) => void;
|
||||
private hasStartedPlayback: boolean = false;
|
||||
|
||||
constructor(options?: {
|
||||
onPlayingChange?: (playing: boolean) => void;
|
||||
onError?: (error: string) => void;
|
||||
}) {
|
||||
this.onPlayingChange = options?.onPlayingChange;
|
||||
this.onError = options?.onError;
|
||||
}
|
||||
|
||||
private async getWebSocketUrl(): Promise<string> {
|
||||
// Fetch short-lived WS token
|
||||
const tokenResponse = await fetch("/api/voice/ws-token", {
|
||||
method: "POST",
|
||||
credentials: "include",
|
||||
});
|
||||
if (!tokenResponse.ok) {
|
||||
throw new Error("Failed to get WebSocket authentication token");
|
||||
}
|
||||
const { token } = await tokenResponse.json();
|
||||
|
||||
const protocol = window.location.protocol === "https:" ? "wss:" : "ws:";
|
||||
const isDev = window.location.port === "3000";
|
||||
const host = isDev ? "localhost:8080" : window.location.host;
|
||||
const path = isDev
|
||||
? "/voice/synthesize/stream"
|
||||
: "/api/voice/synthesize/stream";
|
||||
return `${protocol}//${host}${path}?token=${encodeURIComponent(token)}`;
|
||||
}
|
||||
|
||||
async connect(voice?: string, speed?: number): Promise<void> {
|
||||
// Cleanup any previous connection
|
||||
this.cleanup();
|
||||
|
||||
// Check MediaSource support
|
||||
if (!window.MediaSource || !MediaSource.isTypeSupported("audio/mpeg")) {
|
||||
throw new Error("MediaSource Extensions not supported");
|
||||
}
|
||||
|
||||
// Create MediaSource and audio element
|
||||
this.mediaSource = new MediaSource();
|
||||
this.audioElement = new Audio();
|
||||
this.mediaSourceUrl = URL.createObjectURL(this.mediaSource);
|
||||
this.audioElement.src = this.mediaSourceUrl;
|
||||
|
||||
this.audioElement.onplay = () => {
|
||||
if (!this.isPlaying) {
|
||||
this.isPlaying = true;
|
||||
this.onPlayingChange?.(true);
|
||||
}
|
||||
};
|
||||
|
||||
this.audioElement.onended = () => {
|
||||
this.isPlaying = false;
|
||||
this.onPlayingChange?.(false);
|
||||
};
|
||||
|
||||
// Wait for MediaSource to be ready
|
||||
await new Promise<void>((resolve, reject) => {
|
||||
this.mediaSource!.onsourceopen = () => {
|
||||
try {
|
||||
this.sourceBuffer = this.mediaSource!.addSourceBuffer("audio/mpeg");
|
||||
this.sourceBuffer.mode = "sequence";
|
||||
this.sourceBuffer.onupdateend = () => {
|
||||
this.isAppending = false;
|
||||
this.processNextChunk();
|
||||
};
|
||||
resolve();
|
||||
} catch (err) {
|
||||
reject(err);
|
||||
}
|
||||
};
|
||||
});
|
||||
|
||||
// Connect WebSocket
|
||||
const url = await this.getWebSocketUrl();
|
||||
return new Promise((resolve, reject) => {
|
||||
this.websocket = new WebSocket(url);
|
||||
|
||||
this.websocket.onopen = () => {
|
||||
// Send initial config
|
||||
this.websocket?.send(
|
||||
JSON.stringify({
|
||||
type: "config",
|
||||
voice: voice,
|
||||
speed: speed || 1.0,
|
||||
})
|
||||
);
|
||||
resolve();
|
||||
};
|
||||
|
||||
this.websocket.onerror = () => {
|
||||
reject(new Error("WebSocket connection failed"));
|
||||
};
|
||||
|
||||
this.websocket.onmessage = async (event) => {
|
||||
if (event.data instanceof Blob) {
|
||||
// Audio chunk received
|
||||
const arrayBuffer = await event.data.arrayBuffer();
|
||||
this.pendingChunks.push(new Uint8Array(arrayBuffer));
|
||||
this.processNextChunk();
|
||||
|
||||
// Start playback after first chunk
|
||||
if (!this.hasStartedPlayback && this.audioElement) {
|
||||
this.hasStartedPlayback = true;
|
||||
setTimeout(() => {
|
||||
this.audioElement?.play().catch(() => {
|
||||
// Ignore playback errors
|
||||
});
|
||||
}, 100);
|
||||
}
|
||||
} else {
|
||||
// JSON message
|
||||
try {
|
||||
const data = JSON.parse(event.data);
|
||||
if (data.type === "audio_done") {
|
||||
this.finalizeStream();
|
||||
} else if (data.type === "error") {
|
||||
this.onError?.(data.message);
|
||||
}
|
||||
} catch {
|
||||
// Ignore parse errors
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
this.websocket.onclose = () => {
|
||||
this.finalizeStream();
|
||||
};
|
||||
});
|
||||
}
|
||||
|
||||
private processNextChunk(): void {
|
||||
if (
|
||||
this.isAppending ||
|
||||
this.pendingChunks.length === 0 ||
|
||||
!this.sourceBuffer ||
|
||||
this.sourceBuffer.updating
|
||||
) {
|
||||
return;
|
||||
}
|
||||
|
||||
const chunk = this.pendingChunks.shift();
|
||||
if (chunk) {
|
||||
this.isAppending = true;
|
||||
try {
|
||||
// Use ArrayBuffer directly for better TypeScript compatibility
|
||||
const buffer = chunk.buffer.slice(
|
||||
chunk.byteOffset,
|
||||
chunk.byteOffset + chunk.byteLength
|
||||
) as ArrayBuffer;
|
||||
this.sourceBuffer.appendBuffer(buffer);
|
||||
} catch {
|
||||
this.isAppending = false;
|
||||
this.processNextChunk();
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
private finalizeStream(): void {
|
||||
if (this.pendingChunks.length > 0 || this.isAppending) {
|
||||
setTimeout(() => this.finalizeStream(), 50);
|
||||
return;
|
||||
}
|
||||
|
||||
if (
|
||||
this.mediaSource &&
|
||||
this.mediaSource.readyState === "open" &&
|
||||
this.sourceBuffer &&
|
||||
!this.sourceBuffer.updating
|
||||
) {
|
||||
try {
|
||||
this.mediaSource.endOfStream();
|
||||
} catch {
|
||||
// Ignore
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
async speak(text: string): Promise<void> {
|
||||
if (!this.websocket || this.websocket.readyState !== WebSocket.OPEN) {
|
||||
throw new Error("WebSocket not connected");
|
||||
}
|
||||
|
||||
this.websocket.send(
|
||||
JSON.stringify({
|
||||
type: "synthesize",
|
||||
text: text,
|
||||
})
|
||||
);
|
||||
}
|
||||
|
||||
stop(): void {
|
||||
this.cleanup();
|
||||
}
|
||||
|
||||
disconnect(): void {
|
||||
if (this.websocket && this.websocket.readyState === WebSocket.OPEN) {
|
||||
this.websocket.send(JSON.stringify({ type: "end" }));
|
||||
this.websocket.close();
|
||||
}
|
||||
this.cleanup();
|
||||
}
|
||||
|
||||
private cleanup(): void {
|
||||
if (this.websocket) {
|
||||
this.websocket.close();
|
||||
this.websocket = null;
|
||||
}
|
||||
|
||||
// Revoke Object URL to prevent memory leak
|
||||
if (this.mediaSourceUrl) {
|
||||
URL.revokeObjectURL(this.mediaSourceUrl);
|
||||
this.mediaSourceUrl = null;
|
||||
}
|
||||
|
||||
if (this.audioElement) {
|
||||
this.audioElement.pause();
|
||||
this.audioElement.src = "";
|
||||
this.audioElement = null;
|
||||
}
|
||||
|
||||
if (this.mediaSource && this.mediaSource.readyState === "open") {
|
||||
try {
|
||||
if (this.sourceBuffer) {
|
||||
this.mediaSource.removeSourceBuffer(this.sourceBuffer);
|
||||
}
|
||||
this.mediaSource.endOfStream();
|
||||
} catch {
|
||||
// Ignore
|
||||
}
|
||||
}
|
||||
|
||||
this.mediaSource = null;
|
||||
this.sourceBuffer = null;
|
||||
this.pendingChunks = [];
|
||||
this.isAppending = false;
|
||||
this.hasStartedPlayback = false;
|
||||
|
||||
if (this.isPlaying) {
|
||||
this.isPlaying = false;
|
||||
this.onPlayingChange?.(false);
|
||||
}
|
||||
}
|
||||
|
||||
get playing(): boolean {
|
||||
return this.isPlaying;
|
||||
}
|
||||
}
|
||||
|
||||
// Export the HTTP player as the default/recommended option
|
||||
export { HTTPStreamingTTSPlayer as StreamingTTSPlayer };
|
||||
@@ -32,10 +32,6 @@ interface UserPreferences {
|
||||
theme_preference: ThemePreference | null;
|
||||
chat_background: string | null;
|
||||
default_app_mode: "AUTO" | "CHAT" | "SEARCH";
|
||||
// Voice preferences
|
||||
voice_auto_send?: boolean;
|
||||
voice_auto_playback?: boolean;
|
||||
voice_playback_speed?: number;
|
||||
}
|
||||
|
||||
export interface MemoryItem {
|
||||
@@ -72,6 +68,20 @@ export const USER_ROLE_LABELS: Record<UserRole, string> = {
|
||||
[UserRole.SLACK_USER]: "Slack User",
|
||||
};
|
||||
|
||||
export enum UserStatus {
|
||||
ACTIVE = "active",
|
||||
INACTIVE = "inactive",
|
||||
INVITED = "invited",
|
||||
REQUESTED = "requested",
|
||||
}
|
||||
|
||||
export const USER_STATUS_LABELS: Record<UserStatus, string> = {
|
||||
[UserStatus.ACTIVE]: "Active",
|
||||
[UserStatus.INACTIVE]: "Inactive",
|
||||
[UserStatus.INVITED]: "Invite Pending",
|
||||
[UserStatus.REQUESTED]: "Requested",
|
||||
};
|
||||
|
||||
export const INVALID_ROLE_HOVER_TEXT: Partial<Record<UserRole, string>> = {
|
||||
[UserRole.BASIC]: "Basic users can't perform any admin actions",
|
||||
[UserRole.ADMIN]: "Admin users can perform all admin actions",
|
||||
|
||||
@@ -46,11 +46,6 @@ interface UserContextType {
|
||||
updateUserChatBackground: (chatBackground: string | null) => Promise<void>;
|
||||
updateUserDefaultModel: (defaultModel: string | null) => Promise<void>;
|
||||
updateUserDefaultAppMode: (mode: "CHAT" | "SEARCH") => Promise<void>;
|
||||
updateUserVoiceSettings: (settings: {
|
||||
auto_send?: boolean;
|
||||
auto_playback?: boolean;
|
||||
playback_speed?: number;
|
||||
}) => Promise<void>;
|
||||
}
|
||||
|
||||
const UserContext = createContext<UserContextType | undefined>(undefined);
|
||||
@@ -465,50 +460,6 @@ export function UserProvider({
|
||||
}
|
||||
};
|
||||
|
||||
const updateUserVoiceSettings = async (settings: {
|
||||
auto_send?: boolean;
|
||||
auto_playback?: boolean;
|
||||
playback_speed?: number;
|
||||
}) => {
|
||||
try {
|
||||
setUpToDateUser((prevUser) => {
|
||||
if (prevUser) {
|
||||
return {
|
||||
...prevUser,
|
||||
preferences: {
|
||||
...prevUser.preferences,
|
||||
voice_auto_send:
|
||||
settings.auto_send ?? prevUser.preferences.voice_auto_send,
|
||||
voice_auto_playback:
|
||||
settings.auto_playback ??
|
||||
prevUser.preferences.voice_auto_playback,
|
||||
voice_playback_speed:
|
||||
settings.playback_speed ??
|
||||
prevUser.preferences.voice_playback_speed,
|
||||
},
|
||||
};
|
||||
}
|
||||
return prevUser;
|
||||
});
|
||||
|
||||
const response = await fetch("/api/voice/settings", {
|
||||
method: "PATCH",
|
||||
headers: {
|
||||
"Content-Type": "application/json",
|
||||
},
|
||||
body: JSON.stringify(settings),
|
||||
});
|
||||
|
||||
if (!response.ok) {
|
||||
await refreshUser();
|
||||
throw new Error("Failed to update voice settings");
|
||||
}
|
||||
} catch (error) {
|
||||
console.error("Error updating voice settings:", error);
|
||||
throw error;
|
||||
}
|
||||
};
|
||||
|
||||
const refreshUser = async () => {
|
||||
await fetchUser();
|
||||
};
|
||||
@@ -527,7 +478,6 @@ export function UserProvider({
|
||||
updateUserChatBackground,
|
||||
updateUserDefaultModel,
|
||||
updateUserDefaultAppMode,
|
||||
updateUserVoiceSettings,
|
||||
toggleAgentPinnedStatus,
|
||||
isAdmin: upToDateUser?.role === UserRole.ADMIN,
|
||||
// Curator status applies for either global or basic curator
|
||||
|
||||
File diff suppressed because it is too large
Load Diff
@@ -210,15 +210,10 @@ describe("InputComboBox", () => {
|
||||
|
||||
await user.type(input, "app");
|
||||
|
||||
// In non-strict mode, searching shows:
|
||||
// 1) a create option for the current input and
|
||||
// 2) matched options.
|
||||
// Search should only show matching options by default
|
||||
const options = screen.getAllByRole("option");
|
||||
expect(options.length).toBe(2);
|
||||
expect(screen.getByLabelText('Create "app"')).toBeInTheDocument();
|
||||
expect(
|
||||
options.some((option) => option.textContent?.includes("Apple"))
|
||||
).toBe(true);
|
||||
expect(options.length).toBe(1);
|
||||
expect(options[0]!.textContent).toBe("Apple");
|
||||
expect(screen.queryByText("Banana")).not.toBeInTheDocument();
|
||||
});
|
||||
|
||||
|
||||
@@ -130,7 +130,6 @@ const InputComboBox = ({
|
||||
leftSearchIcon = false,
|
||||
rightSection,
|
||||
separatorLabel = "Other options",
|
||||
showAddPrefix = false,
|
||||
showOtherOptions = false,
|
||||
...rest
|
||||
}: WithoutStyles<InputComboBoxProps>) => {
|
||||
@@ -158,11 +157,14 @@ const InputComboBox = ({
|
||||
const visibleUnmatchedOptions =
|
||||
hasSearchTerm && showOtherOptions ? unmatchedOptions : [];
|
||||
|
||||
// Whether to show the create option (always show when typing in non-strict mode)
|
||||
const showCreateOption = !strict && hasSearchTerm && inputValue.trim() !== "";
|
||||
// Whether to show the create option (only when no partial matches)
|
||||
const showCreateOption =
|
||||
!strict &&
|
||||
hasSearchTerm &&
|
||||
inputValue.trim() !== "" &&
|
||||
matchedOptions.length === 0;
|
||||
|
||||
// Combined list for keyboard navigation (includes create option when shown)
|
||||
// Only show matched options when searching (hide unmatched)
|
||||
const allVisibleOptions = useMemo(() => {
|
||||
const baseOptions = [...matchedOptions, ...visibleUnmatchedOptions];
|
||||
if (showCreateOption) {
|
||||
@@ -448,7 +450,6 @@ const InputComboBox = ({
|
||||
inputValue={inputValue}
|
||||
allowCreate={!strict}
|
||||
showCreateOption={showCreateOption}
|
||||
showAddPrefix={showAddPrefix}
|
||||
/>
|
||||
</>
|
||||
|
||||
|
||||
@@ -27,8 +27,6 @@ interface ComboBoxDropdownProps {
|
||||
allowCreate: boolean;
|
||||
/** Whether to show create option (pre-computed by parent) */
|
||||
showCreateOption: boolean;
|
||||
/** Show "Add" prefix in create option */
|
||||
showAddPrefix: boolean;
|
||||
}
|
||||
|
||||
/**
|
||||
@@ -60,7 +58,6 @@ export const ComboBoxDropdown = forwardRef<
|
||||
inputValue,
|
||||
allowCreate,
|
||||
showCreateOption,
|
||||
showAddPrefix,
|
||||
},
|
||||
ref
|
||||
) => {
|
||||
@@ -135,7 +132,6 @@ export const ComboBoxDropdown = forwardRef<
|
||||
inputValue={inputValue}
|
||||
allowCreate={allowCreate}
|
||||
showCreateOption={showCreateOption}
|
||||
showAddPrefix={showAddPrefix}
|
||||
/>
|
||||
</div>,
|
||||
document.body
|
||||
|
||||
@@ -24,8 +24,6 @@ interface OptionsListProps {
|
||||
allowCreate: boolean;
|
||||
/** Whether to show create option (pre-computed by parent) */
|
||||
showCreateOption: boolean;
|
||||
/** Show "Add" prefix in create option */
|
||||
showAddPrefix: boolean;
|
||||
}
|
||||
|
||||
/**
|
||||
@@ -47,7 +45,6 @@ export const OptionsList: React.FC<OptionsListProps> = ({
|
||||
inputValue,
|
||||
allowCreate,
|
||||
showCreateOption,
|
||||
showAddPrefix,
|
||||
}) => {
|
||||
// Index offset for other options when create option is shown
|
||||
const indexOffset = showCreateOption ? 1 : 0;
|
||||
@@ -73,7 +70,7 @@ export const OptionsList: React.FC<OptionsListProps> = ({
|
||||
data-index={0}
|
||||
role="option"
|
||||
aria-selected={false}
|
||||
aria-label={`${showAddPrefix ? "Add" : "Create"} "${inputValue}"`}
|
||||
aria-label={`Create "${inputValue}"`}
|
||||
onClick={(e) => {
|
||||
e.stopPropagation();
|
||||
onSelect({ value: inputValue, label: inputValue });
|
||||
@@ -84,48 +81,19 @@ export const OptionsList: React.FC<OptionsListProps> = ({
|
||||
onMouseEnter={() => onMouseEnter(0)}
|
||||
onMouseMove={onMouseMove}
|
||||
className={cn(
|
||||
"cursor-pointer transition-colors",
|
||||
"px-3 py-2 cursor-pointer transition-colors",
|
||||
"flex items-center justify-between rounded-08",
|
||||
highlightedIndex === 0 && "bg-background-tint-02",
|
||||
"hover:bg-background-tint-02",
|
||||
showAddPrefix ? "px-1.5 py-1.5" : "px-3 py-2"
|
||||
"hover:bg-background-tint-02"
|
||||
)}
|
||||
>
|
||||
<span
|
||||
className={cn(
|
||||
"font-main-ui-action truncate min-w-0",
|
||||
showAddPrefix ? "px-1" : ""
|
||||
)}
|
||||
>
|
||||
{showAddPrefix ? (
|
||||
<>
|
||||
<span className="text-text-03">Add</span>
|
||||
<span className="text-text-04">{` ${inputValue}`}</span>
|
||||
</>
|
||||
) : (
|
||||
<span className="text-text-04">{inputValue}</span>
|
||||
)}
|
||||
<span className="font-main-ui-action text-text-04 truncate min-w-0">
|
||||
{inputValue}
|
||||
</span>
|
||||
<SvgPlus
|
||||
className={cn(
|
||||
"w-4 h-4 flex-shrink-0",
|
||||
showAddPrefix ? "text-text-04 mx-1" : "text-text-03 ml-2"
|
||||
)}
|
||||
/>
|
||||
<SvgPlus className="w-4 h-4 text-text-03 flex-shrink-0 ml-2" />
|
||||
</div>
|
||||
)}
|
||||
|
||||
{/* Separator - show when there are options to display */}
|
||||
{separatorLabel &&
|
||||
(matchedOptions.length > 0 ||
|
||||
(!hasSearchTerm && unmatchedOptions.length > 0)) && (
|
||||
<div className="px-3 py-1">
|
||||
<Text as="p" text03 secondaryBody>
|
||||
{separatorLabel}
|
||||
</Text>
|
||||
</div>
|
||||
)}
|
||||
|
||||
{/* Matched/Filtered Options */}
|
||||
{matchedOptions.map((option, idx) => {
|
||||
const globalIndex = idx + indexOffset;
|
||||
@@ -148,27 +116,37 @@ export const OptionsList: React.FC<OptionsListProps> = ({
|
||||
);
|
||||
})}
|
||||
|
||||
{/* Unmatched Options - only show when NOT searching */}
|
||||
{!hasSearchTerm &&
|
||||
unmatchedOptions.map((option, idx) => {
|
||||
const globalIndex = matchedOptions.length + idx + indexOffset;
|
||||
const isExact = isExactMatch(option);
|
||||
return (
|
||||
<OptionItem
|
||||
key={option.value}
|
||||
option={option}
|
||||
index={globalIndex}
|
||||
fieldId={fieldId}
|
||||
isHighlighted={globalIndex === highlightedIndex}
|
||||
isSelected={value === option.value}
|
||||
isExact={isExact}
|
||||
onSelect={onSelect}
|
||||
onMouseEnter={onMouseEnter}
|
||||
onMouseMove={onMouseMove}
|
||||
searchTerm={inputValue}
|
||||
/>
|
||||
);
|
||||
})}
|
||||
{/* Separator - only show if there are unmatched options and a search term */}
|
||||
{hasSearchTerm && unmatchedOptions.length > 0 && (
|
||||
<div className="px-3 py-2 pt-3">
|
||||
<div className="border-t border-border-01 pt-2">
|
||||
<Text as="p" text04 secondaryBody className="text-text-02">
|
||||
{separatorLabel}
|
||||
</Text>
|
||||
</div>
|
||||
</div>
|
||||
)}
|
||||
|
||||
{/* Unmatched Options */}
|
||||
{unmatchedOptions.map((option, idx) => {
|
||||
const globalIndex = matchedOptions.length + idx + indexOffset;
|
||||
const isExact = isExactMatch(option);
|
||||
return (
|
||||
<OptionItem
|
||||
key={option.value}
|
||||
option={option}
|
||||
index={globalIndex}
|
||||
fieldId={fieldId}
|
||||
isHighlighted={globalIndex === highlightedIndex}
|
||||
isSelected={value === option.value}
|
||||
isExact={isExact}
|
||||
onSelect={onSelect}
|
||||
onMouseEnter={onMouseEnter}
|
||||
onMouseMove={onMouseMove}
|
||||
searchTerm={inputValue}
|
||||
/>
|
||||
);
|
||||
})}
|
||||
</>
|
||||
);
|
||||
};
|
||||
|
||||
@@ -1,4 +1,4 @@
|
||||
import { useState, useEffect, useCallback, useMemo, useRef } from "react";
|
||||
import { useState, useEffect, useCallback, useMemo, RefObject } from "react";
|
||||
import { ComboBoxOption } from "./types";
|
||||
|
||||
// =============================================================================
|
||||
@@ -19,7 +19,6 @@ export function useComboBoxState({ value, options }: UseComboBoxStateProps) {
|
||||
const [inputValue, setInputValue] = useState(value);
|
||||
const [highlightedIndex, setHighlightedIndex] = useState(-1);
|
||||
const [isKeyboardNav, setIsKeyboardNav] = useState(false);
|
||||
const prevIsOpenRef = useRef(false);
|
||||
|
||||
// Sync inputValue with the external value prop.
|
||||
// When the dropdown is closed, always reflect the controlled value.
|
||||
|
||||
@@ -40,8 +40,6 @@ export interface InputComboBoxProps
|
||||
rightSection?: React.ReactNode;
|
||||
/** Label for the separator between matched and unmatched options */
|
||||
separatorLabel?: string;
|
||||
/** Show "Add" prefix in create option (e.g., "Add [value]") */
|
||||
showAddPrefix?: boolean;
|
||||
/**
|
||||
* When true, keep non-matching options visible under a separator while searching.
|
||||
* Defaults to false so search results are strictly filtered.
|
||||
|
||||
@@ -114,11 +114,16 @@ function TableQualifier({
|
||||
return (
|
||||
<div
|
||||
className={cn(
|
||||
"flex items-center justify-center rounded-full bg-text-05",
|
||||
"flex items-center justify-center rounded-full bg-background-neutral-inverted-00",
|
||||
resolvedSize === "regular" ? "h-7 w-7" : "h-6 w-6"
|
||||
)}
|
||||
>
|
||||
<Text secondaryAction textLight05 className="select-none uppercase">
|
||||
<Text
|
||||
inverted
|
||||
secondaryAction
|
||||
text05
|
||||
className="select-none uppercase"
|
||||
>
|
||||
{initials}
|
||||
</Text>
|
||||
</div>
|
||||
|
||||
@@ -751,7 +751,6 @@ function ChatPreferencesSettings() {
|
||||
updateUserShortcuts,
|
||||
updateUserDefaultModel,
|
||||
updateUserDefaultAppMode,
|
||||
updateUserVoiceSettings,
|
||||
} = useUser();
|
||||
const isPaidEnterpriseFeaturesEnabled = usePaidEnterpriseFeaturesEnabled();
|
||||
const settings = useSettingsContext();
|
||||
@@ -768,43 +767,6 @@ function ChatPreferencesSettings() {
|
||||
onSuccess: () => toast.success("Preferences saved"),
|
||||
onError: () => toast.error("Failed to save preferences"),
|
||||
});
|
||||
const [draftVoicePlaybackSpeed, setDraftVoicePlaybackSpeed] = useState(
|
||||
user?.preferences.voice_playback_speed ?? 1
|
||||
);
|
||||
|
||||
useEffect(() => {
|
||||
setDraftVoicePlaybackSpeed(user?.preferences.voice_playback_speed ?? 1);
|
||||
}, [user?.preferences.voice_playback_speed]);
|
||||
|
||||
const saveVoiceSettings = useCallback(
|
||||
async (settings: {
|
||||
auto_send?: boolean;
|
||||
auto_playback?: boolean;
|
||||
playback_speed?: number;
|
||||
}) => {
|
||||
try {
|
||||
await updateUserVoiceSettings(settings);
|
||||
toast.success("Preferences saved");
|
||||
} catch {
|
||||
toast.error("Failed to save preferences");
|
||||
}
|
||||
},
|
||||
[updateUserVoiceSettings]
|
||||
);
|
||||
|
||||
const commitVoicePlaybackSpeed = useCallback(() => {
|
||||
const currentSpeed = user?.preferences.voice_playback_speed ?? 1;
|
||||
if (Math.abs(currentSpeed - draftVoicePlaybackSpeed) < 0.001) {
|
||||
return;
|
||||
}
|
||||
void saveVoiceSettings({
|
||||
playback_speed: draftVoicePlaybackSpeed,
|
||||
});
|
||||
}, [
|
||||
draftVoicePlaybackSpeed,
|
||||
saveVoiceSettings,
|
||||
user?.preferences.voice_playback_speed,
|
||||
]);
|
||||
|
||||
// Wrapper to save memories and return success/failure
|
||||
const handleSaveMemories = useCallback(
|
||||
@@ -974,69 +936,6 @@ function ChatPreferencesSettings() {
|
||||
{user?.preferences?.shortcut_enabled && <PromptShortcuts />}
|
||||
</Card>
|
||||
</Section>
|
||||
|
||||
<Section gap={0.75}>
|
||||
<Content
|
||||
title="Voice"
|
||||
sizePreset="main-content"
|
||||
variant="section"
|
||||
widthVariant="full"
|
||||
/>
|
||||
<Card>
|
||||
<InputLayouts.Horizontal
|
||||
title="Auto-Send"
|
||||
description="Automatically send voice input when recording stops."
|
||||
>
|
||||
<Switch
|
||||
checked={user?.preferences.voice_auto_send ?? false}
|
||||
onCheckedChange={(checked) => {
|
||||
void saveVoiceSettings({ auto_send: checked });
|
||||
}}
|
||||
/>
|
||||
</InputLayouts.Horizontal>
|
||||
|
||||
<InputLayouts.Horizontal
|
||||
title="Auto-Playback"
|
||||
description="Automatically play voice responses."
|
||||
>
|
||||
<Switch
|
||||
checked={user?.preferences.voice_auto_playback ?? false}
|
||||
onCheckedChange={(checked) => {
|
||||
void saveVoiceSettings({ auto_playback: checked });
|
||||
}}
|
||||
/>
|
||||
</InputLayouts.Horizontal>
|
||||
|
||||
<InputLayouts.Horizontal
|
||||
title="Playback Speed"
|
||||
description="Adjust the speed of voice playback."
|
||||
>
|
||||
<div className="flex items-center gap-3">
|
||||
<input
|
||||
type="range"
|
||||
min="0.5"
|
||||
max="2"
|
||||
step="0.1"
|
||||
value={draftVoicePlaybackSpeed}
|
||||
onChange={(e) => {
|
||||
setDraftVoicePlaybackSpeed(parseFloat(e.target.value));
|
||||
}}
|
||||
onMouseUp={commitVoicePlaybackSpeed}
|
||||
onTouchEnd={commitVoicePlaybackSpeed}
|
||||
onKeyUp={(e) => {
|
||||
if (e.key === "ArrowLeft" || e.key === "ArrowRight") {
|
||||
commitVoicePlaybackSpeed();
|
||||
}
|
||||
}}
|
||||
className="w-24 h-2 rounded-lg appearance-none cursor-pointer bg-background-neutral-02"
|
||||
/>
|
||||
<span className="text-sm text-text-02 w-10">
|
||||
{draftVoicePlaybackSpeed.toFixed(1)}x
|
||||
</span>
|
||||
</div>
|
||||
</InputLayouts.Horizontal>
|
||||
</Card>
|
||||
</Section>
|
||||
</Section>
|
||||
);
|
||||
}
|
||||
|
||||
@@ -1,13 +1,17 @@
|
||||
"use client";
|
||||
|
||||
import { useState } from "react";
|
||||
import { SvgUser, SvgUserPlus } from "@opal/icons";
|
||||
import { Button } from "@opal/components";
|
||||
import * as SettingsLayouts from "@/layouts/settings-layouts";
|
||||
import { useScimToken } from "@/hooks/useScimToken";
|
||||
import { usePaidEnterpriseFeaturesEnabled } from "@/components/settings/usePaidEnterpriseFeaturesEnabled";
|
||||
import useUserCounts from "@/hooks/useUserCounts";
|
||||
import { UserStatus } from "@/lib/types";
|
||||
import type { StatusFilter } from "./UsersPage/interfaces";
|
||||
|
||||
import UsersSummary from "./UsersPage/UsersSummary";
|
||||
import UsersTable from "./UsersPage/UsersTable";
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Users page content
|
||||
@@ -19,7 +23,18 @@ function UsersContent() {
|
||||
const { data: scimToken } = useScimToken();
|
||||
const showScim = isEe && !!scimToken;
|
||||
|
||||
const { activeCount, invitedCount, pendingCount } = useUserCounts();
|
||||
const { activeCount, invitedCount, pendingCount, roleCounts, statusCounts } =
|
||||
useUserCounts();
|
||||
|
||||
const [selectedStatuses, setSelectedStatuses] = useState<StatusFilter>([]);
|
||||
|
||||
const toggleStatus = (target: UserStatus) => {
|
||||
setSelectedStatuses((prev) =>
|
||||
prev.includes(target)
|
||||
? prev.filter((s) => s !== target)
|
||||
: [...prev, target]
|
||||
);
|
||||
};
|
||||
|
||||
return (
|
||||
<>
|
||||
@@ -28,9 +43,17 @@ function UsersContent() {
|
||||
pendingInvites={invitedCount}
|
||||
requests={pendingCount}
|
||||
showScim={showScim}
|
||||
onFilterActive={() => toggleStatus(UserStatus.ACTIVE)}
|
||||
onFilterInvites={() => toggleStatus(UserStatus.INVITED)}
|
||||
onFilterRequests={() => toggleStatus(UserStatus.REQUESTED)}
|
||||
/>
|
||||
|
||||
{/* Table and filters will be added in subsequent PRs */}
|
||||
<UsersTable
|
||||
selectedStatuses={selectedStatuses}
|
||||
onStatusesChange={setSelectedStatuses}
|
||||
roleCounts={roleCounts}
|
||||
statusCounts={statusCounts}
|
||||
/>
|
||||
</>
|
||||
);
|
||||
}
|
||||
|
||||
296
web/src/refresh-pages/admin/UsersPage/UserFilters.tsx
Normal file
296
web/src/refresh-pages/admin/UsersPage/UserFilters.tsx
Normal file
@@ -0,0 +1,296 @@
|
||||
"use client";
|
||||
|
||||
import { useState } from "react";
|
||||
import { SvgCheck, SvgSlack, SvgUser, SvgUsers } from "@opal/icons";
|
||||
import type { IconFunctionComponent } from "@opal/types";
|
||||
import FilterButton from "@/refresh-components/buttons/FilterButton";
|
||||
import Popover from "@/refresh-components/Popover";
|
||||
import InputTypeIn from "@/refresh-components/inputs/InputTypeIn";
|
||||
import LineItem from "@/refresh-components/buttons/LineItem";
|
||||
import Text from "@/refresh-components/texts/Text";
|
||||
import Separator from "@/refresh-components/Separator";
|
||||
import {
|
||||
UserRole,
|
||||
UserStatus,
|
||||
USER_ROLE_LABELS,
|
||||
USER_STATUS_LABELS,
|
||||
} from "@/lib/types";
|
||||
import { NEXT_PUBLIC_CLOUD_ENABLED } from "@/lib/constants";
|
||||
import type { GroupOption, StatusFilter, StatusCountMap } from "./interfaces";
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Types
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
interface UserFiltersProps {
|
||||
selectedRoles: UserRole[];
|
||||
onRolesChange: (roles: UserRole[]) => void;
|
||||
selectedGroups: number[];
|
||||
onGroupsChange: (groupIds: number[]) => void;
|
||||
groups: GroupOption[];
|
||||
selectedStatuses: StatusFilter;
|
||||
onStatusesChange: (statuses: StatusFilter) => void;
|
||||
roleCounts: Record<string, number>;
|
||||
statusCounts: StatusCountMap;
|
||||
}
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Constants
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
const FILTERABLE_ROLES = Object.entries(USER_ROLE_LABELS).filter(
|
||||
([role]) => role !== UserRole.EXT_PERM_USER
|
||||
) as [UserRole, string][];
|
||||
|
||||
const FILTERABLE_STATUSES = (
|
||||
Object.entries(USER_STATUS_LABELS) as [UserStatus, string][]
|
||||
).filter(
|
||||
([value]) => value !== UserStatus.REQUESTED || NEXT_PUBLIC_CLOUD_ENABLED
|
||||
);
|
||||
|
||||
const ROLE_ICONS: Partial<Record<UserRole, IconFunctionComponent>> = {
|
||||
[UserRole.SLACK_USER]: SvgSlack,
|
||||
};
|
||||
|
||||
/** Map UserStatus enum values to the keys returned by the counts endpoint. */
|
||||
const STATUS_COUNT_KEY: Record<UserStatus, keyof StatusCountMap> = {
|
||||
[UserStatus.ACTIVE]: "active",
|
||||
[UserStatus.INACTIVE]: "inactive",
|
||||
[UserStatus.INVITED]: "invited",
|
||||
[UserStatus.REQUESTED]: "requested",
|
||||
};
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Helpers
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
function CountBadge({ count }: { count: number | undefined }) {
|
||||
return (
|
||||
<Text as="span" secondaryBody text03>
|
||||
{count ?? 0}
|
||||
</Text>
|
||||
);
|
||||
}
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Component
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
export default function UserFilters({
|
||||
selectedRoles,
|
||||
onRolesChange,
|
||||
selectedGroups,
|
||||
onGroupsChange,
|
||||
groups,
|
||||
selectedStatuses,
|
||||
onStatusesChange,
|
||||
roleCounts,
|
||||
statusCounts,
|
||||
}: UserFiltersProps) {
|
||||
const hasRoleFilter = selectedRoles.length > 0;
|
||||
const hasGroupFilter = selectedGroups.length > 0;
|
||||
const hasStatusFilter = selectedStatuses.length > 0;
|
||||
const [groupSearch, setGroupSearch] = useState("");
|
||||
const [groupPopoverOpen, setGroupPopoverOpen] = useState(false);
|
||||
|
||||
const toggleRole = (role: UserRole) => {
|
||||
if (selectedRoles.includes(role)) {
|
||||
onRolesChange(selectedRoles.filter((r) => r !== role));
|
||||
} else {
|
||||
onRolesChange([...selectedRoles, role]);
|
||||
}
|
||||
};
|
||||
|
||||
const roleLabel = hasRoleFilter
|
||||
? FILTERABLE_ROLES.filter(([role]) => selectedRoles.includes(role))
|
||||
.map(([, label]) => label)
|
||||
.slice(0, 2)
|
||||
.join(", ") +
|
||||
(selectedRoles.length > 2 ? `, +${selectedRoles.length - 2}` : "")
|
||||
: "All Account Types";
|
||||
|
||||
const toggleGroup = (groupId: number) => {
|
||||
if (selectedGroups.includes(groupId)) {
|
||||
onGroupsChange(selectedGroups.filter((id) => id !== groupId));
|
||||
} else {
|
||||
onGroupsChange([...selectedGroups, groupId]);
|
||||
}
|
||||
};
|
||||
|
||||
const groupLabel = hasGroupFilter
|
||||
? groups
|
||||
.filter((g) => selectedGroups.includes(g.id))
|
||||
.map((g) => g.name)
|
||||
.slice(0, 2)
|
||||
.join(", ") +
|
||||
(selectedGroups.length > 2 ? `, +${selectedGroups.length - 2}` : "")
|
||||
: "All Groups";
|
||||
|
||||
const toggleStatus = (status: UserStatus) => {
|
||||
if (selectedStatuses.includes(status)) {
|
||||
onStatusesChange(selectedStatuses.filter((s) => s !== status));
|
||||
} else {
|
||||
onStatusesChange([...selectedStatuses, status]);
|
||||
}
|
||||
};
|
||||
|
||||
const statusLabel = hasStatusFilter
|
||||
? FILTERABLE_STATUSES.filter(([status]) =>
|
||||
selectedStatuses.includes(status)
|
||||
)
|
||||
.map(([, label]) => label)
|
||||
.slice(0, 2)
|
||||
.join(", ") +
|
||||
(selectedStatuses.length > 2 ? `, +${selectedStatuses.length - 2}` : "")
|
||||
: "All Status";
|
||||
|
||||
const filteredGroups = groupSearch
|
||||
? groups.filter((g) =>
|
||||
g.name.toLowerCase().includes(groupSearch.toLowerCase())
|
||||
)
|
||||
: groups;
|
||||
|
||||
return (
|
||||
<div className="flex gap-2">
|
||||
{/* Role filter */}
|
||||
<Popover>
|
||||
<Popover.Trigger asChild>
|
||||
<FilterButton
|
||||
leftIcon={SvgUsers}
|
||||
active={hasRoleFilter}
|
||||
onClear={() => onRolesChange([])}
|
||||
>
|
||||
{roleLabel}
|
||||
</FilterButton>
|
||||
</Popover.Trigger>
|
||||
<Popover.Content align="start">
|
||||
<div className="flex flex-col gap-1 p-1 min-w-[200px]">
|
||||
<LineItem
|
||||
icon={SvgUsers}
|
||||
selected={!hasRoleFilter}
|
||||
onClick={() => onRolesChange([])}
|
||||
>
|
||||
All Account Types
|
||||
</LineItem>
|
||||
<Separator noPadding />
|
||||
{FILTERABLE_ROLES.map(([role, label]) => {
|
||||
const isSelected = selectedRoles.includes(role);
|
||||
const roleIcon = ROLE_ICONS[role] ?? SvgUser;
|
||||
return (
|
||||
<LineItem
|
||||
key={role}
|
||||
icon={isSelected ? SvgCheck : roleIcon}
|
||||
selected={isSelected}
|
||||
onClick={() => toggleRole(role)}
|
||||
rightChildren={<CountBadge count={roleCounts[role]} />}
|
||||
>
|
||||
{label}
|
||||
</LineItem>
|
||||
);
|
||||
})}
|
||||
</div>
|
||||
</Popover.Content>
|
||||
</Popover>
|
||||
|
||||
{/* Groups filter */}
|
||||
<Popover
|
||||
open={groupPopoverOpen}
|
||||
onOpenChange={(open) => {
|
||||
setGroupPopoverOpen(open);
|
||||
if (!open) setGroupSearch("");
|
||||
}}
|
||||
>
|
||||
<Popover.Trigger asChild>
|
||||
<FilterButton
|
||||
leftIcon={SvgUsers}
|
||||
active={hasGroupFilter}
|
||||
onClear={() => onGroupsChange([])}
|
||||
>
|
||||
{groupLabel}
|
||||
</FilterButton>
|
||||
</Popover.Trigger>
|
||||
<Popover.Content align="start">
|
||||
<div className="flex flex-col gap-1 p-1 min-w-[200px]">
|
||||
<div className="px-1 pt-1">
|
||||
<InputTypeIn
|
||||
value={groupSearch}
|
||||
onChange={(e) => setGroupSearch(e.target.value)}
|
||||
placeholder="Search groups..."
|
||||
leftSearchIcon
|
||||
/>
|
||||
</div>
|
||||
<LineItem
|
||||
icon={SvgUsers}
|
||||
selected={!hasGroupFilter}
|
||||
onClick={() => onGroupsChange([])}
|
||||
>
|
||||
All Groups
|
||||
</LineItem>
|
||||
<Separator noPadding />
|
||||
<div className="flex flex-col gap-1 max-h-[240px] overflow-y-auto">
|
||||
{filteredGroups.map((group) => {
|
||||
const isSelected = selectedGroups.includes(group.id);
|
||||
return (
|
||||
<LineItem
|
||||
key={group.id}
|
||||
icon={isSelected ? SvgCheck : undefined}
|
||||
selected={isSelected}
|
||||
onClick={() => toggleGroup(group.id)}
|
||||
rightChildren={<CountBadge count={group.memberCount} />}
|
||||
>
|
||||
{group.name}
|
||||
</LineItem>
|
||||
);
|
||||
})}
|
||||
{filteredGroups.length === 0 && (
|
||||
<Text as="span" secondaryBody text03 className="px-2 py-1.5">
|
||||
No groups found
|
||||
</Text>
|
||||
)}
|
||||
</div>
|
||||
</div>
|
||||
</Popover.Content>
|
||||
</Popover>
|
||||
|
||||
{/* Status filter */}
|
||||
<Popover>
|
||||
<Popover.Trigger asChild>
|
||||
<FilterButton
|
||||
leftIcon={SvgUsers}
|
||||
active={hasStatusFilter}
|
||||
onClear={() => onStatusesChange([])}
|
||||
>
|
||||
{statusLabel}
|
||||
</FilterButton>
|
||||
</Popover.Trigger>
|
||||
<Popover.Content align="start">
|
||||
<div className="flex flex-col gap-1 p-1 min-w-[200px]">
|
||||
<LineItem
|
||||
icon={!hasStatusFilter ? SvgCheck : undefined}
|
||||
selected={!hasStatusFilter}
|
||||
onClick={() => onStatusesChange([])}
|
||||
>
|
||||
All Status
|
||||
</LineItem>
|
||||
<Separator noPadding />
|
||||
{FILTERABLE_STATUSES.map(([status, label]) => {
|
||||
const isSelected = selectedStatuses.includes(status);
|
||||
const countKey = STATUS_COUNT_KEY[status];
|
||||
return (
|
||||
<LineItem
|
||||
key={status}
|
||||
icon={isSelected ? SvgCheck : undefined}
|
||||
selected={isSelected}
|
||||
onClick={() => toggleStatus(status)}
|
||||
rightChildren={<CountBadge count={statusCounts[countKey]} />}
|
||||
>
|
||||
{label}
|
||||
</LineItem>
|
||||
);
|
||||
})}
|
||||
</div>
|
||||
</Popover.Content>
|
||||
</Popover>
|
||||
</div>
|
||||
);
|
||||
}
|
||||
@@ -1,4 +1,4 @@
|
||||
import { SvgArrowUpRight, SvgUserSync } from "@opal/icons";
|
||||
import { SvgArrowUpRight, SvgFilter, SvgUserSync } from "@opal/icons";
|
||||
import { ContentAction } from "@opal/layouts";
|
||||
import { Button } from "@opal/components";
|
||||
import { Section } from "@/layouts/general-layouts";
|
||||
@@ -14,19 +14,33 @@ import { ADMIN_PATHS } from "@/lib/admin-routes";
|
||||
type StatCellProps = {
|
||||
value: number | null;
|
||||
label: string;
|
||||
onFilter?: () => void;
|
||||
};
|
||||
|
||||
function StatCell({ value, label }: StatCellProps) {
|
||||
function StatCell({ value, label, onFilter }: StatCellProps) {
|
||||
const display = value === null ? "\u2014" : value.toLocaleString();
|
||||
|
||||
return (
|
||||
<div className="flex flex-col items-start gap-0.5 w-full p-2">
|
||||
<div
|
||||
className={`group/stat relative flex flex-col items-start gap-0.5 w-full p-2 rounded-08 transition-colors ${
|
||||
onFilter ? "cursor-pointer hover:bg-background-tint-02" : ""
|
||||
}`}
|
||||
onClick={onFilter}
|
||||
>
|
||||
<Text as="span" mainUiAction text04>
|
||||
{display}
|
||||
</Text>
|
||||
<Text as="span" secondaryBody text03>
|
||||
{label}
|
||||
</Text>
|
||||
{onFilter && (
|
||||
<div className="absolute right-2 top-2 flex items-center gap-1 opacity-0 group-hover/stat:opacity-100 transition-opacity">
|
||||
<Text as="span" secondaryBody text03>
|
||||
Filter
|
||||
</Text>
|
||||
<SvgFilter size={16} className="text-text-03" />
|
||||
</div>
|
||||
)}
|
||||
</div>
|
||||
);
|
||||
}
|
||||
@@ -66,6 +80,9 @@ type UsersSummaryProps = {
|
||||
pendingInvites: number | null;
|
||||
requests: number | null;
|
||||
showScim: boolean;
|
||||
onFilterActive?: () => void;
|
||||
onFilterInvites?: () => void;
|
||||
onFilterRequests?: () => void;
|
||||
};
|
||||
|
||||
export default function UsersSummary({
|
||||
@@ -73,9 +90,36 @@ export default function UsersSummary({
|
||||
pendingInvites,
|
||||
requests,
|
||||
showScim,
|
||||
onFilterActive,
|
||||
onFilterInvites,
|
||||
onFilterRequests,
|
||||
}: UsersSummaryProps) {
|
||||
const showRequests = requests !== null && requests > 0;
|
||||
|
||||
const statsCard = (
|
||||
<Card padding={0.5}>
|
||||
<Section flexDirection="row" gap={0}>
|
||||
<StatCell
|
||||
value={activeUsers}
|
||||
label="active users"
|
||||
onFilter={onFilterActive}
|
||||
/>
|
||||
<StatCell
|
||||
value={pendingInvites}
|
||||
label="pending invites"
|
||||
onFilter={onFilterInvites}
|
||||
/>
|
||||
{showRequests && (
|
||||
<StatCell
|
||||
value={requests}
|
||||
label="requests to join"
|
||||
onFilter={onFilterRequests}
|
||||
/>
|
||||
)}
|
||||
</Section>
|
||||
</Card>
|
||||
);
|
||||
|
||||
if (showScim) {
|
||||
return (
|
||||
<Section
|
||||
@@ -84,15 +128,7 @@ export default function UsersSummary({
|
||||
alignItems="stretch"
|
||||
gap={0.5}
|
||||
>
|
||||
<Card padding={0.5}>
|
||||
<Section flexDirection="row" gap={0}>
|
||||
<StatCell value={activeUsers} label="active users" />
|
||||
<StatCell value={pendingInvites} label="pending invites" />
|
||||
{showRequests && (
|
||||
<StatCell value={requests} label="requests to join" />
|
||||
)}
|
||||
</Section>
|
||||
</Card>
|
||||
{statsCard}
|
||||
<ScimCard />
|
||||
</Section>
|
||||
);
|
||||
@@ -102,14 +138,26 @@ export default function UsersSummary({
|
||||
return (
|
||||
<Section flexDirection="row" gap={0.5}>
|
||||
<Card padding={0.5}>
|
||||
<StatCell value={activeUsers} label="active users" />
|
||||
<StatCell
|
||||
value={activeUsers}
|
||||
label="active users"
|
||||
onFilter={onFilterActive}
|
||||
/>
|
||||
</Card>
|
||||
<Card padding={0.5}>
|
||||
<StatCell value={pendingInvites} label="pending invites" />
|
||||
<StatCell
|
||||
value={pendingInvites}
|
||||
label="pending invites"
|
||||
onFilter={onFilterInvites}
|
||||
/>
|
||||
</Card>
|
||||
{showRequests && (
|
||||
<Card padding={0.5}>
|
||||
<StatCell value={requests} label="requests to join" />
|
||||
<StatCell
|
||||
value={requests}
|
||||
label="requests to join"
|
||||
onFilter={onFilterRequests}
|
||||
/>
|
||||
</Card>
|
||||
)}
|
||||
</Section>
|
||||
|
||||
294
web/src/refresh-pages/admin/UsersPage/UsersTable.tsx
Normal file
294
web/src/refresh-pages/admin/UsersPage/UsersTable.tsx
Normal file
@@ -0,0 +1,294 @@
|
||||
"use client";
|
||||
|
||||
import { useMemo, useState } from "react";
|
||||
import DataTable from "@/refresh-components/table/DataTable";
|
||||
import { createTableColumns } from "@/refresh-components/table/columns";
|
||||
import { Content } from "@opal/layouts";
|
||||
import { SvgUser, SvgUsers, SvgSlack } from "@opal/icons";
|
||||
import SvgNoResult from "@opal/illustrations/no-result";
|
||||
import { IllustrationContent } from "@opal/layouts";
|
||||
import SimpleLoader from "@/refresh-components/loaders/SimpleLoader";
|
||||
import type { IconFunctionComponent } from "@opal/types";
|
||||
import {
|
||||
UserRole,
|
||||
UserStatus,
|
||||
USER_ROLE_LABELS,
|
||||
USER_STATUS_LABELS,
|
||||
} from "@/lib/types";
|
||||
import { timeAgo } from "@/lib/time";
|
||||
import Text from "@/refresh-components/texts/Text";
|
||||
import InputTypeIn from "@/refresh-components/inputs/InputTypeIn";
|
||||
import useAdminUsers from "@/hooks/useAdminUsers";
|
||||
import useGroups from "@/hooks/useGroups";
|
||||
import UserFilters from "./UserFilters";
|
||||
import type {
|
||||
UserRow,
|
||||
UserGroupInfo,
|
||||
GroupOption,
|
||||
StatusFilter,
|
||||
StatusCountMap,
|
||||
} from "./interfaces";
|
||||
import { getInitials } from "./utils";
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Constants
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
const ROLE_ICONS: Record<UserRole, IconFunctionComponent> = {
|
||||
[UserRole.BASIC]: SvgUser,
|
||||
[UserRole.ADMIN]: SvgUser,
|
||||
[UserRole.GLOBAL_CURATOR]: SvgUsers,
|
||||
[UserRole.CURATOR]: SvgUsers,
|
||||
[UserRole.LIMITED]: SvgUser,
|
||||
[UserRole.EXT_PERM_USER]: SvgUser,
|
||||
[UserRole.SLACK_USER]: SvgSlack,
|
||||
};
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Column renderers
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
function renderNameColumn(email: string, row: UserRow) {
|
||||
return (
|
||||
<Content
|
||||
sizePreset="main-ui"
|
||||
variant="section"
|
||||
title={row.personal_name ?? email}
|
||||
description={row.personal_name ? email : undefined}
|
||||
/>
|
||||
);
|
||||
}
|
||||
|
||||
function renderGroupsColumn(groups: UserGroupInfo[]) {
|
||||
if (!groups.length) {
|
||||
return (
|
||||
<Text as="span" secondaryBody text03>
|
||||
{"\u2014"}
|
||||
</Text>
|
||||
);
|
||||
}
|
||||
const visible = groups.slice(0, 2);
|
||||
const overflow = groups.length - visible.length;
|
||||
return (
|
||||
<div className="flex items-center gap-1 flex-nowrap overflow-hidden min-w-0">
|
||||
{visible.map((g) => (
|
||||
<span
|
||||
key={g.id}
|
||||
className="inline-flex items-center flex-shrink-0 rounded-md bg-background-tint-02 px-2 py-0.5 whitespace-nowrap"
|
||||
>
|
||||
<Text as="span" secondaryBody text03>
|
||||
{g.name}
|
||||
</Text>
|
||||
</span>
|
||||
))}
|
||||
{overflow > 0 && (
|
||||
<Text as="span" secondaryBody text03>
|
||||
+{overflow}
|
||||
</Text>
|
||||
)}
|
||||
</div>
|
||||
);
|
||||
}
|
||||
|
||||
function renderRoleColumn(role: UserRole | null) {
|
||||
if (!role) {
|
||||
return (
|
||||
<Text as="span" secondaryBody text03>
|
||||
—
|
||||
</Text>
|
||||
);
|
||||
}
|
||||
const Icon = ROLE_ICONS[role];
|
||||
return (
|
||||
<div className="flex items-center gap-1.5">
|
||||
{Icon && <Icon size={14} className="text-text-03 shrink-0" />}
|
||||
<Text as="span" mainUiBody text03>
|
||||
{USER_ROLE_LABELS[role] ?? role}
|
||||
</Text>
|
||||
</div>
|
||||
);
|
||||
}
|
||||
|
||||
function renderStatusColumn(value: UserStatus, row: UserRow) {
|
||||
return (
|
||||
<div className="flex flex-col">
|
||||
<Text as="span" mainUiBody text03>
|
||||
{USER_STATUS_LABELS[value] ?? value}
|
||||
</Text>
|
||||
{row.is_scim_synced && (
|
||||
<Text as="span" secondaryBody text03>
|
||||
SCIM synced
|
||||
</Text>
|
||||
)}
|
||||
</div>
|
||||
);
|
||||
}
|
||||
|
||||
function renderLastUpdatedColumn(value: string | null) {
|
||||
return (
|
||||
<Text as="span" secondaryBody text03>
|
||||
{timeAgo(value) ?? "\u2014"}
|
||||
</Text>
|
||||
);
|
||||
}
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Columns (stable reference — defined at module scope)
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
const tc = createTableColumns<UserRow>();
|
||||
|
||||
const columns = [
|
||||
tc.qualifier({
|
||||
content: "avatar-user",
|
||||
getInitials: (row) => getInitials(row.personal_name, row.email),
|
||||
selectable: false,
|
||||
}),
|
||||
tc.column("email", {
|
||||
header: "Name",
|
||||
weight: 22,
|
||||
minWidth: 140,
|
||||
cell: renderNameColumn,
|
||||
}),
|
||||
tc.column("groups", {
|
||||
header: "Groups",
|
||||
weight: 24,
|
||||
minWidth: 200,
|
||||
enableSorting: false,
|
||||
cell: renderGroupsColumn,
|
||||
}),
|
||||
tc.column("role", {
|
||||
header: "Account Type",
|
||||
weight: 16,
|
||||
minWidth: 180,
|
||||
cell: renderRoleColumn,
|
||||
}),
|
||||
tc.column("status", {
|
||||
header: "Status",
|
||||
weight: 14,
|
||||
minWidth: 100,
|
||||
cell: renderStatusColumn,
|
||||
}),
|
||||
tc.column("updated_at", {
|
||||
header: "Last Updated",
|
||||
weight: 14,
|
||||
minWidth: 100,
|
||||
cell: renderLastUpdatedColumn,
|
||||
}),
|
||||
tc.actions(),
|
||||
];
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Component
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
const PAGE_SIZE = 8;
|
||||
|
||||
interface UsersTableProps {
|
||||
selectedStatuses: StatusFilter;
|
||||
onStatusesChange: (statuses: StatusFilter) => void;
|
||||
roleCounts: Record<string, number>;
|
||||
statusCounts: StatusCountMap;
|
||||
}
|
||||
|
||||
export default function UsersTable({
|
||||
selectedStatuses,
|
||||
onStatusesChange,
|
||||
roleCounts,
|
||||
statusCounts,
|
||||
}: UsersTableProps) {
|
||||
const [searchTerm, setSearchTerm] = useState("");
|
||||
const [selectedRoles, setSelectedRoles] = useState<UserRole[]>([]);
|
||||
const [selectedGroups, setSelectedGroups] = useState<number[]>([]);
|
||||
|
||||
const { data: allGroups } = useGroups();
|
||||
|
||||
const groupOptions: GroupOption[] = useMemo(
|
||||
() =>
|
||||
(allGroups ?? []).map((g) => ({
|
||||
id: g.id,
|
||||
name: g.name,
|
||||
memberCount: g.users.length,
|
||||
})),
|
||||
[allGroups]
|
||||
);
|
||||
|
||||
const { users, isLoading, error } = useAdminUsers();
|
||||
|
||||
// Client-side filtering
|
||||
const filteredUsers = useMemo(() => {
|
||||
let result = users;
|
||||
|
||||
if (selectedRoles.length > 0) {
|
||||
result = result.filter(
|
||||
(u) => u.role !== null && selectedRoles.includes(u.role)
|
||||
);
|
||||
}
|
||||
|
||||
if (selectedStatuses.length > 0) {
|
||||
result = result.filter((u) => selectedStatuses.includes(u.status));
|
||||
}
|
||||
|
||||
if (selectedGroups.length > 0) {
|
||||
result = result.filter((u) =>
|
||||
u.groups.some((g) => selectedGroups.includes(g.id))
|
||||
);
|
||||
}
|
||||
|
||||
return result;
|
||||
}, [users, selectedRoles, selectedStatuses, selectedGroups]);
|
||||
|
||||
if (isLoading) {
|
||||
return (
|
||||
<div className="flex justify-center py-12">
|
||||
<SimpleLoader className="h-6 w-6" />
|
||||
</div>
|
||||
);
|
||||
}
|
||||
|
||||
if (error) {
|
||||
return (
|
||||
<Text as="p" secondaryBody text03>
|
||||
Failed to load users. Please try refreshing the page.
|
||||
</Text>
|
||||
);
|
||||
}
|
||||
|
||||
return (
|
||||
<div className="flex flex-col gap-3">
|
||||
<InputTypeIn
|
||||
value={searchTerm}
|
||||
onChange={(e) => setSearchTerm(e.target.value)}
|
||||
placeholder="Search users..."
|
||||
leftSearchIcon
|
||||
/>
|
||||
<UserFilters
|
||||
selectedRoles={selectedRoles}
|
||||
onRolesChange={setSelectedRoles}
|
||||
selectedGroups={selectedGroups}
|
||||
onGroupsChange={setSelectedGroups}
|
||||
groups={groupOptions}
|
||||
selectedStatuses={selectedStatuses}
|
||||
onStatusesChange={onStatusesChange}
|
||||
roleCounts={roleCounts}
|
||||
statusCounts={statusCounts}
|
||||
/>
|
||||
{filteredUsers.length === 0 ? (
|
||||
<IllustrationContent
|
||||
illustration={SvgNoResult}
|
||||
title="No users found"
|
||||
description="No users match the current filters."
|
||||
/>
|
||||
) : (
|
||||
<DataTable
|
||||
data={filteredUsers}
|
||||
columns={columns}
|
||||
getRowId={(row) => row.id ?? row.email}
|
||||
pageSize={PAGE_SIZE}
|
||||
searchTerm={searchTerm}
|
||||
footer={{ mode: "summary" }}
|
||||
/>
|
||||
)}
|
||||
</div>
|
||||
);
|
||||
}
|
||||
36
web/src/refresh-pages/admin/UsersPage/interfaces.ts
Normal file
36
web/src/refresh-pages/admin/UsersPage/interfaces.ts
Normal file
@@ -0,0 +1,36 @@
|
||||
import type { UserRole, UserStatus } from "@/lib/types";
|
||||
|
||||
export interface UserGroupInfo {
|
||||
id: number;
|
||||
name: string;
|
||||
}
|
||||
|
||||
export interface UserRow {
|
||||
id: string | null;
|
||||
email: string;
|
||||
role: UserRole | null;
|
||||
status: UserStatus;
|
||||
is_active: boolean;
|
||||
is_scim_synced: boolean;
|
||||
personal_name: string | null;
|
||||
created_at: string | null;
|
||||
updated_at: string | null;
|
||||
groups: UserGroupInfo[];
|
||||
}
|
||||
|
||||
export interface GroupOption {
|
||||
id: number;
|
||||
name: string;
|
||||
memberCount?: number;
|
||||
}
|
||||
|
||||
/** Empty array = no filter (show all). */
|
||||
export type StatusFilter = UserStatus[];
|
||||
|
||||
/** Keys match the UserStatus-derived labels used in filter badges. */
|
||||
export type StatusCountMap = {
|
||||
active?: number;
|
||||
inactive?: number;
|
||||
invited?: number;
|
||||
requested?: number;
|
||||
};
|
||||
43
web/src/refresh-pages/admin/UsersPage/utils.test.ts
Normal file
43
web/src/refresh-pages/admin/UsersPage/utils.test.ts
Normal file
@@ -0,0 +1,43 @@
|
||||
import { getInitials } from "./utils";
|
||||
|
||||
describe("getInitials", () => {
|
||||
it("returns first letters of first two name parts", () => {
|
||||
expect(getInitials("Alice Smith", "alice@example.com")).toBe("AS");
|
||||
});
|
||||
|
||||
it("returns first two chars of a single-word name", () => {
|
||||
expect(getInitials("Alice", "alice@example.com")).toBe("AL");
|
||||
});
|
||||
|
||||
it("handles three-word names (uses first two)", () => {
|
||||
expect(getInitials("Alice B. Smith", "alice@example.com")).toBe("AB");
|
||||
});
|
||||
|
||||
it("falls back to email local part with dot separator", () => {
|
||||
expect(getInitials(null, "alice.smith@example.com")).toBe("AS");
|
||||
});
|
||||
|
||||
it("falls back to email local part with underscore separator", () => {
|
||||
expect(getInitials(null, "alice_smith@example.com")).toBe("AS");
|
||||
});
|
||||
|
||||
it("falls back to email local part with hyphen separator", () => {
|
||||
expect(getInitials(null, "alice-smith@example.com")).toBe("AS");
|
||||
});
|
||||
|
||||
it("uses first two chars of email local if no separator", () => {
|
||||
expect(getInitials(null, "alice@example.com")).toBe("AL");
|
||||
});
|
||||
|
||||
it("returns ? for empty email local part", () => {
|
||||
expect(getInitials(null, "@example.com")).toBe("?");
|
||||
});
|
||||
|
||||
it("uppercases the result", () => {
|
||||
expect(getInitials("john doe", "jd@test.com")).toBe("JD");
|
||||
});
|
||||
|
||||
it("trims whitespace from name", () => {
|
||||
expect(getInitials(" Alice Smith ", "a@test.com")).toBe("AS");
|
||||
});
|
||||
});
|
||||
23
web/src/refresh-pages/admin/UsersPage/utils.ts
Normal file
23
web/src/refresh-pages/admin/UsersPage/utils.ts
Normal file
@@ -0,0 +1,23 @@
|
||||
/**
|
||||
* Derive display initials from a user's name or email.
|
||||
*
|
||||
* - If a name is provided, uses the first letter of the first two words.
|
||||
* - Falls back to the email local part, splitting on `.`, `_`, or `-`.
|
||||
* - Returns at most 2 uppercase characters.
|
||||
*/
|
||||
export function getInitials(name: string | null, email: string): string {
|
||||
if (name) {
|
||||
const parts = name.trim().split(/\s+/);
|
||||
if (parts.length >= 2) {
|
||||
return ((parts[0]?.[0] ?? "") + (parts[1]?.[0] ?? "")).toUpperCase();
|
||||
}
|
||||
return name.slice(0, 2).toUpperCase();
|
||||
}
|
||||
const local = email.split("@")[0];
|
||||
if (!local) return "?";
|
||||
const parts = local.split(/[._-]/);
|
||||
if (parts.length >= 2) {
|
||||
return ((parts[0]?.[0] ?? "") + (parts[1]?.[0] ?? "")).toUpperCase();
|
||||
}
|
||||
return local.slice(0, 2).toUpperCase();
|
||||
}
|
||||
@@ -55,10 +55,6 @@ import SimpleLoader from "@/refresh-components/loaders/SimpleLoader";
|
||||
import { useQueryController } from "@/providers/QueryControllerProvider";
|
||||
import { Section } from "@/layouts/general-layouts";
|
||||
import Spacer from "@/refresh-components/Spacer";
|
||||
import MicrophoneButton from "@/sections/input/MicrophoneButton";
|
||||
import Waveform from "@/components/voice/Waveform";
|
||||
import { useVoiceMode } from "@/providers/VoiceModeProvider";
|
||||
import { useVoiceStatus } from "@/hooks/useVoiceStatus";
|
||||
|
||||
const MIN_INPUT_HEIGHT = 44;
|
||||
const MAX_INPUT_HEIGHT = 200;
|
||||
@@ -117,14 +113,6 @@ const AppInputBar = React.memo(
|
||||
}: AppInputBarProps) => {
|
||||
// Internal message state - kept local to avoid parent re-renders on every keystroke
|
||||
const [message, setMessage] = useState(initialMessage);
|
||||
const [isRecording, setIsRecording] = useState(false);
|
||||
const [recordingCycleCount, setRecordingCycleCount] = useState(0);
|
||||
const [isMuted, setIsMuted] = useState(false);
|
||||
const [audioLevel, setAudioLevel] = useState(0);
|
||||
const stopRecordingRef = useRef<(() => Promise<string | null>) | null>(
|
||||
null
|
||||
);
|
||||
const setMutedRef = useRef<((muted: boolean) => void) | null>(null);
|
||||
const textAreaRef = useRef<HTMLTextAreaElement>(null);
|
||||
const textAreaWrapperRef = useRef<HTMLDivElement>(null);
|
||||
const filesWrapperRef = useRef<HTMLDivElement>(null);
|
||||
@@ -135,38 +123,6 @@ const AppInputBar = React.memo(
|
||||
const isClassifying = state.phase === "classifying";
|
||||
const isSearchActive =
|
||||
state.phase === "searching" || state.phase === "search-results";
|
||||
const {
|
||||
stopTTS,
|
||||
isTTSPlaying,
|
||||
isManualTTSPlaying,
|
||||
isTTSLoading,
|
||||
isAwaitingAutoPlaybackStart,
|
||||
isTTSMuted,
|
||||
toggleTTSMute,
|
||||
} = useVoiceMode();
|
||||
const { sttEnabled } = useVoiceStatus();
|
||||
const isVoicePlaybackActive =
|
||||
isTTSPlaying || isTTSLoading || isAwaitingAutoPlaybackStart;
|
||||
const isVoicePlaybackControllable = isVoicePlaybackActive && !isRecording;
|
||||
const isTTSActuallySpeaking = isTTSPlaying || isManualTTSPlaying;
|
||||
|
||||
const handleRecordingChange = useCallback((nextIsRecording: boolean) => {
|
||||
setIsRecording((prevIsRecording) => {
|
||||
if (!prevIsRecording && nextIsRecording) {
|
||||
setRecordingCycleCount((count) => count + 1);
|
||||
}
|
||||
return nextIsRecording;
|
||||
});
|
||||
}, []);
|
||||
|
||||
// Wrapper for onSubmit that stops TTS first to prevent overlapping voices
|
||||
const handleSubmit = useCallback(
|
||||
(text: string) => {
|
||||
stopTTS();
|
||||
onSubmit(text);
|
||||
},
|
||||
[stopTTS, onSubmit]
|
||||
);
|
||||
|
||||
// Expose reset and focus methods to parent via ref
|
||||
React.useImperativeHandle(ref, () => ({
|
||||
@@ -187,14 +143,9 @@ const AppInputBar = React.memo(
|
||||
}
|
||||
}, [initialMessage]);
|
||||
const appFocus = useAppFocus();
|
||||
const isNewSession = appFocus.isNewSession();
|
||||
const appMode = state.phase === "idle" ? state.appMode : undefined;
|
||||
const isSearchMode =
|
||||
(isNewSession && appMode === "search") || isSearchActive;
|
||||
const shouldShowRecordingWaveformBelow =
|
||||
isRecording &&
|
||||
!isVoicePlaybackActive &&
|
||||
(isNewSession || recordingCycleCount === 1);
|
||||
(appFocus.isNewSession() && appMode === "search") || isSearchActive;
|
||||
|
||||
const { forcedToolIds, setForcedToolIds } = useForcedTools();
|
||||
const { currentMessageFiles, setCurrentMessageFiles, currentProjectId } =
|
||||
@@ -607,35 +558,9 @@ const AppInputBar = React.memo(
|
||||
disabled={disabled}
|
||||
/>
|
||||
</div>
|
||||
{sttEnabled && (
|
||||
<MicrophoneButton
|
||||
onTranscription={(text) => setMessage(text)}
|
||||
disabled={disabled || chatState === "streaming"}
|
||||
autoSend={user?.preferences?.voice_auto_send ?? false}
|
||||
autoListen={user?.preferences?.voice_auto_playback ?? false}
|
||||
isNewSession={isNewSession}
|
||||
chatState={chatState}
|
||||
onRecordingChange={handleRecordingChange}
|
||||
stopRecordingRef={stopRecordingRef}
|
||||
onRecordingStart={() => setMessage("")}
|
||||
onAutoSend={(text) => {
|
||||
// Guard against empty transcription
|
||||
if (text.trim()) {
|
||||
handleSubmit(text);
|
||||
setMessage("");
|
||||
}
|
||||
}}
|
||||
onMuteChange={setIsMuted}
|
||||
setMutedRef={setMutedRef}
|
||||
onAudioLevel={setAudioLevel}
|
||||
/>
|
||||
)}
|
||||
|
||||
<Disabled
|
||||
disabled={
|
||||
(chatState === "input" &&
|
||||
!isVoicePlaybackControllable &&
|
||||
!message) ||
|
||||
(chatState === "input" && !message) ||
|
||||
hasUploadingFiles ||
|
||||
isClassifying
|
||||
}
|
||||
@@ -645,16 +570,13 @@ const AppInputBar = React.memo(
|
||||
icon={
|
||||
isClassifying
|
||||
? SimpleLoader
|
||||
: chatState === "streaming" || isVoicePlaybackControllable
|
||||
? SvgStop
|
||||
: SvgArrowUp
|
||||
: chatState === "input"
|
||||
? SvgArrowUp
|
||||
: SvgStop
|
||||
}
|
||||
onClick={() => {
|
||||
if (chatState == "streaming") {
|
||||
stopTTS({ manual: true });
|
||||
stopGenerating();
|
||||
} else if (isVoicePlaybackControllable) {
|
||||
stopTTS({ manual: true });
|
||||
} else if (message) {
|
||||
onSubmit(message);
|
||||
}
|
||||
@@ -684,32 +606,6 @@ const AppInputBar = React.memo(
|
||||
// modes. See the corresponding note there for details.
|
||||
)}
|
||||
>
|
||||
{/* Voice waveform above input */}
|
||||
{isTTSActuallySpeaking ? (
|
||||
<div className="flex justify-start px-1">
|
||||
<Waveform
|
||||
variant="speaking"
|
||||
isActive={isTTSActuallySpeaking}
|
||||
isMuted={isTTSMuted}
|
||||
onMuteToggle={toggleTTSMute}
|
||||
/>
|
||||
</div>
|
||||
) : isRecording &&
|
||||
!isVoicePlaybackActive &&
|
||||
!shouldShowRecordingWaveformBelow ? (
|
||||
<div className="px-1">
|
||||
<Waveform
|
||||
variant="recording"
|
||||
isActive={isRecording}
|
||||
isMuted={isMuted}
|
||||
audioLevel={audioLevel}
|
||||
onMuteToggle={() => {
|
||||
setMutedRef.current?.(!isMuted);
|
||||
}}
|
||||
/>
|
||||
</div>
|
||||
) : null}
|
||||
|
||||
{/* Attached Files */}
|
||||
<div
|
||||
ref={filesWrapperRef}
|
||||
@@ -761,13 +657,9 @@ const AppInputBar = React.memo(
|
||||
style={{ scrollbarWidth: "thin" }}
|
||||
aria-multiline={true}
|
||||
placeholder={
|
||||
isRecording
|
||||
? "Listening..."
|
||||
: isVoicePlaybackActive
|
||||
? "Onyx is speaking..."
|
||||
: isSearchMode
|
||||
? "Search connected sources"
|
||||
: "How can I help you today?"
|
||||
isSearchMode
|
||||
? "Search connected sources"
|
||||
: "How can I help you today?"
|
||||
}
|
||||
value={message}
|
||||
onKeyDown={(event) => {
|
||||
@@ -784,7 +676,7 @@ const AppInputBar = React.memo(
|
||||
!isClassifying &&
|
||||
!hasUploadingFiles
|
||||
) {
|
||||
handleSubmit(message);
|
||||
onSubmit(message);
|
||||
}
|
||||
}
|
||||
}}
|
||||
@@ -851,7 +743,7 @@ const AppInputBar = React.memo(
|
||||
if (chatState == "streaming") {
|
||||
stopGenerating();
|
||||
} else if (message) {
|
||||
handleSubmit(message);
|
||||
onSubmit(message);
|
||||
}
|
||||
}}
|
||||
prominence="tertiary"
|
||||
@@ -863,21 +755,6 @@ const AppInputBar = React.memo(
|
||||
</div>
|
||||
|
||||
{chatControls}
|
||||
|
||||
{/* First recording cycle waveform below input */}
|
||||
{shouldShowRecordingWaveformBelow && (
|
||||
<div className="px-1">
|
||||
<Waveform
|
||||
variant="recording"
|
||||
isActive={isRecording}
|
||||
isMuted={isMuted}
|
||||
audioLevel={audioLevel}
|
||||
onMuteToggle={() => {
|
||||
setMutedRef.current?.(!isMuted);
|
||||
}}
|
||||
/>
|
||||
</div>
|
||||
)}
|
||||
</div>
|
||||
</Disabled>
|
||||
);
|
||||
|
||||
@@ -1,297 +0,0 @@
|
||||
"use client";
|
||||
|
||||
import { useCallback, useEffect, useRef } from "react";
|
||||
import { Button } from "@opal/components";
|
||||
import { Disabled } from "@opal/core";
|
||||
import { SvgMicrophone } from "@opal/icons";
|
||||
import { useVoiceRecorder } from "@/hooks/useVoiceRecorder";
|
||||
import { useVoiceMode } from "@/providers/VoiceModeProvider";
|
||||
import { toast } from "@/hooks/useToast";
|
||||
import SimpleLoader from "@/refresh-components/loaders/SimpleLoader";
|
||||
import { ChatState } from "@/app/app/interfaces";
|
||||
|
||||
interface MicrophoneButtonProps {
|
||||
onTranscription: (text: string) => void;
|
||||
disabled?: boolean;
|
||||
autoSend?: boolean;
|
||||
/** Called with transcribed text when autoSend is enabled */
|
||||
onAutoSend?: (text: string) => void;
|
||||
/**
|
||||
* Internal prop: auto-start listening when TTS finishes or chat response completes.
|
||||
* Tied to voice_auto_playback user preference.
|
||||
* Enables conversation flow: speak → AI responds → auto-listen again.
|
||||
* Note: autoSend is separate - it controls whether message auto-submits after recording.
|
||||
*/
|
||||
autoListen?: boolean;
|
||||
/** Current chat state - used to detect when response streaming finishes */
|
||||
chatState?: ChatState;
|
||||
/** Called when recording state changes */
|
||||
onRecordingChange?: (isRecording: boolean) => void;
|
||||
/** Ref to expose stop recording function to parent */
|
||||
stopRecordingRef?: React.MutableRefObject<
|
||||
(() => Promise<string | null>) | null
|
||||
>;
|
||||
/** Called when recording starts to clear input */
|
||||
onRecordingStart?: () => void;
|
||||
/** Called when mute state changes */
|
||||
onMuteChange?: (isMuted: boolean) => void;
|
||||
/** Ref to expose setMuted function to parent */
|
||||
setMutedRef?: React.MutableRefObject<((muted: boolean) => void) | null>;
|
||||
/** Called with current microphone audio level (0-1) for waveform visualization */
|
||||
onAudioLevel?: (level: number) => void;
|
||||
/** Whether current chat is a new session (used to reset auto-listen arming) */
|
||||
isNewSession?: boolean;
|
||||
}
|
||||
|
||||
function MicrophoneButton({
|
||||
onTranscription,
|
||||
disabled = false,
|
||||
autoSend = false,
|
||||
onAutoSend,
|
||||
autoListen = false,
|
||||
chatState,
|
||||
onRecordingChange,
|
||||
stopRecordingRef,
|
||||
onRecordingStart,
|
||||
onMuteChange,
|
||||
setMutedRef,
|
||||
onAudioLevel,
|
||||
isNewSession = false,
|
||||
}: MicrophoneButtonProps) {
|
||||
const {
|
||||
isTTSPlaying,
|
||||
isTTSLoading,
|
||||
isAwaitingAutoPlaybackStart,
|
||||
manualStopCount,
|
||||
} = useVoiceMode();
|
||||
|
||||
// Refs for tracking state across renders
|
||||
// Track whether TTS was actually playing audio (not just loading)
|
||||
const wasTTSActuallyPlayingRef = useRef(false);
|
||||
const manualStopRequestedRef = useRef(false);
|
||||
const lastHandledManualStopCountRef = useRef(manualStopCount);
|
||||
const autoListenCooldownTimerRef = useRef<NodeJS.Timeout | null>(null);
|
||||
const hasManualRecordStartRef = useRef(false);
|
||||
|
||||
// Handler for VAD (Voice Activity Detection) triggered auto-send.
|
||||
// VAD runs server-side in the STT provider and detects when the user stops speaking.
|
||||
const handleFinalTranscript = useCallback(
|
||||
(text: string) => {
|
||||
onTranscription(text);
|
||||
const isManualStop = manualStopRequestedRef.current;
|
||||
// Only auto-send if chat is ready for input (not streaming)
|
||||
if (!isManualStop && autoSend && onAutoSend && chatState === "input") {
|
||||
onAutoSend(text);
|
||||
}
|
||||
},
|
||||
[onTranscription, autoSend, onAutoSend, chatState]
|
||||
);
|
||||
|
||||
const {
|
||||
isRecording,
|
||||
isProcessing,
|
||||
isMuted,
|
||||
error,
|
||||
liveTranscript,
|
||||
audioLevel,
|
||||
startRecording,
|
||||
stopRecording,
|
||||
setMuted,
|
||||
} = useVoiceRecorder({ onFinalTranscript: handleFinalTranscript });
|
||||
|
||||
// Expose stopRecording to parent
|
||||
useEffect(() => {
|
||||
if (stopRecordingRef) {
|
||||
stopRecordingRef.current = stopRecording;
|
||||
}
|
||||
}, [stopRecording, stopRecordingRef]);
|
||||
|
||||
// Expose setMuted to parent
|
||||
useEffect(() => {
|
||||
if (setMutedRef) {
|
||||
setMutedRef.current = setMuted;
|
||||
}
|
||||
}, [setMuted, setMutedRef]);
|
||||
|
||||
// Notify parent when mute state changes
|
||||
useEffect(() => {
|
||||
onMuteChange?.(isMuted);
|
||||
}, [isMuted, onMuteChange]);
|
||||
|
||||
// Forward audio level to parent for waveform visualization
|
||||
useEffect(() => {
|
||||
onAudioLevel?.(audioLevel);
|
||||
}, [audioLevel, onAudioLevel]);
|
||||
|
||||
// Notify parent when recording state changes
|
||||
useEffect(() => {
|
||||
onRecordingChange?.(isRecording);
|
||||
}, [isRecording, onRecordingChange]);
|
||||
|
||||
// Update input with live transcript as user speaks
|
||||
useEffect(() => {
|
||||
if (isRecording && liveTranscript) {
|
||||
onTranscription(liveTranscript);
|
||||
}
|
||||
}, [isRecording, liveTranscript, onTranscription]);
|
||||
|
||||
const handleClick = useCallback(async () => {
|
||||
if (isRecording) {
|
||||
// When recording, clicking the mic button stops recording
|
||||
manualStopRequestedRef.current = true;
|
||||
try {
|
||||
const finalTranscript = await stopRecording();
|
||||
if (finalTranscript) {
|
||||
onTranscription(finalTranscript);
|
||||
}
|
||||
if (
|
||||
autoSend &&
|
||||
onAutoSend &&
|
||||
chatState === "input" &&
|
||||
finalTranscript?.trim()
|
||||
) {
|
||||
onAutoSend(finalTranscript);
|
||||
}
|
||||
} finally {
|
||||
manualStopRequestedRef.current = false;
|
||||
}
|
||||
} else {
|
||||
try {
|
||||
// Clear input before starting recording
|
||||
onRecordingStart?.();
|
||||
await startRecording();
|
||||
// Arm auto-listen only after first manual mic start in this session.
|
||||
hasManualRecordStartRef.current = true;
|
||||
} catch (err) {
|
||||
console.error("Microphone access failed:", err);
|
||||
toast.error("Could not access microphone");
|
||||
}
|
||||
}
|
||||
}, [
|
||||
isRecording,
|
||||
startRecording,
|
||||
stopRecording,
|
||||
onRecordingStart,
|
||||
onTranscription,
|
||||
autoSend,
|
||||
onAutoSend,
|
||||
chatState,
|
||||
]);
|
||||
|
||||
// Auto-start listening shortly after TTS finishes (only if autoListen is enabled).
|
||||
// Small cooldown reduces playback bleed being re-captured by the microphone.
|
||||
// IMPORTANT: Only trigger auto-listen if TTS was actually playing audio,
|
||||
// not just loading. This prevents auto-listen from triggering when TTS fails.
|
||||
useEffect(() => {
|
||||
if (autoListenCooldownTimerRef.current) {
|
||||
clearTimeout(autoListenCooldownTimerRef.current);
|
||||
autoListenCooldownTimerRef.current = null;
|
||||
}
|
||||
|
||||
const stoppedManually =
|
||||
manualStopCount !== lastHandledManualStopCountRef.current;
|
||||
|
||||
// Only trigger auto-listen if TTS was actually playing (not just loading)
|
||||
if (
|
||||
wasTTSActuallyPlayingRef.current &&
|
||||
!isTTSPlaying &&
|
||||
!isTTSLoading &&
|
||||
!isAwaitingAutoPlaybackStart &&
|
||||
autoListen &&
|
||||
hasManualRecordStartRef.current &&
|
||||
!disabled &&
|
||||
!isRecording &&
|
||||
!stoppedManually
|
||||
) {
|
||||
autoListenCooldownTimerRef.current = setTimeout(() => {
|
||||
autoListenCooldownTimerRef.current = null;
|
||||
if (
|
||||
!autoListen ||
|
||||
disabled ||
|
||||
isRecording ||
|
||||
isTTSPlaying ||
|
||||
isTTSLoading ||
|
||||
isAwaitingAutoPlaybackStart
|
||||
) {
|
||||
return;
|
||||
}
|
||||
startRecording().catch((err) => {
|
||||
console.error("Auto-start microphone failed:", err);
|
||||
toast.error("Could not auto-start microphone");
|
||||
});
|
||||
}, 400);
|
||||
}
|
||||
|
||||
if (stoppedManually) {
|
||||
lastHandledManualStopCountRef.current = manualStopCount;
|
||||
}
|
||||
|
||||
// Only track actual playback - not loading states
|
||||
// This ensures auto-listen only triggers after audio actually played
|
||||
if (isTTSPlaying) {
|
||||
wasTTSActuallyPlayingRef.current = true;
|
||||
} else if (!isTTSPlaying && !isTTSLoading && !isAwaitingAutoPlaybackStart) {
|
||||
// Reset when TTS is completely done
|
||||
wasTTSActuallyPlayingRef.current = false;
|
||||
}
|
||||
}, [
|
||||
isTTSPlaying,
|
||||
isTTSLoading,
|
||||
isAwaitingAutoPlaybackStart,
|
||||
autoListen,
|
||||
disabled,
|
||||
isRecording,
|
||||
startRecording,
|
||||
manualStopCount,
|
||||
]);
|
||||
|
||||
// New sessions must start with an explicit manual mic press.
|
||||
useEffect(() => {
|
||||
if (isNewSession) {
|
||||
hasManualRecordStartRef.current = false;
|
||||
}
|
||||
}, [isNewSession]);
|
||||
|
||||
useEffect(() => {
|
||||
return () => {
|
||||
if (autoListenCooldownTimerRef.current) {
|
||||
clearTimeout(autoListenCooldownTimerRef.current);
|
||||
autoListenCooldownTimerRef.current = null;
|
||||
}
|
||||
};
|
||||
}, []);
|
||||
|
||||
useEffect(() => {
|
||||
if (error) {
|
||||
console.error("Voice recorder error:", error);
|
||||
toast.error(error);
|
||||
}
|
||||
}, [error]);
|
||||
|
||||
// Icon: show loader when processing, otherwise mic
|
||||
const icon = isProcessing ? SimpleLoader : SvgMicrophone;
|
||||
|
||||
// Disable when processing or TTS is playing (don't want to pick up TTS audio)
|
||||
const isDisabled =
|
||||
disabled ||
|
||||
isProcessing ||
|
||||
isTTSPlaying ||
|
||||
isTTSLoading ||
|
||||
isAwaitingAutoPlaybackStart;
|
||||
|
||||
// Recording = darkened (primary), not recording = light (tertiary)
|
||||
const prominence = isRecording ? "primary" : "tertiary";
|
||||
|
||||
return (
|
||||
<Disabled disabled={isDisabled}>
|
||||
<Button
|
||||
icon={icon}
|
||||
onClick={handleClick}
|
||||
aria-label={isRecording ? "Stop recording" : "Start recording"}
|
||||
prominence={prominence}
|
||||
/>
|
||||
</Disabled>
|
||||
);
|
||||
}
|
||||
|
||||
export default MicrophoneButton;
|
||||
Some files were not shown because too many files have changed in this diff Show More
Reference in New Issue
Block a user