Compare commits

34 Commits

Author SHA1 Message Date
Bo-Onyx
9990dd450b feat(hook): Frontend connect a hook 2026-03-24 17:59:02 -07:00
Bo-Onyx
92199d2e44 address comments 2026-03-24 17:58:56 -07:00
Bo-Onyx
25efb8d1e0 feat(hook): Add frontend feature control and admin hook page 2026-03-24 17:57:45 -07:00
Bo-Onyx
b20a5ebf69 feat(hook): Add frontend feature control and admin hook page (#9575) 2026-03-25 00:37:37 +00:00
Bo-Onyx
8645adb807 fix(width): UI update model width definition. (#9613) 2026-03-25 00:11:32 +00:00
Nikolas Garza
2425bd4d8d feat(groups): add shared resources and token limit sections (#9538) 2026-03-24 23:44:44 +00:00
Raunak Bhagat
333b2b19cb refactor: fix sidebar layout (#9601)
Co-authored-by: greptile-apps[bot] <165735046+greptile-apps[bot]@users.noreply.github.com>
2026-03-24 23:22:00 +00:00
Jamison Lahman
44895b3bd6 fix(ux): disable MCP Tools toggle if needs authenticated (#9607) 2026-03-24 22:45:23 +00:00
Raunak Bhagat
78c2ecf99f refactor(opal): restructure Onyx logo icons into composable parts (#9606)
Co-authored-by: greptile-apps[bot] <165735046+greptile-apps[bot]@users.noreply.github.com>
2026-03-24 22:26:28 +00:00
Ciaran Sweet
e3e0e04edc fix: update values.yaml comment for opensearch admin password secretKeyRef (#9595) 2026-03-24 21:54:03 +00:00
Justin Tahara
a19fe03bd8 fix(ui): Text focused paste from PowerPoint (#9603) 2026-03-24 21:23:58 +00:00
Nikolas Garza
415c05b5f8 feat(groups): add create group page (#9515)
Co-authored-by: Claude Sonnet 4.6 <noreply@anthropic.com>
2026-03-24 20:55:18 +00:00
Nikolas Garza
352fd19f0a feat(admin): inline group renaming (#9491) 2026-03-24 20:12:17 +00:00
Raunak Bhagat
41ae039bfa refactor(opal): cleanup button types in Opal (#9598) 2026-03-24 20:06:39 +00:00
Bo-Onyx
782c734287 feat(hook): integrate query processing hook point (#9533) 2026-03-24 19:47:17 +00:00
Justin Tahara
728cdb0715 feat(helm): Adding pginto specific host (#9600) 2026-03-24 19:31:02 +00:00
Justin Tahara
baf6437117 fix(mt): Preprovision all tenants at once (#9576) 2026-03-24 19:13:10 +00:00
Raunak Bhagat
f187165077 refactor(opal): opalify FilterButton + migrate all instances away from old one (#9597) 2026-03-24 19:12:00 +00:00
Evan Lohn
727be3d663 fix: eager load chat session persona (#9577) 2026-03-24 19:03:57 +00:00
Jessica Singh
98c8f9884b fix(voice): add WebSocket upgrade headers to nginx configs (#9558)
Co-authored-by: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
2026-03-24 18:53:39 +00:00
Jamison Lahman
d79a068984 fix(fe): settings page layout shift on load (#9594) 2026-03-24 18:15:54 +00:00
Wenxi
ba0740d15f fix(fe): map snake_case auth type API response to camelCase (#9586)
Co-authored-by: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
2026-03-24 18:01:51 +00:00
Jamison Lahman
86b7bed90b chore(gha): basic test selection for external deps and connector tests (#9596)
Co-authored-by: greptile-apps[bot] <165735046+greptile-apps[bot]@users.noreply.github.com>
2026-03-24 10:52:40 -07:00
Jamison Lahman
aead6ab9a5 fix(fe): properly style "Sign In" button (#9480) 2026-03-24 10:02:02 -07:00
Jamison Lahman
c9d4c186dd chore(blame): ignore ruff formatting change (#9345) 2026-03-24 10:01:23 -07:00
Jamison Lahman
70aad1ec46 fix(fe): editing an LLM provider uses the global default model (#9502) 2026-03-24 10:01:07 -07:00
Wenxi
ca3cc16ead fix(fe): stop SWR retry spam and spurious logout on auth pages (#9587) 2026-03-24 16:53:56 +00:00
Jamison Lahman
9ea1780ce5 chore(fe): memory input defaults to 1 row with max of 3 (#9563) 2026-03-24 09:35:47 -07:00
Jamison Lahman
f70e5e605e feat(ux): handle when chat session id cannot be found (#9524) 2026-03-24 16:18:52 +00:00
Jamison Lahman
84b134e226 fix(a11y): hidden buttons appear on tabbing (#9518) 2026-03-24 16:18:11 +00:00
Raunak Bhagat
b17c63a7d6 feat(admin): refresh agents page with DataTable and opal components (#9376) 2026-03-24 16:04:10 +00:00
Jamison Lahman
76c41d1b0b chore(fe): always load an empty memory card (#9560) 2026-03-24 15:47:12 +00:00
Jamison Lahman
579b86f1ce chore(fe): memories save after pressing enter (#9553)
Co-authored-by: cubic-dev-ai[bot] <191113872+cubic-dev-ai[bot]@users.noreply.github.com>
2026-03-24 15:42:36 +00:00
Jamison Lahman
a53cf13db1 chore(fe): position memories modal at the top (#9554) 2026-03-24 15:42:32 +00:00
192 changed files with 5907 additions and 2145 deletions

View File

@@ -6,3 +6,4 @@
3134e5f840c12c8f32613ce520101a047c89dcc2 # refactor(whitespace): rm temporary react fragments (#7161)
ed3f72bc75f3e3a9ae9e4d8cd38278f9c97e78b4 # refactor(whitespace): rm react fragment #7190
7b927e79c25f4ddfd18a067f489e122acd2c89de # chore(format): format files where `ruff` and `black` agree (#9339)

View File

@@ -7,6 +7,15 @@ on:
merge_group:
pull_request:
branches: [main]
paths:
- "backend/**"
- "pyproject.toml"
- "uv.lock"
- ".github/workflows/pr-external-dependency-unit-tests.yml"
- ".github/actions/setup-python-and-install-dependencies/**"
- ".github/actions/setup-playwright/**"
- "deployment/docker_compose/docker-compose.yml"
- "deployment/docker_compose/docker-compose.dev.yml"
push:
tags:
- "v*.*.*"

View File

@@ -7,6 +7,13 @@ on:
merge_group:
pull_request:
branches: [main]
paths:
- "backend/**"
- "pyproject.toml"
- "uv.lock"
- ".github/workflows/pr-python-connector-tests.yml"
- ".github/actions/setup-python-and-install-dependencies/**"
- ".github/actions/setup-playwright/**"
push:
tags:
- "v*.*.*"

View File

@@ -25,10 +25,13 @@ from onyx.redis.redis_pool import get_redis_client
from shared_configs.configs import MULTI_TENANT
from shared_configs.configs import TENANT_ID_PREFIX
# Soft time limit for tenant pre-provisioning tasks (in seconds)
_TENANT_PROVISIONING_SOFT_TIME_LIMIT = 60 * 5 # 5 minutes
# Hard time limit for tenant pre-provisioning tasks (in seconds)
_TENANT_PROVISIONING_TIME_LIMIT = 60 * 10 # 10 minutes
# Maximum tenants to provision in a single task run.
# Each tenant takes ~80s (alembic migrations), so 5 tenants ≈ 7 minutes.
_MAX_TENANTS_PER_RUN = 5
# Time limits sized for worst-case batch: _MAX_TENANTS_PER_RUN × ~90s + buffer.
_TENANT_PROVISIONING_SOFT_TIME_LIMIT = 60 * 10 # 10 minutes
_TENANT_PROVISIONING_TIME_LIMIT = 60 * 15 # 15 minutes
@shared_task(
@@ -85,9 +88,26 @@ def check_available_tenants(self: Task) -> None: # noqa: ARG001
f"To provision: {tenants_to_provision}"
)
# just provision one tenant each time we run this ... increase if needed.
if tenants_to_provision > 0:
pre_provision_tenant()
batch_size = min(tenants_to_provision, _MAX_TENANTS_PER_RUN)
if batch_size < tenants_to_provision:
task_logger.info(
f"Capping batch to {batch_size} "
f"(need {tenants_to_provision}, will catch up next cycle)"
)
provisioned = 0
for i in range(batch_size):
task_logger.info(f"Provisioning tenant {i + 1}/{batch_size}")
try:
if pre_provision_tenant():
provisioned += 1
except Exception:
task_logger.exception(
f"Failed to provision tenant {i + 1}/{batch_size}, "
"continuing with remaining tenants"
)
task_logger.info(f"Provisioning complete: {provisioned}/{batch_size} succeeded")
except Exception:
task_logger.exception("Error in check_available_tenants task")
@@ -101,11 +121,13 @@ def check_available_tenants(self: Task) -> None: # noqa: ARG001
)
def pre_provision_tenant() -> None:
def pre_provision_tenant() -> bool:
"""
Pre-provision a new tenant and store it in the NewAvailableTenant table.
This function fully sets up the tenant with all necessary configurations,
so it's ready to be assigned to a user immediately.
Returns True if a tenant was successfully provisioned, False otherwise.
"""
# The MULTI_TENANT check is now done at the caller level (check_available_tenants)
# rather than inside this function
@@ -118,10 +140,10 @@ def pre_provision_tenant() -> None:
# Allow multiple pre-provisioning tasks to run, but ensure they don't overlap
if not lock_provision.acquire(blocking=False):
task_logger.debug(
"Skipping pre_provision_tenant task because it is already running"
task_logger.warning(
"Skipping pre_provision_tenant — could not acquire provision lock"
)
return
return False
tenant_id: str | None = None
try:
@@ -161,6 +183,7 @@ def pre_provision_tenant() -> None:
db_session.add(new_tenant)
db_session.commit()
task_logger.info(f"Successfully pre-provisioned tenant: {tenant_id}")
return True
except Exception:
db_session.rollback()
task_logger.error(
@@ -184,6 +207,7 @@ def pre_provision_tenant() -> None:
asyncio.run(rollback_tenant_provisioning(tenant_id))
except Exception:
task_logger.exception(f"Error during rollback for tenant: {tenant_id}")
return False
finally:
try:
lock_provision.release()
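As a quick sanity check on the sizing comment above, the arithmetic works out (a minimal sketch; the ~90 s per-tenant figure is taken from the comment in this diff):

# Worst-case batch duration, per the sizing comment: 5 tenants x ~90 s each.
MAX_TENANTS_PER_RUN = 5
SECONDS_PER_TENANT = 90  # ~80 s of alembic migrations plus slack

worst_case = MAX_TENANTS_PER_RUN * SECONDS_PER_TENANT  # 450 s = 7.5 min
soft_limit = 60 * 10  # 10 min soft limit -> ~2.5 min of headroom
hard_limit = 60 * 15  # 15 min hard limit -> ~7.5 min of headroom
assert worst_case < soft_limit < hard_limit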

View File

@@ -800,6 +800,33 @@ def update_user_group(
return db_user_group
def rename_user_group(
db_session: Session,
user_group_id: int,
new_name: str,
) -> UserGroup:
stmt = select(UserGroup).where(UserGroup.id == user_group_id)
db_user_group = db_session.scalar(stmt)
if db_user_group is None:
raise ValueError(f"UserGroup with id '{user_group_id}' not found")
_check_user_group_is_modifiable(db_user_group)
db_user_group.name = new_name
db_user_group.time_last_modified_by_user = func.now()
# CC pair documents in Vespa contain the group name, so we need to
# trigger a sync to update them with the new name.
_mark_user_group__cc_pair_relationships_outdated__no_commit(
db_session=db_session, user_group_id=user_group_id
)
if not DISABLE_VECTOR_DB:
db_user_group.is_up_to_date = False
db_session.commit()
return db_user_group
def prepare_user_group_for_deletion(db_session: Session, user_group_id: int) -> None:
stmt = select(UserGroup).where(UserGroup.id == user_group_id)
db_user_group = db_session.scalar(stmt)

View File

@@ -4,6 +4,7 @@ from fastapi import HTTPException
from sqlalchemy.exc import IntegrityError
from sqlalchemy.orm import Session
from ee.onyx.db.persona import update_persona_access
from ee.onyx.db.user_group import add_users_to_user_group
from ee.onyx.db.user_group import delete_user_group as db_delete_user_group
from ee.onyx.db.user_group import fetch_user_group
@@ -11,13 +12,16 @@ from ee.onyx.db.user_group import fetch_user_groups
from ee.onyx.db.user_group import fetch_user_groups_for_user
from ee.onyx.db.user_group import insert_user_group
from ee.onyx.db.user_group import prepare_user_group_for_deletion
from ee.onyx.db.user_group import rename_user_group
from ee.onyx.db.user_group import update_user_curator_relationship
from ee.onyx.db.user_group import update_user_group
from ee.onyx.server.user_group.models import AddUsersToUserGroupRequest
from ee.onyx.server.user_group.models import MinimalUserGroupSnapshot
from ee.onyx.server.user_group.models import SetCuratorRequest
from ee.onyx.server.user_group.models import UpdateGroupAgentsRequest
from ee.onyx.server.user_group.models import UserGroup
from ee.onyx.server.user_group.models import UserGroupCreate
from ee.onyx.server.user_group.models import UserGroupRename
from ee.onyx.server.user_group.models import UserGroupUpdate
from onyx.auth.users import current_admin_user
from onyx.auth.users import current_curator_or_admin_user
@@ -27,6 +31,9 @@ from onyx.configs.constants import PUBLIC_API_TAGS
from onyx.db.engine.sql_engine import get_session
from onyx.db.models import User
from onyx.db.models import UserRole
from onyx.db.persona import get_persona_by_id
from onyx.error_handling.error_codes import OnyxErrorCode
from onyx.error_handling.exceptions import OnyxError
from onyx.utils.logger import setup_logger
logger = setup_logger()
@@ -87,6 +94,32 @@ def create_user_group(
return UserGroup.from_model(db_user_group)
@router.patch("/admin/user-group/rename")
def rename_user_group_endpoint(
rename_request: UserGroupRename,
_: User = Depends(current_admin_user),
db_session: Session = Depends(get_session),
) -> UserGroup:
try:
return UserGroup.from_model(
rename_user_group(
db_session=db_session,
user_group_id=rename_request.id,
new_name=rename_request.name,
)
)
except IntegrityError:
raise OnyxError(
OnyxErrorCode.DUPLICATE_RESOURCE,
f"User group with name '{rename_request.name}' already exists.",
)
except ValueError as e:
msg = str(e)
if "not found" in msg.lower():
raise OnyxError(OnyxErrorCode.NOT_FOUND, msg)
raise OnyxError(OnyxErrorCode.CONFLICT, msg)
@router.patch("/admin/user-group/{user_group_id}")
def patch_user_group(
user_group_id: int,
@@ -161,3 +194,38 @@ def delete_user_group(
user_group = fetch_user_group(db_session, user_group_id)
if user_group:
db_delete_user_group(db_session, user_group)
@router.patch("/admin/user-group/{user_group_id}/agents")
def update_group_agents(
user_group_id: int,
request: UpdateGroupAgentsRequest,
user: User = Depends(current_admin_user),
db_session: Session = Depends(get_session),
) -> None:
for agent_id in request.added_agent_ids:
persona = get_persona_by_id(
persona_id=agent_id, user=user, db_session=db_session
)
current_group_ids = [g.id for g in persona.groups]
if user_group_id not in current_group_ids:
update_persona_access(
persona_id=agent_id,
creator_user_id=user.id,
db_session=db_session,
group_ids=current_group_ids + [user_group_id],
)
for agent_id in request.removed_agent_ids:
persona = get_persona_by_id(
persona_id=agent_id, user=user, db_session=db_session
)
current_group_ids = [g.id for g in persona.groups]
update_persona_access(
persona_id=agent_id,
creator_user_id=user.id,
db_session=db_session,
group_ids=[gid for gid in current_group_ids if gid != user_group_id],
)
db_session.commit()
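A minimal sketch of exercising the new rename endpoint from a client, assuming bearer-token auth and an /api prefix (both hypothetical; adjust for your deployment):

import httpx

resp = httpx.patch(
    "http://localhost:8080/api/admin/user-group/rename",  # hypothetical base URL
    json={"id": 42, "name": "Data Platform"},  # matches the UserGroupRename model
    headers={"Authorization": "Bearer <admin-api-key>"},  # hypothetical auth scheme
)
# Success returns the updated UserGroup snapshot; a duplicate name surfaces as a
# DUPLICATE_RESOURCE error and a missing group as NOT_FOUND, per the endpoint above.
print(resp.status_code, resp.json())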

View File

@@ -104,6 +104,16 @@ class AddUsersToUserGroupRequest(BaseModel):
user_ids: list[UUID]
class UserGroupRename(BaseModel):
id: int
name: str
class SetCuratorRequest(BaseModel):
user_id: UUID
is_curator: bool
class UpdateGroupAgentsRequest(BaseModel):
added_agent_ids: list[int]
removed_agent_ids: list[int]

View File

@@ -59,6 +59,7 @@ from onyx.db.chat import create_new_chat_message
from onyx.db.chat import get_chat_session_by_id
from onyx.db.chat import get_or_create_root_message
from onyx.db.chat import reserve_message_id
from onyx.db.enums import HookPoint
from onyx.db.memory import get_memories
from onyx.db.models import ChatMessage
from onyx.db.models import ChatSession
@@ -68,11 +69,19 @@ from onyx.db.models import UserFile
from onyx.db.projects import get_user_files_from_project
from onyx.db.tools import get_tools
from onyx.deep_research.dr_loop import run_deep_research_llm_loop
from onyx.error_handling.error_codes import OnyxErrorCode
from onyx.error_handling.exceptions import log_onyx_error
from onyx.error_handling.exceptions import OnyxError
from onyx.file_processing.extract_file_text import extract_file_text
from onyx.file_store.models import ChatFileType
from onyx.file_store.models import InMemoryChatFile
from onyx.file_store.utils import load_in_memory_chat_files
from onyx.file_store.utils import verify_user_files
from onyx.hooks.executor import execute_hook
from onyx.hooks.executor import HookSkipped
from onyx.hooks.executor import HookSoftFailed
from onyx.hooks.points.query_processing import QueryProcessingPayload
from onyx.hooks.points.query_processing import QueryProcessingResponse
from onyx.llm.factory import get_llm_for_persona
from onyx.llm.factory import get_llm_token_counter
from onyx.llm.interfaces import LLM
@@ -424,6 +433,28 @@ def determine_search_params(
)
def _resolve_query_processing_hook_result(
hook_result: QueryProcessingResponse | HookSkipped | HookSoftFailed,
message_text: str,
) -> str:
"""Apply the Query Processing hook result to the message text.
Returns the (possibly rewritten) message text, or raises OnyxError with
QUERY_REJECTED if the hook signals rejection (query is null or empty).
HookSkipped and HookSoftFailed are pass-throughs — the original text is
returned unchanged.
"""
if isinstance(hook_result, (HookSkipped, HookSoftFailed)):
return message_text
if not (hook_result.query and hook_result.query.strip()):
raise OnyxError(
OnyxErrorCode.QUERY_REJECTED,
hook_result.rejection_message
or "The hook extension for query processing did not return a valid query. No rejection reason was provided.",
)
return hook_result.query.strip()
def handle_stream_message_objects(
new_msg_req: SendMessageRequest,
user: User,
@@ -474,16 +505,24 @@ def handle_stream_message_objects(
db_session=db_session,
)
yield CreateChatSessionID(chat_session_id=chat_session.id)
chat_session = get_chat_session_by_id(
chat_session_id=chat_session.id,
user_id=user_id,
db_session=db_session,
eager_load_persona=True,
)
else:
chat_session = get_chat_session_by_id(
chat_session_id=new_msg_req.chat_session_id,
user_id=user_id,
db_session=db_session,
eager_load_persona=True,
)
persona = chat_session.persona
message_text = new_msg_req.message
user_identity = LLMUserIdentity(
user_id=llm_user_identifier, session_id=str(chat_session.id)
)
@@ -575,6 +614,28 @@ def handle_stream_message_objects(
if parent_message.message_type == MessageType.USER:
user_message = parent_message
else:
# New message — run the Query Processing hook before saving to DB.
# Skipped on regeneration: the message already exists and was accepted previously.
# Skip the hook for empty/whitespace-only messages — no meaningful query
# to process, and SendMessageRequest.message has no min_length guard.
if message_text.strip():
hook_result = execute_hook(
db_session=db_session,
hook_point=HookPoint.QUERY_PROCESSING,
payload=QueryProcessingPayload(
query=message_text,
# Pass None for anonymous users or authenticated users without an email
# (e.g. some SSO flows). QueryProcessingPayload.user_email is str | None,
# so None is accepted and serialised as null in both cases.
user_email=None if user.is_anonymous else user.email,
chat_session_id=str(chat_session.id),
).model_dump(),
response_type=QueryProcessingResponse,
)
message_text = _resolve_query_processing_hook_result(
hook_result, message_text
)
user_message = create_new_chat_message(
chat_session_id=chat_session.id,
parent_message=parent_message,
@@ -914,6 +975,17 @@ def handle_stream_message_objects(
state_container=state_container,
)
except OnyxError as e:
if e.error_code is not OnyxErrorCode.QUERY_REJECTED:
log_onyx_error(e)
yield StreamingError(
error=e.detail,
error_code=e.error_code.code,
is_retryable=e.status_code >= 500,
)
db_session.rollback()
return
except ValueError as e:
logger.exception("Failed to process chat message.")
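To make the resolution rules concrete, here is a self-contained sketch with simplified stand-ins for the executor's result types (the real classes live in onyx.hooks.executor):

from dataclasses import dataclass

@dataclass
class HookSkipped:  # stand-in: no active hook configured
    pass

@dataclass
class QueryResponse:  # stand-in for QueryProcessingResponse
    query: str | None = None
    rejection_message: str | None = None

def resolve(result: object, original: str) -> str:
    if isinstance(result, HookSkipped):
        return original  # pass-through: hook did not run
    assert isinstance(result, QueryResponse)
    if not (result.query and result.query.strip()):
        # maps to OnyxError(QUERY_REJECTED) in the real handler
        raise ValueError(result.rejection_message or "Query rejected by hook.")
    return result.query.strip()  # hook rewrote the query

assert resolve(HookSkipped(), "hi") == "hi"
assert resolve(QueryResponse(query="  rewritten  "), "hi") == "rewritten"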

View File

@@ -787,10 +787,6 @@ MINI_CHUNK_SIZE = 150
# This is the number of regular chunks per large chunk
LARGE_CHUNK_RATIO = 4
# The maximum number of chunks that can be held for 1 document processing batch
# The purpose of this is to set an upper bound on memory usage
MAX_CHUNKS_PER_DOC_BATCH = int(os.environ.get("MAX_CHUNKS_PER_DOC_BATCH") or 1000)
# Include the document level metadata in each chunk. If the metadata is too long, then it is thrown out
# We don't want the metadata to overwhelm the actual contents of the chunk
SKIP_METADATA_IN_CHUNK = os.environ.get("SKIP_METADATA_IN_CHUNK", "").lower() == "true"

View File

@@ -16,6 +16,7 @@ from sqlalchemy import Row
from sqlalchemy import select
from sqlalchemy import update
from sqlalchemy.exc import MultipleResultsFound
from sqlalchemy.orm import joinedload
from sqlalchemy.orm import selectinload
from sqlalchemy.orm import Session
@@ -28,6 +29,7 @@ from onyx.db.models import ChatMessage
from onyx.db.models import ChatMessage__SearchDoc
from onyx.db.models import ChatSession
from onyx.db.models import ChatSessionSharedStatus
from onyx.db.models import Persona
from onyx.db.models import SearchDoc as DBSearchDoc
from onyx.db.models import ToolCall
from onyx.db.models import User
@@ -53,9 +55,19 @@ def get_chat_session_by_id(
db_session: Session,
include_deleted: bool = False,
is_shared: bool = False,
eager_load_persona: bool = False,
) -> ChatSession:
stmt = select(ChatSession).where(ChatSession.id == chat_session_id)
if eager_load_persona:
stmt = stmt.options(
joinedload(ChatSession.persona).options(
selectinload(Persona.tools),
selectinload(Persona.user_files),
),
joinedload(ChatSession.project),
)
if is_shared:
stmt = stmt.where(ChatSession.shared_status == ChatSessionSharedStatus.PUBLIC)
else:
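The option stack follows a standard SQLAlchemy 2.0 pattern: joinedload for the to-one persona relationship, selectinload for its collections, so nothing lazy-loads after the handler hands the session back. A generic, self-contained sketch of the same pattern (toy models, not the Onyx schema):

from __future__ import annotations

from sqlalchemy import ForeignKey, create_engine, select
from sqlalchemy.orm import (
    DeclarativeBase, Mapped, Session, joinedload,
    mapped_column, relationship, selectinload,
)

class Base(DeclarativeBase):
    pass

class Persona(Base):
    __tablename__ = "persona"
    id: Mapped[int] = mapped_column(primary_key=True)
    tools: Mapped[list[Tool]] = relationship()

class Tool(Base):
    __tablename__ = "tool"
    id: Mapped[int] = mapped_column(primary_key=True)
    persona_id: Mapped[int] = mapped_column(ForeignKey("persona.id"))

class ChatSession(Base):
    __tablename__ = "chat_session"
    id: Mapped[int] = mapped_column(primary_key=True)
    persona_id: Mapped[int] = mapped_column(ForeignKey("persona.id"))
    persona: Mapped[Persona] = relationship()

engine = create_engine("sqlite://")
Base.metadata.create_all(engine)

with Session(engine) as s:
    # joinedload fetches the to-one persona in the same SELECT; selectinload
    # batch-loads persona.tools with one extra query instead of N lazy loads.
    stmt = select(ChatSession).options(
        joinedload(ChatSession.persona).options(selectinload(Persona.tools))
    )
    sessions = s.scalars(stmt).all()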

View File

@@ -5,7 +5,6 @@ accidentally reaches the vector DB layer will fail loudly instead of timing
out against a nonexistent Vespa/OpenSearch instance.
"""
from collections.abc import Iterable
from typing import Any
from onyx.context.search.models import IndexFilters
@@ -67,7 +66,7 @@ class DisabledDocumentIndex(DocumentIndex):
# ------------------------------------------------------------------
def index(
self,
chunks: Iterable[DocMetadataAwareIndexChunk], # noqa: ARG002
chunks: list[DocMetadataAwareIndexChunk], # noqa: ARG002
index_batch_params: IndexBatchParams, # noqa: ARG002
) -> set[DocumentInsertionRecord]:
raise RuntimeError(VECTOR_DB_DISABLED_ERROR)

View File

@@ -1,5 +1,4 @@
import abc
from collections.abc import Iterable
from dataclasses import dataclass
from datetime import datetime
from typing import Any
@@ -207,7 +206,7 @@ class Indexable(abc.ABC):
@abc.abstractmethod
def index(
self,
chunks: Iterable[DocMetadataAwareIndexChunk],
chunks: list[DocMetadataAwareIndexChunk],
index_batch_params: IndexBatchParams,
) -> set[DocumentInsertionRecord]:
"""
@@ -227,8 +226,8 @@ class Indexable(abc.ABC):
it is done automatically outside of this code.
Parameters:
- chunks: Document chunks with all of the information needed for
indexing to the document index.
- chunks: Document chunks with all of the information needed for indexing to the document
index.
- tenant_id: The tenant id of the user whose chunks are being indexed
- large_chunks_enabled: Whether large chunks are enabled

View File

@@ -1,5 +1,4 @@
import abc
from collections.abc import Iterable
from typing import Self
from pydantic import BaseModel
@@ -210,10 +209,10 @@ class Indexable(abc.ABC):
@abc.abstractmethod
def index(
self,
chunks: Iterable[DocMetadataAwareIndexChunk],
chunks: list[DocMetadataAwareIndexChunk],
indexing_metadata: IndexingMetadata,
) -> list[DocumentInsertionRecord]:
"""Indexes an iterable of document chunks into the document index.
"""Indexes a list of document chunks into the document index.
This is often a batch operation including chunks from multiple
documents.

View File

@@ -1,12 +1,11 @@
import json
from collections.abc import Iterable
from collections import defaultdict
from typing import Any
import httpx
from opensearchpy import NotFoundError
from onyx.access.models import DocumentAccess
from onyx.configs.app_configs import MAX_CHUNKS_PER_DOC_BATCH
from onyx.configs.app_configs import VERIFY_CREATE_OPENSEARCH_INDEX_ON_INIT_MT
from onyx.configs.chat_configs import NUM_RETURNED_HITS
from onyx.configs.chat_configs import TITLE_CONTENT_RATIO
@@ -351,7 +350,7 @@ class OpenSearchOldDocumentIndex(OldDocumentIndex):
def index(
self,
chunks: Iterable[DocMetadataAwareIndexChunk],
chunks: list[DocMetadataAwareIndexChunk],
index_batch_params: IndexBatchParams,
) -> set[OldDocumentInsertionRecord]:
"""
@@ -647,10 +646,10 @@ class OpenSearchDocumentIndex(DocumentIndex):
def index(
self,
chunks: Iterable[DocMetadataAwareIndexChunk],
indexing_metadata: IndexingMetadata,
chunks: list[DocMetadataAwareIndexChunk],
indexing_metadata: IndexingMetadata, # noqa: ARG002
) -> list[DocumentInsertionRecord]:
"""Indexes an iterable of document chunks into the document index.
"""Indexes a list of document chunks into the document index.
Groups chunks by document ID and for each document, deletes existing
chunks and indexes the new chunks in bulk.
@@ -673,34 +672,29 @@ class OpenSearchDocumentIndex(DocumentIndex):
document is newly indexed or had already existed and was just
updated.
"""
total_chunks = sum(
cc.new_chunk_cnt
for cc in indexing_metadata.doc_id_to_chunk_cnt_diff.values()
# Group chunks by document ID.
doc_id_to_chunks: dict[str, list[DocMetadataAwareIndexChunk]] = defaultdict(
list
)
for chunk in chunks:
doc_id_to_chunks[chunk.source_document.id].append(chunk)
logger.debug(
f"[OpenSearchDocumentIndex] Indexing {total_chunks} chunks from {len(indexing_metadata.doc_id_to_chunk_cnt_diff)} "
f"[OpenSearchDocumentIndex] Indexing {len(chunks)} chunks from {len(doc_id_to_chunks)} "
f"documents for index {self._index_name}."
)
document_indexing_results: list[DocumentInsertionRecord] = []
deleted_doc_ids: set[str] = set()
# Buffer chunks per document as they arrive from the iterable.
# When the document ID changes flush the buffered chunks.
current_doc_id: str | None = None
current_chunks: list[DocMetadataAwareIndexChunk] = []
def _flush_chunks(doc_chunks: list[DocMetadataAwareIndexChunk]) -> None:
assert len(doc_chunks) > 0, "doc_chunks is empty"
# Try to index per-document.
for _, chunks in doc_id_to_chunks.items():
# Create a batch of OpenSearch-formatted chunks for bulk insertion.
# Since we are doing this in batches, an error occurring midway
# can result in a state where chunks are deleted and not all the
# new chunks have been indexed.
# Do this before deleting existing chunks to reduce the amount of
# time the document index has no content for a given document, and
# to reduce the chance of entering a state where we delete chunks,
# then some error happens, and never successfully index new chunks.
chunk_batch: list[DocumentChunk] = [
_convert_onyx_chunk_to_opensearch_document(chunk)
for chunk in doc_chunks
_convert_onyx_chunk_to_opensearch_document(chunk) for chunk in chunks
]
onyx_document: Document = doc_chunks[0].source_document
onyx_document: Document = chunks[0].source_document
# First delete the doc's chunks from the index. This is so that
# there are no dangling chunks in the index, in the event that the
# new document's content contains fewer chunks than the previous
@@ -709,43 +703,22 @@ class OpenSearchDocumentIndex(DocumentIndex):
# if the chunk count has actually decreased. This assumes that
# overlapping chunks are perfectly overwritten. If we can't
# guarantee that then we need the code as-is.
if onyx_document.id not in deleted_doc_ids:
num_chunks_deleted = self.delete(
onyx_document.id, onyx_document.chunk_count
)
deleted_doc_ids.add(onyx_document.id)
# If we see that chunks were deleted we assume the doc already
# existed. We record the result before bulk_index_documents
# runs. If indexing raises, this entire result list is discarded
# by the caller's retry logic, so early recording is safe.
document_indexing_results.append(
DocumentInsertionRecord(
document_id=onyx_document.id,
already_existed=num_chunks_deleted > 0,
)
)
num_chunks_deleted = self.delete(
onyx_document.id, onyx_document.chunk_count
)
# If we see that chunks were deleted we assume the doc already
# existed.
document_insertion_record = DocumentInsertionRecord(
document_id=onyx_document.id,
already_existed=num_chunks_deleted > 0,
)
# Now index. This will raise if a chunk of the same ID exists, which
# we do not expect because we should have deleted all chunks.
self._client.bulk_index_documents(
documents=chunk_batch,
tenant_state=self._tenant_state,
)
for chunk in chunks:
doc_id = chunk.source_document.id
if doc_id != current_doc_id:
if current_chunks:
_flush_chunks(current_chunks)
current_doc_id = doc_id
current_chunks = [chunk]
elif len(current_chunks) >= MAX_CHUNKS_PER_DOC_BATCH:
_flush_chunks(current_chunks)
current_chunks = [chunk]
else:
current_chunks.append(chunk)
if current_chunks:
_flush_chunks(current_chunks)
document_indexing_results.append(document_insertion_record)
return document_indexing_results
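The restructured loop hinges on grouping all of a document's chunks up front rather than flushing buffers as an iterator advances; in isolation the grouping step is just:

from collections import defaultdict

# Toy chunks as (document_id, chunk_id) pairs.
chunks = [("doc-a", 0), ("doc-b", 0), ("doc-a", 1)]

doc_id_to_chunks: dict[str, list[tuple[str, int]]] = defaultdict(list)
for chunk in chunks:
    doc_id_to_chunks[chunk[0]].append(chunk)

# Once grouped, arrival order no longer matters: each document's chunks are
# deleted and re-indexed together, exactly once.
assert doc_id_to_chunks["doc-a"] == [("doc-a", 0), ("doc-a", 1)]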

View File

@@ -6,7 +6,6 @@ import re
import time
import urllib
import zipfile
from collections.abc import Iterable
from dataclasses import dataclass
from datetime import datetime
from datetime import timedelta
@@ -462,7 +461,7 @@ class VespaIndex(DocumentIndex):
def index(
self,
chunks: Iterable[DocMetadataAwareIndexChunk],
chunks: list[DocMetadataAwareIndexChunk],
index_batch_params: IndexBatchParams,
) -> set[OldDocumentInsertionRecord]:
"""

View File

@@ -1,8 +1,6 @@
import concurrent.futures
import logging
import random
from collections.abc import Generator
from collections.abc import Iterable
from typing import Any
from uuid import UUID
@@ -10,7 +8,6 @@ import httpx
from pydantic import BaseModel
from retry import retry
from onyx.configs.app_configs import MAX_CHUNKS_PER_DOC_BATCH
from onyx.configs.app_configs import RECENCY_BIAS_MULTIPLIER
from onyx.configs.app_configs import RERANK_COUNT
from onyx.configs.chat_configs import DOC_TIME_DECAY
@@ -321,7 +318,7 @@ class VespaDocumentIndex(DocumentIndex):
def index(
self,
chunks: Iterable[DocMetadataAwareIndexChunk],
chunks: list[DocMetadataAwareIndexChunk],
indexing_metadata: IndexingMetadata,
) -> list[DocumentInsertionRecord]:
doc_id_to_chunk_cnt_diff = indexing_metadata.doc_id_to_chunk_cnt_diff
@@ -341,31 +338,22 @@ class VespaDocumentIndex(DocumentIndex):
# Vespa has restrictions on valid characters, yet document IDs come from
# external w.r.t. this class. We need to sanitize them.
#
# Instead of materializing all cleaned chunks upfront, we stream them
# through a generator that cleans IDs and builds the original-ID mapping
# incrementally as chunks flow into Vespa.
def _clean_and_track(
chunks_iter: Iterable[DocMetadataAwareIndexChunk],
id_map: dict[str, str],
seen_ids: set[str],
) -> Generator[DocMetadataAwareIndexChunk, None, None]:
"""Cleans chunk IDs and builds the original-ID mapping
incrementally as chunks flow through, avoiding a separate
materialization pass."""
for chunk in chunks_iter:
original_id = chunk.source_document.id
cleaned = clean_chunk_id_copy(chunk)
cleaned_id = cleaned.source_document.id
# Needed so the final DocumentInsertionRecord returned can have
# the original document ID. cleaned_chunks might not contain IDs
# exactly as callers supplied them.
id_map[cleaned_id] = original_id
seen_ids.add(cleaned_id)
yield cleaned
cleaned_chunks: list[DocMetadataAwareIndexChunk] = [
clean_chunk_id_copy(chunk) for chunk in chunks
]
assert len(cleaned_chunks) == len(
chunks
), "Bug: Cleaned chunks and input chunks have different lengths."
new_document_id_to_original_document_id: dict[str, str] = {}
all_cleaned_doc_ids: set[str] = set()
# Needed so the final DocumentInsertionRecord returned can have the
# original document ID. cleaned_chunks might not contain IDs exactly as
# callers supplied them.
new_document_id_to_original_document_id: dict[str, str] = dict()
for i, cleaned_chunk in enumerate(cleaned_chunks):
old_chunk = chunks[i]
new_document_id_to_original_document_id[
cleaned_chunk.source_document.id
] = old_chunk.source_document.id
existing_docs: set[str] = set()
@@ -421,16 +409,8 @@ class VespaDocumentIndex(DocumentIndex):
executor=executor,
)
# Insert new Vespa documents, streaming through the cleaning
# pipeline so chunks are never fully materialized.
cleaned_chunks = _clean_and_track(
chunks,
new_document_id_to_original_document_id,
all_cleaned_doc_ids,
)
for chunk_batch in batch_generator(
cleaned_chunks, min(BATCH_SIZE, MAX_CHUNKS_PER_DOC_BATCH)
):
# Insert new Vespa documents.
for chunk_batch in batch_generator(cleaned_chunks, BATCH_SIZE):
batch_index_vespa_chunks(
chunks=chunk_batch,
index_name=self._index_name,
@@ -439,6 +419,10 @@ class VespaDocumentIndex(DocumentIndex):
executor=executor,
)
all_cleaned_doc_ids: set[str] = {
chunk.source_document.id for chunk in cleaned_chunks
}
return [
DocumentInsertionRecord(
document_id=new_document_id_to_original_document_id[cleaned_doc_id],

View File

@@ -44,6 +44,7 @@ class OnyxErrorCode(Enum):
VALIDATION_ERROR = ("VALIDATION_ERROR", 400)
INVALID_INPUT = ("INVALID_INPUT", 400)
MISSING_REQUIRED_FIELD = ("MISSING_REQUIRED_FIELD", 400)
QUERY_REJECTED = ("QUERY_REJECTED", 400)
# ------------------------------------------------------------------
# Not Found (404)

View File

@@ -5,6 +5,7 @@ Usage (Celery tasks and FastAPI handlers):
db_session=db_session,
hook_point=HookPoint.QUERY_PROCESSING,
payload={"query": "...", "user_email": "...", "chat_session_id": "..."},
response_type=QueryProcessingResponse,
)
if isinstance(result, HookSkipped):
@@ -14,7 +15,7 @@ Usage (Celery tasks and FastAPI handlers):
# hook failed but fail strategy is SOFT — continue with original behavior
...
else:
# result is the response payload dict from the customer's endpoint
# result is a validated Pydantic model instance (spec.response_model)
...
is_reachable update policy
@@ -53,9 +54,11 @@ The executor uses three sessions:
import json
import time
from typing import Any
from typing import TypeVar
import httpx
from pydantic import BaseModel
from pydantic import ValidationError
from sqlalchemy.orm import Session
from onyx.db.engine.sql_engine import get_session_with_current_tenant
@@ -81,6 +84,9 @@ class HookSoftFailed:
"""Hook was called but failed with SOFT fail strategy — continuing."""
T = TypeVar("T", bound=BaseModel)
# ---------------------------------------------------------------------------
# Private helpers
# ---------------------------------------------------------------------------
@@ -268,22 +274,21 @@ def _persist_result(
# ---------------------------------------------------------------------------
def execute_hook(
*,
db_session: Session,
hook_point: HookPoint,
def _execute_hook_inner(
hook: Hook,
payload: dict[str, Any],
) -> dict[str, Any] | HookSkipped | HookSoftFailed:
"""Execute the hook for the given hook point synchronously."""
hook = _lookup_hook(db_session, hook_point)
if isinstance(hook, HookSkipped):
return hook
response_type: type[T],
) -> T | HookSoftFailed:
"""Make the HTTP call, validate the response, and return a typed model.
Raises OnyxError on HARD failure. Returns HookSoftFailed on SOFT failure.
"""
timeout = hook.timeout_seconds
hook_id = hook.id
fail_strategy = hook.fail_strategy
endpoint_url = hook.endpoint_url
current_is_reachable: bool | None = hook.is_reachable
if not endpoint_url:
raise ValueError(
f"hook_id={hook_id} is active but has no endpoint_url — "
@@ -300,13 +305,36 @@ def execute_hook(
headers: dict[str, str] = {"Content-Type": "application/json"}
if api_key:
headers["Authorization"] = f"Bearer {api_key}"
with httpx.Client(timeout=timeout) as client:
with httpx.Client(
timeout=timeout, follow_redirects=False
) as client: # SSRF guard: never follow redirects
response = client.post(endpoint_url, json=payload, headers=headers)
except Exception as e:
exc = e
duration_ms = int((time.monotonic() - start) * 1000)
outcome = _process_response(response=response, exc=exc, timeout=timeout)
# Validate the response payload against response_type.
# A validation failure downgrades the outcome to a failure so it is logged,
# is_reachable is left unchanged (server responded — just a bad payload),
# and fail_strategy is respected below.
validated_model: T | None = None
if outcome.is_success and outcome.response_payload is not None:
try:
validated_model = response_type.model_validate(outcome.response_payload)
except ValidationError as e:
msg = (
f"Hook response failed validation against {response_type.__name__}: {e}"
)
outcome = _HttpOutcome(
is_success=False,
updated_is_reachable=None, # server responded — reachability unchanged
status_code=outcome.status_code,
error_message=msg,
response_payload=None,
)
# Skip the is_reachable write when the value would not change — avoids a
# no-op DB round-trip on every call when the hook is already in the expected state.
if outcome.updated_is_reachable == current_is_reachable:
@@ -323,8 +351,41 @@ def execute_hook(
f"Hook execution failed (soft fail) for hook_id={hook_id}: {outcome.error_message}"
)
return HookSoftFailed()
if outcome.response_payload is None:
raise ValueError(
f"response_payload is None for successful hook call (hook_id={hook_id})"
if validated_model is None:
raise OnyxError(
OnyxErrorCode.INTERNAL_ERROR,
f"validated_model is None for successful hook call (hook_id={hook_id})",
)
return outcome.response_payload
return validated_model
def execute_hook(
*,
db_session: Session,
hook_point: HookPoint,
payload: dict[str, Any],
response_type: type[T],
) -> T | HookSkipped | HookSoftFailed:
"""Execute the hook for the given hook point synchronously.
Returns HookSkipped if no active hook is configured, HookSoftFailed if the
hook failed with SOFT fail strategy, or a validated response model on success.
Raises OnyxError on HARD failure or if the hook is misconfigured.
"""
hook = _lookup_hook(db_session, hook_point)
if isinstance(hook, HookSkipped):
return hook
fail_strategy = hook.fail_strategy
hook_id = hook.id
try:
return _execute_hook_inner(hook, payload, response_type)
except Exception:
if fail_strategy == HookFailStrategy.SOFT:
logger.exception(
f"Unexpected error in hook execution (soft fail) for hook_id={hook_id}"
)
return HookSoftFailed()
raise
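From the other side of the wire, a minimal sketch of what a customer's query-processing endpoint could look like, assuming FastAPI and the payload/response shapes shown in this diff (the path is hypothetical; Onyx posts to whatever endpoint_url is configured):

from fastapi import FastAPI
from pydantic import BaseModel

app = FastAPI()

class QueryProcessingPayload(BaseModel):  # mirrors the Onyx payload model
    query: str
    user_email: str | None
    chat_session_id: str

class QueryProcessingResponse(BaseModel):  # mirrors the Onyx response model
    query: str | None = None               # null/empty/whitespace => reject
    rejection_message: str | None = None

@app.post("/hooks/query-processing")
def process(payload: QueryProcessingPayload) -> QueryProcessingResponse:
    # Toy policy: block queries containing a forbidden term, pass through the rest.
    if "forbidden" in payload.query.lower():
        return QueryProcessingResponse(
            query=None, rejection_message="Query contains a blocked term."
        )
    return QueryProcessingResponse(query=payload.query.strip())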

View File

@@ -91,6 +91,8 @@ class HookResponse(BaseModel):
# Nullable to match the DB column — endpoint_url is required on creation but
# future hook point types may not use an external endpoint (e.g. built-in handlers).
endpoint_url: str | None
# Partially-masked API key (e.g. "abcd••••••••wxyz"), or None if no key is set.
api_key_masked: str | None
fail_strategy: HookFailStrategy
timeout_seconds: float # always resolved — None from request is replaced with spec default before DB write
is_active: bool
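The masking format in the comment suggests keeping the first and last four characters; a hypothetical helper (the actual masking code is not part of this diff):

def mask_api_key(key: str | None) -> str | None:
    # Hypothetical: keep 4 chars on each end, pad the middle with bullets,
    # and fully mask anything too short to reveal a prefix/suffix safely.
    if key is None:
        return None
    if len(key) <= 8:
        return "•" * len(key)
    return key[:4] + "•" * 8 + key[-4:]

assert mask_api_key("abcd1234efghwxyz") == "abcd••••••••wxyz"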

View File

@@ -51,13 +51,12 @@ class HookPointSpec:
output_schema: ClassVar[dict[str, Any]]
def __init_subclass__(cls, **kwargs: object) -> None:
"""Enforce that every concrete subclass declares all required class attributes.
"""Enforce that every subclass declares all required class attributes.
Called automatically by Python whenever a class inherits from HookPointSpec.
Abstract subclasses (those still carrying unimplemented abstract methods) are
skipped — they are intermediate base classes and may not yet define everything.
Only fully concrete subclasses are validated, ensuring a clear TypeError at
import time rather than a confusing AttributeError at runtime.
Raises TypeError at import time if any required attribute is missing or if
payload_model / response_model are not Pydantic BaseModel subclasses.
input_schema and output_schema are derived automatically from the models.
"""
super().__init_subclass__(**kwargs)
missing = [attr for attr in _REQUIRED_ATTRS if not hasattr(cls, attr)]
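The mechanism being documented here is Python's __init_subclass__ hook, which runs at class-creation (hence import) time; a stripped-down sketch of the same enforcement pattern:

from typing import ClassVar

class Spec:
    _required: ClassVar[tuple[str, ...]] = ("name",)

    def __init_subclass__(cls, **kwargs: object) -> None:
        super().__init_subclass__(**kwargs)
        missing = [a for a in cls._required if not hasattr(cls, a)]
        if missing:
            raise TypeError(f"{cls.__name__} is missing: {missing}")

class Good(Spec):
    name = "ok"  # validated as the class object is created

try:
    class Bad(Spec):  # no `name` -> TypeError at import time
        pass
except TypeError as exc:
    print(exc)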

View File

@@ -26,6 +26,8 @@ class DocumentIngestionSpec(HookPointSpec):
default_timeout_seconds = 30.0
fail_hard_description = "The document will not be indexed."
default_fail_strategy = HookFailStrategy.HARD
# TODO(Bo-Onyx): update later
docs_url = "https://docs.google.com/document/d/1pGhB8Wcnhhj8rS4baEJL6CX05yFhuIDNk1gbBRiWu94/edit?tab=t.ue263ual5vdi"
payload_model = DocumentIngestionPayload
response_model = DocumentIngestionResponse

View File

@@ -15,7 +15,7 @@ class QueryProcessingPayload(BaseModel):
description="Email of the user submitting the query, or null if unauthenticated."
)
chat_session_id: str = Field(
description="UUID of the chat session. Always present — the session is guaranteed to exist by the time this hook fires."
description="UUID of the chat session, formatted as a hyphenated lowercase string (e.g. '550e8400-e29b-41d4-a716-446655440000'). Always present — the session is guaranteed to exist by the time this hook fires."
)
@@ -25,7 +25,7 @@ class QueryProcessingResponse(BaseModel):
default=None,
description=(
"The query to use in the pipeline. "
"Null, empty string, or absent = reject the query."
"Null, empty string, whitespace-only, or absent = reject the query."
),
)
rejection_message: str | None = Field(
@@ -65,6 +65,8 @@ class QueryProcessingSpec(HookPointSpec):
"The query will be blocked and the user will see an error message."
)
default_fail_strategy = HookFailStrategy.HARD
# TODO(Bo-Onyx): update later
docs_url = "https://docs.google.com/document/d/1pGhB8Wcnhhj8rS4baEJL6CX05yFhuIDNk1gbBRiWu94/edit?tab=t.g2r1a1699u87"
payload_model = QueryProcessingPayload
response_model = QueryProcessingResponse
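Putting the two models together, an example of the wire format (values are illustrative):

# Request body Onyx POSTs to the configured endpoint_url:
payload = {
    "query": "what is our PTO policy?",
    "user_email": None,  # anonymous user, or no email available
    "chat_session_id": "550e8400-e29b-41d4-a716-446655440000",
}

# A rejecting response — null query plus an optional human-readable reason:
rejection = {"query": None, "rejection_message": "HR queries are disabled."}

# An accepting response that rewrites the query:
accepted = {"query": "What is the paid-time-off policy?"}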

View File

@@ -1,5 +1,3 @@
from __future__ import annotations
import contextlib
from collections.abc import Generator
@@ -21,8 +19,7 @@ from onyx.db.document import update_docs_updated_at__no_commit
from onyx.db.document_set import fetch_document_sets_for_documents
from onyx.indexing.indexing_pipeline import DocumentBatchPrepareContext
from onyx.indexing.indexing_pipeline import index_doc_batch_prepare
from onyx.indexing.models import ChunkEnrichmentContext
from onyx.indexing.models import DocAwareChunk
from onyx.indexing.models import BuildMetadataAwareChunksResult
from onyx.indexing.models import DocMetadataAwareIndexChunk
from onyx.indexing.models import IndexChunk
from onyx.indexing.models import UpdatableChunkData
@@ -88,21 +85,14 @@ class DocumentIndexingBatchAdapter:
) as transaction:
yield transaction
def prepare_enrichment(
def build_metadata_aware_chunks(
self,
context: DocumentBatchPrepareContext,
chunks_with_embeddings: list[IndexChunk],
chunk_content_scores: list[float],
tenant_id: str,
chunks: list[DocAwareChunk],
) -> DocumentChunkEnricher:
"""Do all DB lookups once and return a per-chunk enricher."""
updatable_ids = [doc.id for doc in context.updatable_docs]
doc_id_to_new_chunk_cnt: dict[str, int] = {
doc_id: 0 for doc_id in updatable_ids
}
for chunk in chunks:
if chunk.source_document.id in doc_id_to_new_chunk_cnt:
doc_id_to_new_chunk_cnt[chunk.source_document.id] += 1
context: DocumentBatchPrepareContext,
) -> BuildMetadataAwareChunksResult:
"""Enrich chunks with access, document sets, boosts, token counts, and hierarchy."""
no_access = DocumentAccess.build(
user_emails=[],
@@ -112,30 +102,67 @@ class DocumentIndexingBatchAdapter:
is_public=False,
)
return DocumentChunkEnricher(
doc_id_to_access_info=get_access_for_documents(
updatable_ids = [doc.id for doc in context.updatable_docs]
doc_id_to_access_info = get_access_for_documents(
document_ids=updatable_ids, db_session=self.db_session
)
doc_id_to_document_set = {
document_id: document_sets
for document_id, document_sets in fetch_document_sets_for_documents(
document_ids=updatable_ids, db_session=self.db_session
),
doc_id_to_document_set={
document_id: document_sets
for document_id, document_sets in fetch_document_sets_for_documents(
document_ids=updatable_ids, db_session=self.db_session
)
},
doc_id_to_ancestor_ids=self._get_ancestor_ids_for_documents(
context.updatable_docs, tenant_id
),
id_to_boost_map=context.id_to_boost_map,
doc_id_to_previous_chunk_cnt={
document_id: chunk_count
for document_id, chunk_count in fetch_chunk_counts_for_documents(
document_ids=updatable_ids,
db_session=self.db_session,
)
},
doc_id_to_new_chunk_cnt=dict(doc_id_to_new_chunk_cnt),
no_access=no_access,
tenant_id=tenant_id,
)
}
doc_id_to_previous_chunk_cnt: dict[str, int] = {
document_id: chunk_count
for document_id, chunk_count in fetch_chunk_counts_for_documents(
document_ids=updatable_ids,
db_session=self.db_session,
)
}
doc_id_to_new_chunk_cnt: dict[str, int] = {
doc_id: 0 for doc_id in updatable_ids
}
for chunk in chunks_with_embeddings:
if chunk.source_document.id in doc_id_to_new_chunk_cnt:
doc_id_to_new_chunk_cnt[chunk.source_document.id] += 1
# Get ancestor hierarchy node IDs for each document
doc_id_to_ancestor_ids = self._get_ancestor_ids_for_documents(
context.updatable_docs, tenant_id
)
access_aware_chunks = [
DocMetadataAwareIndexChunk.from_index_chunk(
index_chunk=chunk,
access=doc_id_to_access_info.get(chunk.source_document.id, no_access),
document_sets=set(
doc_id_to_document_set.get(chunk.source_document.id, [])
),
user_project=[],
personas=[],
boost=(
context.id_to_boost_map[chunk.source_document.id]
if chunk.source_document.id in context.id_to_boost_map
else DEFAULT_BOOST
),
tenant_id=tenant_id,
aggregated_chunk_boost_factor=chunk_content_scores[chunk_num],
ancestor_hierarchy_node_ids=doc_id_to_ancestor_ids[
chunk.source_document.id
],
)
for chunk_num, chunk in enumerate(chunks_with_embeddings)
]
return BuildMetadataAwareChunksResult(
chunks=access_aware_chunks,
doc_id_to_previous_chunk_cnt=doc_id_to_previous_chunk_cnt,
doc_id_to_new_chunk_cnt=doc_id_to_new_chunk_cnt,
user_file_id_to_raw_text={},
user_file_id_to_token_count={},
)
def _get_ancestor_ids_for_documents(
@@ -176,7 +203,7 @@ class DocumentIndexingBatchAdapter:
context: DocumentBatchPrepareContext,
updatable_chunk_data: list[UpdatableChunkData],
filtered_documents: list[Document],
enrichment: ChunkEnrichmentContext,
result: BuildMetadataAwareChunksResult,
) -> None:
"""Finalize DB updates, store plaintext, and mark docs as indexed."""
updatable_ids = [doc.id for doc in context.updatable_docs]
@@ -200,7 +227,7 @@ class DocumentIndexingBatchAdapter:
update_docs_chunk_count__no_commit(
document_ids=updatable_ids,
doc_id_to_chunk_count=enrichment.doc_id_to_new_chunk_cnt,
doc_id_to_chunk_count=result.doc_id_to_new_chunk_cnt,
db_session=self.db_session,
)
@@ -222,52 +249,3 @@ class DocumentIndexingBatchAdapter:
)
self.db_session.commit()
class DocumentChunkEnricher:
"""Pre-computed metadata for per-chunk enrichment of connector documents."""
def __init__(
self,
doc_id_to_access_info: dict[str, DocumentAccess],
doc_id_to_document_set: dict[str, list[str]],
doc_id_to_ancestor_ids: dict[str, list[int]],
id_to_boost_map: dict[str, int],
doc_id_to_previous_chunk_cnt: dict[str, int],
doc_id_to_new_chunk_cnt: dict[str, int],
no_access: DocumentAccess,
tenant_id: str,
) -> None:
self._doc_id_to_access_info = doc_id_to_access_info
self._doc_id_to_document_set = doc_id_to_document_set
self._doc_id_to_ancestor_ids = doc_id_to_ancestor_ids
self._id_to_boost_map = id_to_boost_map
self._no_access = no_access
self._tenant_id = tenant_id
self.doc_id_to_previous_chunk_cnt = doc_id_to_previous_chunk_cnt
self.doc_id_to_new_chunk_cnt = doc_id_to_new_chunk_cnt
def enrich_chunk(
self, chunk: IndexChunk, score: float
) -> DocMetadataAwareIndexChunk:
return DocMetadataAwareIndexChunk.from_index_chunk(
index_chunk=chunk,
access=self._doc_id_to_access_info.get(
chunk.source_document.id, self._no_access
),
document_sets=set(
self._doc_id_to_document_set.get(chunk.source_document.id, [])
),
user_project=[],
personas=[],
boost=(
self._id_to_boost_map[chunk.source_document.id]
if chunk.source_document.id in self._id_to_boost_map
else DEFAULT_BOOST
),
tenant_id=self._tenant_id,
aggregated_chunk_boost_factor=score,
ancestor_hierarchy_node_ids=self._doc_id_to_ancestor_ids[
chunk.source_document.id
],
)

View File

@@ -1,9 +1,6 @@
from __future__ import annotations
import contextlib
import datetime
import time
from collections import defaultdict
from collections.abc import Generator
from uuid import UUID
@@ -27,8 +24,7 @@ from onyx.db.user_file import fetch_persona_ids_for_user_files
from onyx.db.user_file import fetch_user_project_ids_for_user_files
from onyx.file_store.utils import store_user_file_plaintext
from onyx.indexing.indexing_pipeline import DocumentBatchPrepareContext
from onyx.indexing.models import ChunkEnrichmentContext
from onyx.indexing.models import DocAwareChunk
from onyx.indexing.models import BuildMetadataAwareChunksResult
from onyx.indexing.models import DocMetadataAwareIndexChunk
from onyx.indexing.models import IndexChunk
from onyx.indexing.models import UpdatableChunkData
@@ -105,20 +101,13 @@ class UserFileIndexingAdapter:
f"Failed to acquire locks after {_NUM_LOCK_ATTEMPTS} attempts for user files: {[doc.id for doc in documents]}"
)
def prepare_enrichment(
def build_metadata_aware_chunks(
self,
context: DocumentBatchPrepareContext,
chunks_with_embeddings: list[IndexChunk],
chunk_content_scores: list[float],
tenant_id: str,
chunks: list[DocAwareChunk],
) -> UserFileChunkEnricher:
"""Do all DB lookups and pre-compute file metadata from chunks."""
updatable_ids = [doc.id for doc in context.updatable_docs]
doc_id_to_new_chunk_cnt: dict[str, int] = defaultdict(int)
content_by_file: dict[str, list[str]] = defaultdict(list)
for chunk in chunks:
doc_id_to_new_chunk_cnt[chunk.source_document.id] += 1
content_by_file[chunk.source_document.id].append(chunk.content)
context: DocumentBatchPrepareContext,
) -> BuildMetadataAwareChunksResult:
no_access = DocumentAccess.build(
user_emails=[],
@@ -128,6 +117,7 @@ class UserFileIndexingAdapter:
is_public=False,
)
updatable_ids = [doc.id for doc in context.updatable_docs]
user_file_id_to_project_ids = fetch_user_project_ids_for_user_files(
user_file_ids=updatable_ids,
db_session=self.db_session,
@@ -148,6 +138,17 @@ class UserFileIndexingAdapter:
)
}
user_file_id_to_new_chunk_cnt: dict[str, int] = {
user_file_id: len(
[
chunk
for chunk in chunks_with_embeddings
if chunk.source_document.id == user_file_id
]
)
for user_file_id in updatable_ids
}
# Initialize tokenizer used for token count calculation
try:
llm = get_default_llm()
@@ -162,9 +163,15 @@ class UserFileIndexingAdapter:
user_file_id_to_raw_text: dict[str, str] = {}
user_file_id_to_token_count: dict[str, int | None] = {}
for user_file_id in updatable_ids:
contents = content_by_file.get(user_file_id)
if contents:
combined_content = " ".join(contents)
user_file_chunks = [
chunk
for chunk in chunks_with_embeddings
if chunk.source_document.id == user_file_id
]
if user_file_chunks:
combined_content = " ".join(
[chunk.content for chunk in user_file_chunks]
)
user_file_id_to_raw_text[str(user_file_id)] = combined_content
token_count = (
len(llm_tokenizer.encode(combined_content)) if llm_tokenizer else 0
@@ -174,16 +181,28 @@ class UserFileIndexingAdapter:
user_file_id_to_raw_text[str(user_file_id)] = ""
user_file_id_to_token_count[str(user_file_id)] = None
return UserFileChunkEnricher(
user_file_id_to_access=user_file_id_to_access,
user_file_id_to_project_ids=user_file_id_to_project_ids,
user_file_id_to_persona_ids=user_file_id_to_persona_ids,
access_aware_chunks = [
DocMetadataAwareIndexChunk.from_index_chunk(
index_chunk=chunk,
access=user_file_id_to_access.get(chunk.source_document.id, no_access),
document_sets=set(),
user_project=user_file_id_to_project_ids.get(
chunk.source_document.id, []
),
personas=user_file_id_to_persona_ids.get(chunk.source_document.id, []),
boost=DEFAULT_BOOST,
tenant_id=tenant_id,
aggregated_chunk_boost_factor=chunk_content_scores[chunk_num],
)
for chunk_num, chunk in enumerate(chunks_with_embeddings)
]
return BuildMetadataAwareChunksResult(
chunks=access_aware_chunks,
doc_id_to_previous_chunk_cnt=user_file_id_to_previous_chunk_cnt,
doc_id_to_new_chunk_cnt=dict(doc_id_to_new_chunk_cnt),
doc_id_to_new_chunk_cnt=user_file_id_to_new_chunk_cnt,
user_file_id_to_raw_text=user_file_id_to_raw_text,
user_file_id_to_token_count=user_file_id_to_token_count,
no_access=no_access,
tenant_id=tenant_id,
)
def _notify_assistant_owners_if_files_ready(
@@ -227,9 +246,8 @@ class UserFileIndexingAdapter:
context: DocumentBatchPrepareContext,
updatable_chunk_data: list[UpdatableChunkData], # noqa: ARG002
filtered_documents: list[Document], # noqa: ARG002
enrichment: ChunkEnrichmentContext,
result: BuildMetadataAwareChunksResult,
) -> None:
assert isinstance(enrichment, UserFileChunkEnricher)
user_file_ids = [doc.id for doc in context.updatable_docs]
user_files = (
@@ -245,10 +263,8 @@ class UserFileIndexingAdapter:
user_file.last_project_sync_at = datetime.datetime.now(
datetime.timezone.utc
)
user_file.chunk_count = enrichment.doc_id_to_new_chunk_cnt.get(
str(user_file.id), 0
)
user_file.token_count = enrichment.user_file_id_to_token_count[
user_file.chunk_count = result.doc_id_to_new_chunk_cnt[str(user_file.id)]
user_file.token_count = result.user_file_id_to_token_count[
str(user_file.id)
]
@@ -260,54 +276,8 @@ class UserFileIndexingAdapter:
# Store the plaintext in the file store for faster retrieval
# NOTE: this creates its own session to avoid committing the overall
# transaction.
for user_file_id, raw_text in enrichment.user_file_id_to_raw_text.items():
for user_file_id, raw_text in result.user_file_id_to_raw_text.items():
store_user_file_plaintext(
user_file_id=UUID(user_file_id),
plaintext_content=raw_text,
)
class UserFileChunkEnricher:
"""Pre-computed metadata for per-chunk enrichment of user-uploaded files."""
def __init__(
self,
user_file_id_to_access: dict[str, DocumentAccess],
user_file_id_to_project_ids: dict[str, list[int]],
user_file_id_to_persona_ids: dict[str, list[int]],
doc_id_to_previous_chunk_cnt: dict[str, int],
doc_id_to_new_chunk_cnt: dict[str, int],
user_file_id_to_raw_text: dict[str, str],
user_file_id_to_token_count: dict[str, int | None],
no_access: DocumentAccess,
tenant_id: str,
) -> None:
self._user_file_id_to_access = user_file_id_to_access
self._user_file_id_to_project_ids = user_file_id_to_project_ids
self._user_file_id_to_persona_ids = user_file_id_to_persona_ids
self._no_access = no_access
self._tenant_id = tenant_id
self.doc_id_to_previous_chunk_cnt = doc_id_to_previous_chunk_cnt
self.doc_id_to_new_chunk_cnt = doc_id_to_new_chunk_cnt
self.user_file_id_to_raw_text = user_file_id_to_raw_text
self.user_file_id_to_token_count = user_file_id_to_token_count
def enrich_chunk(
self, chunk: IndexChunk, score: float
) -> DocMetadataAwareIndexChunk:
return DocMetadataAwareIndexChunk.from_index_chunk(
index_chunk=chunk,
access=self._user_file_id_to_access.get(
chunk.source_document.id, self._no_access
),
document_sets=set(),
user_project=self._user_file_id_to_project_ids.get(
chunk.source_document.id, []
),
personas=self._user_file_id_to_persona_ids.get(
chunk.source_document.id, []
),
boost=DEFAULT_BOOST,
tenant_id=self._tenant_id,
aggregated_chunk_boost_factor=score,
)

View File

@@ -1,11 +1,5 @@
import pickle
import tempfile
from collections import defaultdict
from collections.abc import Callable
from collections.abc import Generator
from collections.abc import Iterator
from contextlib import contextmanager
from pathlib import Path
from typing import Protocol
from pydantic import BaseModel
@@ -15,7 +9,6 @@ from sqlalchemy.orm import Session
from onyx.configs.app_configs import DEFAULT_CONTEXTUAL_RAG_LLM_NAME
from onyx.configs.app_configs import DEFAULT_CONTEXTUAL_RAG_LLM_PROVIDER
from onyx.configs.app_configs import ENABLE_CONTEXTUAL_RAG
from onyx.configs.app_configs import MAX_CHUNKS_PER_DOC_BATCH
from onyx.configs.app_configs import MAX_DOCUMENT_CHARS
from onyx.configs.app_configs import MAX_TOKENS_FOR_FULL_INCLUSION
from onyx.configs.app_configs import USE_CHUNK_SUMMARY
@@ -54,8 +47,6 @@ from onyx.indexing.chunker import Chunker
from onyx.indexing.embedder import embed_chunks_with_failure_handling
from onyx.indexing.embedder import IndexingEmbedder
from onyx.indexing.models import DocAwareChunk
from onyx.indexing.models import DocMetadataAwareIndexChunk
from onyx.indexing.models import IndexChunk
from onyx.indexing.models import IndexingBatchAdapter
from onyx.indexing.models import UpdatableChunkData
from onyx.indexing.vector_db_insertion import write_chunks_to_vector_db_with_backoff
@@ -72,7 +63,6 @@ from onyx.natural_language_processing.utils import tokenizer_trim_middle
from onyx.prompts.contextual_retrieval import CONTEXTUAL_RAG_PROMPT1
from onyx.prompts.contextual_retrieval import CONTEXTUAL_RAG_PROMPT2
from onyx.prompts.contextual_retrieval import DOCUMENT_SUMMARY_PROMPT
from onyx.utils.batching import batch_generator
from onyx.utils.logger import setup_logger
from onyx.utils.postgres_sanitization import sanitize_documents_for_postgres
from onyx.utils.threadpool_concurrency import run_functions_tuples_in_parallel
@@ -101,21 +91,6 @@ class IndexingPipelineResult(BaseModel):
failures: list[ConnectorFailure]
@classmethod
def empty(cls, total_docs: int) -> "IndexingPipelineResult":
return cls(
new_docs=0,
total_docs=total_docs,
total_chunks=0,
failures=[],
)
class ChunkEmbeddingResult(BaseModel):
embedding_path: Path
successful_chunk_ids: list[tuple[int, str]] # (chunk_id, document_id)
connector_failures: list[ConnectorFailure]
class IndexingPipelineProtocol(Protocol):
def __call__(
@@ -164,105 +139,6 @@ def _upsert_documents_in_db(
)
def embed_chunks_in_batches(
chunks: list[DocAwareChunk],
embedder: IndexingEmbedder,
tenant_id: str,
request_id: str | None,
) -> ChunkEmbeddingResult:
"""Embeds chunks in batches of MAX_CHUNKS_PER_DOC_BATCH, spilling each batch to disk.
For each batch:
1. Embed the chunks via embed_chunks_with_failure_handling
2. Pickle the resulting IndexChunks to a temp file
3. Clear the batch from memory
Returns:
- Path to the temp directory containing one pickle file per batch
- Accumulated embedding failures across all batches
"""
tmpdir = Path(tempfile.mkdtemp(prefix="onyx_embeddings_"))
successful_chunk_ids: list[tuple[int, str]] = []
all_embedding_failures: list[ConnectorFailure] = []
for batch_idx, chunk_batch in enumerate(
batch_generator(chunks, MAX_CHUNKS_PER_DOC_BATCH)
):
logger.debug(f"Embedding batch {batch_idx}: {len(chunk_batch)} chunks")
chunks_with_embeddings, embedding_failures = embed_chunks_with_failure_handling(
chunks=chunk_batch,
embedder=embedder,
tenant_id=tenant_id,
request_id=request_id,
)
all_embedding_failures.extend(embedding_failures)
# Track which chunks succeeded by excluding failed doc IDs
failed_doc_ids = {
f.failed_document.document_id
for f in embedding_failures
if f.failed_document
}
successful_chunk_ids.extend(
(c.chunk_id, c.source_document.id)
for c in chunk_batch
if c.source_document.id not in failed_doc_ids
)
# Spill embeddings to disk
batch_file = tmpdir / f"batch_{batch_idx}.pkl"
with open(batch_file, "wb") as f:
pickle.dump(chunks_with_embeddings, f)
# Free memory
del chunks_with_embeddings
return ChunkEmbeddingResult(
embedding_path=tmpdir,
successful_chunk_ids=successful_chunk_ids,
connector_failures=all_embedding_failures,
)
class EmbedStream:
def __init__(self, tmpdir: Path) -> None:
self._tmpdir = tmpdir
def stream(self) -> Iterator[IndexChunk]:
for batch_file in sorted(
self._tmpdir.glob("batch_*.pkl"),
key=lambda p: int(p.stem.removeprefix("batch_")),
):
with open(batch_file, "rb") as f:
batch: list[IndexChunk] = pickle.load(f)
yield from batch
@contextmanager
def use_embed_stream(
tmpdir: Path,
) -> Generator[EmbedStream, None, None]:
"""Context manager that provides a factory for creating chunk iterators.
Each call to stream() returns a fresh generator over the embedded chunks
on disk, so the data can be iterated multiple times (e.g. once per
document_index). Files are cleaned up when the context manager exits.
Usage:
with use_embed_stream(embedding_path) as embed_stream:
for document_index in document_indices:
for chunk in embed_stream.stream():
...
"""
try:
yield EmbedStream(tmpdir)
finally:
for batch_file in tmpdir.glob("batch_*.pkl"):
batch_file.unlink(missing_ok=True)
tmpdir.rmdir()
def get_doc_ids_to_update(
documents: list[Document], db_docs: list[DBDocument]
) -> list[Document]:
@@ -761,29 +637,6 @@ def add_contextual_summaries(
return chunks
def _verify_indexing_completeness(
insertion_records: list[DocumentInsertionRecord],
write_failures: list[ConnectorFailure],
embedding_failed_doc_ids: set[str],
updatable_ids: list[str],
document_index_name: str,
) -> None:
"""Verify that every updatable document was either indexed or reported as failed."""
all_returned_doc_ids = (
{r.document_id for r in insertion_records}
| {f.failed_document.document_id for f in write_failures if f.failed_document}
| embedding_failed_doc_ids
)
if all_returned_doc_ids != set(updatable_ids):
raise RuntimeError(
f"Some documents were not successfully indexed. "
f"Updatable IDs: {updatable_ids}, "
f"Returned IDs: {all_returned_doc_ids}. "
f"This should never happen. "
f"This occured for document index {document_index_name}"
)
@log_function_time(debug_only=True)
def index_doc_batch(
*,
@@ -819,7 +672,12 @@ def index_doc_batch(
filtered_documents = filter_fnc(document_batch)
context = adapter.prepare(filtered_documents, ignore_time_skip)
if not context:
return IndexingPipelineResult.empty(len(filtered_documents))
return IndexingPipelineResult(
new_docs=0,
total_docs=len(filtered_documents),
total_chunks=0,
failures=[],
)
# Convert documents to IndexingDocument objects with processed sections
# logger.debug("Processing image sections")
@@ -858,98 +716,117 @@ def index_doc_batch(
)
logger.debug("Starting embedding")
embedding_result = embed_chunks_in_batches(
chunks=chunks,
embedder=embedder,
tenant_id=tenant_id,
request_id=request_id,
chunks_with_embeddings, embedding_failures = (
embed_chunks_with_failure_handling(
chunks=chunks,
embedder=embedder,
tenant_id=tenant_id,
request_id=request_id,
)
if chunks
else ([], [])
)
chunk_content_scores = [1.0] * len(chunks_with_embeddings)
updatable_ids = [doc.id for doc in context.updatable_docs]
updatable_chunk_data = [
UpdatableChunkData(
chunk_id=chunk_id,
document_id=document_id,
boost_score=1.0,
chunk_id=chunk.chunk_id,
document_id=chunk.source_document.id,
boost_score=score,
)
for chunk_id, document_id in embedding_result.successful_chunk_ids
for chunk, score in zip(chunks_with_embeddings, chunk_content_scores)
]
# Acquires a lock on the documents so that no other process can modify them
# NOTE: we don't need to acquire it until here, since this is when the actual race condition
# with Vespa can occur.
with (
adapter.lock_context(context.updatable_docs),
use_embed_stream(embedding_result.embedding_path) as embed_stream,
):
enricher = adapter.prepare_enrichment(
with adapter.lock_context(context.updatable_docs):
# we're concerned about race conditions where multiple simultaneous indexing runs
# might result in one set of metadata overwriting another in vespa.
# we still write the data here for the immediate and most likely correct sync, but
# to resolve any conflict, an update of the last-modified field at the end of this
# loop always triggers a final metadata sync via the celery queue
result = adapter.build_metadata_aware_chunks(
chunks_with_embeddings=chunks_with_embeddings,
chunk_content_scores=chunk_content_scores,
tenant_id=tenant_id,
context=context,
tenant_id=tenant_id,
chunks=chunks,
)
index_batch_params = IndexBatchParams(
doc_id_to_previous_chunk_cnt=enricher.doc_id_to_previous_chunk_cnt,
doc_id_to_new_chunk_cnt=enricher.doc_id_to_new_chunk_cnt,
tenant_id=tenant_id,
large_chunks_enabled=chunker.enable_large_chunks,
)
embedding_failed_doc_ids = {
f.failed_document.document_id
for f in embedding_result.connector_failures
if f.failed_document
}
short_descriptor_list = [chunk.to_short_descriptor() for chunk in result.chunks]
short_descriptor_log = str(short_descriptor_list)[:1024]
logger.debug(f"Indexing the following chunks: {short_descriptor_log}")
primary_doc_idx_insertion_records: list[DocumentInsertionRecord] | None = None
primary_doc_idx_vector_db_write_failures: list[ConnectorFailure] | None = None
for document_index in document_indices:
# A document will not be spread across different batches, so all the
# documents with chunks in this set are fully represented by the chunks
# in this set
def _enriched_stream() -> Iterator[DocMetadataAwareIndexChunk]:
for chunk in embed_stream.stream():
yield enricher.enrich_chunk(chunk, 1.0)
insertion_records, write_failures = write_chunks_to_vector_db_with_backoff(
(
insertion_records,
vector_db_write_failures,
) = write_chunks_to_vector_db_with_backoff(
document_index=document_index,
chunks=_enriched_stream(),
index_batch_params=index_batch_params,
chunks=result.chunks,
index_batch_params=IndexBatchParams(
doc_id_to_previous_chunk_cnt=result.doc_id_to_previous_chunk_cnt,
doc_id_to_new_chunk_cnt=result.doc_id_to_new_chunk_cnt,
tenant_id=tenant_id,
large_chunks_enabled=chunker.enable_large_chunks,
),
)
_verify_indexing_completeness(
insertion_records=insertion_records,
write_failures=write_failures,
embedding_failed_doc_ids=embedding_failed_doc_ids,
updatable_ids=updatable_ids,
document_index_name=document_index.__class__.__name__,
all_returned_doc_ids: set[str] = (
{record.document_id for record in insertion_records}
.union(
{
record.failed_document.document_id
for record in vector_db_write_failures
if record.failed_document
}
)
.union(
{
record.failed_document.document_id
for record in embedding_failures
if record.failed_document
}
)
)
if all_returned_doc_ids != set(updatable_ids):
raise RuntimeError(
f"Some documents were not successfully indexed. "
f"Updatable IDs: {updatable_ids}, "
f"Returned IDs: {all_returned_doc_ids}. "
"This should never happen."
f"This occured for document index {document_index.__class__.__name__}"
)
# We treat the first document index we got as the primary one used
# for reporting the state of indexing.
if primary_doc_idx_insertion_records is None:
primary_doc_idx_insertion_records = insertion_records
if primary_doc_idx_vector_db_write_failures is None:
primary_doc_idx_vector_db_write_failures = write_failures
primary_doc_idx_vector_db_write_failures = vector_db_write_failures
adapter.post_index(
context=context,
updatable_chunk_data=updatable_chunk_data,
filtered_documents=filtered_documents,
enrichment=enricher,
result=result,
)
assert primary_doc_idx_insertion_records is not None
assert primary_doc_idx_vector_db_write_failures is not None
return IndexingPipelineResult(
new_docs=sum(
1 for r in primary_doc_idx_insertion_records if not r.already_existed
new_docs=len(
[r for r in primary_doc_idx_insertion_records if not r.already_existed]
),
total_docs=len(filtered_documents),
total_chunks=len(embedding_result.successful_chunk_ids),
failures=primary_doc_idx_vector_db_write_failures
+ embedding_result.connector_failures,
total_chunks=len(chunks_with_embeddings),
failures=primary_doc_idx_vector_db_write_failures + embedding_failures,
)

View File

@@ -235,16 +235,12 @@ class UpdatableChunkData(BaseModel):
boost_score: float
class ChunkEnrichmentContext(Protocol):
"""Returned by prepare_enrichment. Holds pre-computed metadata lookups
and provides per-chunk enrichment."""
class BuildMetadataAwareChunksResult(BaseModel):
chunks: list[DocMetadataAwareIndexChunk]
doc_id_to_previous_chunk_cnt: dict[str, int]
doc_id_to_new_chunk_cnt: dict[str, int]
def enrich_chunk(
self, chunk: IndexChunk, score: float
) -> DocMetadataAwareIndexChunk: ...
user_file_id_to_raw_text: dict[str, str]
user_file_id_to_token_count: dict[str, int | None]
class IndexingBatchAdapter(Protocol):
@@ -258,24 +254,18 @@ class IndexingBatchAdapter(Protocol):
) -> Generator[TransactionalContext, None, None]:
"""Provide a transaction/row-lock context for critical updates."""
def prepare_enrichment(
def build_metadata_aware_chunks(
self,
context: "DocumentBatchPrepareContext",
chunks_with_embeddings: list[IndexChunk],
chunk_content_scores: list[float],
tenant_id: str,
chunks: list[DocAwareChunk],
) -> ChunkEnrichmentContext:
"""Prepare per-chunk enrichment data (access, document sets, boost, etc.).
Precondition: ``chunks`` have already been through the embedding step
(i.e. they are ``IndexChunk`` instances with populated embeddings,
passed here as the base ``DocAwareChunk`` type).
"""
...
context: "DocumentBatchPrepareContext",
) -> BuildMetadataAwareChunksResult: ...
def post_index(
self,
context: "DocumentBatchPrepareContext",
updatable_chunk_data: list[UpdatableChunkData],
filtered_documents: list[Document],
enrichment: ChunkEnrichmentContext,
result: BuildMetadataAwareChunksResult,
) -> None: ...

View File

@@ -62,6 +62,9 @@ def _hook_to_response(hook: Hook, creator_email: str | None = None) -> HookRespo
name=hook.name,
hook_point=hook.hook_point,
endpoint_url=hook.endpoint_url,
api_key_masked=(
hook.api_key.get_value(apply_mask=True) if hook.api_key else None
),
fail_strategy=hook.fail_strategy,
timeout_seconds=hook.timeout_seconds,
is_active=hook.is_active,

View File

@@ -17,6 +17,7 @@ from onyx.db.models import User
from onyx.db.notification import dismiss_all_notifications
from onyx.db.notification import get_notifications
from onyx.db.notification import update_notification_last_shown
from onyx.hooks.utils import HOOKS_AVAILABLE
from onyx.key_value_store.factory import get_kv_store
from onyx.key_value_store.interface import KvKeyNotFoundError
from onyx.server.features.build.utils import is_onyx_craft_enabled
@@ -80,6 +81,7 @@ def fetch_settings(
needs_reindexing=needs_reindexing,
onyx_craft_enabled=onyx_craft_enabled_for_user,
vector_db_enabled=not DISABLE_VECTOR_DB,
hooks_enabled=HOOKS_AVAILABLE,
version=onyx_version,
)

View File

@@ -104,5 +104,7 @@ class UserSettings(Settings):
# False when DISABLE_VECTOR_DB is set — connectors, RAG search, and
# document sets are unavailable.
vector_db_enabled: bool = True
# True when hooks are available: single-tenant deployment with HOOK_ENABLED=true.
hooks_enabled: bool = False
# Application version, read from the ONYX_VERSION env var at startup.
version: str | None = None

View File

@@ -1,7 +1,7 @@
"""
External dependency unit tests for UserFileIndexingAdapter metadata writing.
Validates that prepare_enrichment produces DocMetadataAwareIndexChunk
Validates that build_metadata_aware_chunks produces DocMetadataAwareIndexChunk
objects with both `user_project` and `personas` fields populated correctly
based on actual DB associations.
@@ -127,7 +127,7 @@ def _make_index_chunk(user_file: UserFile) -> IndexChunk:
class TestAdapterWritesBothMetadataFields:
"""prepare_enrichment must populate user_project AND personas."""
"""build_metadata_aware_chunks must populate user_project AND personas."""
@patch(
"onyx.indexing.adapters.user_file_indexing_adapter.get_default_llm",
@@ -153,13 +153,15 @@ class TestAdapterWritesBothMetadataFields:
doc = chunk.source_document
context = DocumentBatchPrepareContext(updatable_docs=[doc], id_to_boost_map={})
enricher = adapter.prepare_enrichment(
context=context,
result = adapter.build_metadata_aware_chunks(
chunks_with_embeddings=[chunk],
chunk_content_scores=[1.0],
tenant_id=TEST_TENANT_ID,
chunks=[chunk],
context=context,
)
aware_chunk = enricher.enrich_chunk(chunk, 1.0)
assert len(result.chunks) == 1
aware_chunk = result.chunks[0]
assert persona.id in aware_chunk.personas
assert aware_chunk.user_project == []
@@ -188,13 +190,15 @@ class TestAdapterWritesBothMetadataFields:
updatable_docs=[chunk.source_document], id_to_boost_map={}
)
enricher = adapter.prepare_enrichment(
context=context,
result = adapter.build_metadata_aware_chunks(
chunks_with_embeddings=[chunk],
chunk_content_scores=[1.0],
tenant_id=TEST_TENANT_ID,
chunks=[chunk],
context=context,
)
aware_chunk = enricher.enrich_chunk(chunk, 1.0)
assert len(result.chunks) == 1
aware_chunk = result.chunks[0]
assert project.id in aware_chunk.user_project
assert aware_chunk.personas == []
@@ -225,13 +229,14 @@ class TestAdapterWritesBothMetadataFields:
updatable_docs=[chunk.source_document], id_to_boost_map={}
)
enricher = adapter.prepare_enrichment(
context=context,
result = adapter.build_metadata_aware_chunks(
chunks_with_embeddings=[chunk],
chunk_content_scores=[1.0],
tenant_id=TEST_TENANT_ID,
chunks=[chunk],
context=context,
)
aware_chunk = enricher.enrich_chunk(chunk, 1.0)
aware_chunk = result.chunks[0]
assert persona.id in aware_chunk.personas
assert project.id in aware_chunk.user_project
@@ -256,13 +261,14 @@ class TestAdapterWritesBothMetadataFields:
updatable_docs=[chunk.source_document], id_to_boost_map={}
)
enricher = adapter.prepare_enrichment(
context=context,
result = adapter.build_metadata_aware_chunks(
chunks_with_embeddings=[chunk],
chunk_content_scores=[1.0],
tenant_id=TEST_TENANT_ID,
chunks=[chunk],
context=context,
)
aware_chunk = enricher.enrich_chunk(chunk, 1.0)
aware_chunk = result.chunks[0]
assert aware_chunk.personas == []
assert aware_chunk.user_project == []
@@ -294,11 +300,12 @@ class TestAdapterWritesBothMetadataFields:
updatable_docs=[chunk.source_document], id_to_boost_map={}
)
enricher = adapter.prepare_enrichment(
context=context,
result = adapter.build_metadata_aware_chunks(
chunks_with_embeddings=[chunk],
chunk_content_scores=[1.0],
tenant_id=TEST_TENANT_ID,
chunks=[chunk],
context=context,
)
aware_chunk = enricher.enrich_chunk(chunk, 1.0)
aware_chunk = result.chunks[0]
assert set(aware_chunk.personas) == {persona_a.id, persona_b.id}

View File

@@ -0,0 +1,47 @@
from sqlalchemy import inspect
from sqlalchemy.orm import Session
from onyx.db.chat import create_chat_session
from onyx.db.chat import get_chat_session_by_id
from onyx.db.models import Persona
from onyx.db.models import UserProject
from tests.external_dependency_unit.conftest import create_test_user
def test_eager_load_persona_loads_relationships(db_session: Session) -> None:
"""Verify that eager_load_persona pre-loads persona, its collections, and project."""
user = create_test_user(db_session, "eager-load")
persona = Persona(name="eager-load-test", description="test")
project = UserProject(name="eager-load-project", user_id=user.id)
db_session.add_all([persona, project])
db_session.flush()
chat_session = create_chat_session(
db_session=db_session,
description="test",
user_id=None,
persona_id=persona.id,
project_id=project.id,
)
loaded = get_chat_session_by_id(
chat_session_id=chat_session.id,
user_id=None,
db_session=db_session,
eager_load_persona=True,
)
try:
tmp = inspect(loaded)
assert tmp is not None
unloaded = tmp.unloaded
assert "persona" not in unloaded
assert "project" not in unloaded
tmp = inspect(loaded.persona)
assert tmp is not None
persona_unloaded = tmp.unloaded
assert "tools" not in persona_unloaded
assert "user_files" not in persona_unloaded
finally:
db_session.rollback()
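For orientation, the loader options implied by these assertions might look like the following. This is a sketch only: the actual options live in onyx.db.chat.get_chat_session_by_id, and ChatSession is assumed to come from onyx.db.models.

from sqlalchemy import select
from sqlalchemy.orm import joinedload
from sqlalchemy.orm import selectinload

# Hypothetical sketch: eagerly load exactly the relationships the test checks
# (persona, project, persona.tools, persona.user_files).
stmt = select(ChatSession).where(ChatSession.id == chat_session_id)
if eager_load_persona:
    stmt = stmt.options(
        joinedload(ChatSession.persona).selectinload(Persona.tools),
        joinedload(ChatSession.persona).selectinload(Persona.user_files),
        joinedload(ChatSession.project),
    )
loaded = db_session.scalars(stmt).one_or_none()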

View File

@@ -6,7 +6,6 @@ These tests assume Vespa and OpenSearch are running.
import time
import uuid
from collections.abc import Generator
from collections.abc import Iterator
import httpx
import pytest
@@ -22,7 +21,6 @@ from onyx.document_index.opensearch.opensearch_document_index import (
)
from onyx.document_index.vespa.index import VespaIndex
from onyx.document_index.vespa.vespa_document_index import VespaDocumentIndex
from onyx.indexing.models import DocMetadataAwareIndexChunk
from tests.external_dependency_unit.constants import TEST_TENANT_ID
from tests.external_dependency_unit.document_index.conftest import EMBEDDING_DIM
from tests.external_dependency_unit.document_index.conftest import make_chunk
@@ -203,25 +201,3 @@ class TestDocumentIndexNew:
assert len(result_map) == 2
assert result_map[existing_doc] is True
assert result_map[new_doc] is False
def test_index_accepts_generator(
self,
document_indices: list[DocumentIndexNew],
tenant_context: None, # noqa: ARG002
) -> None:
"""index() accepts a generator (any iterable), not just a list."""
for document_index in document_indices:
doc_id = f"test_gen_{uuid.uuid4().hex[:8]}"
metadata = make_indexing_metadata([doc_id], old_counts=[0], new_counts=[3])
def chunk_gen() -> Iterator[DocMetadataAwareIndexChunk]:
for i in range(3):
yield make_chunk(doc_id, chunk_id=i)
results = document_index.index(
chunks=chunk_gen(), indexing_metadata=metadata
)
assert len(results) == 1
assert results[0].document_id == doc_id
assert results[0].already_existed is False

View File

@@ -5,7 +5,6 @@ These tests assume Vespa and OpenSearch are running.
import time
from collections.abc import Generator
from collections.abc import Iterator
import pytest
@@ -167,29 +166,3 @@ class TestDocumentIndexOld:
batch_retrieval=True,
)
assert len(inference_chunks) == 0
def test_index_accepts_generator(
self,
document_indices: list[DocumentIndex],
tenant_context: None, # noqa: ARG002
) -> None:
"""index() accepts a generator (any iterable), not just a list."""
for document_index in document_indices:
def chunk_gen() -> Iterator[DocMetadataAwareIndexChunk]:
for i in range(3):
yield make_chunk("test_doc_gen", chunk_id=i)
index_batch_params = IndexBatchParams(
doc_id_to_previous_chunk_cnt={"test_doc_gen": 0},
doc_id_to_new_chunk_cnt={"test_doc_gen": 3},
tenant_id=get_current_tenant_id(),
large_chunks_enabled=False,
)
results = document_index.index(chunk_gen(), index_batch_params)
assert len(results) == 1
record = results.pop()
assert record.document_id == "test_doc_gen"
assert record.already_existed is False

View File

@@ -143,8 +143,8 @@ def use_mock_search_pipeline(
db_session: Session | None = None, # noqa: ARG001
auto_detect_filters: bool = False, # noqa: ARG001
llm: LLM | None = None, # noqa: ARG001
project_id: int | None = None, # noqa: ARG001
persona_id: int | None = None, # noqa: ARG001
project_id_filter: int | None = None, # noqa: ARG001
persona_id_filter: int | None = None, # noqa: ARG001
# Pre-fetched data (used by SearchTool to avoid DB access in parallel calls)
acl_filters: list[str] | None = None, # noqa: ARG001
embedding_model: EmbeddingModel | None = None, # noqa: ARG001

View File

@@ -0,0 +1,53 @@
"""Tests for user group rename DB operation."""
from unittest.mock import MagicMock
from unittest.mock import patch
import pytest
from ee.onyx.db.user_group import rename_user_group
from onyx.db.models import UserGroup
class TestRenameUserGroup:
"""Tests for rename_user_group function."""
@patch("ee.onyx.db.user_group.DISABLE_VECTOR_DB", False)
@patch(
"ee.onyx.db.user_group._mark_user_group__cc_pair_relationships_outdated__no_commit"
)
def test_rename_succeeds_and_triggers_sync(
self, mock_mark_outdated: MagicMock
) -> None:
mock_session = MagicMock()
mock_group = MagicMock(spec=UserGroup)
mock_group.name = "Old Name"
mock_group.is_up_to_date = True
mock_session.scalar.return_value = mock_group
result = rename_user_group(mock_session, user_group_id=1, new_name="New Name")
assert result.name == "New Name"
assert result.is_up_to_date is False
mock_mark_outdated.assert_called_once()
mock_session.commit.assert_called_once()
def test_rename_group_not_found(self) -> None:
mock_session = MagicMock()
mock_session.scalar.return_value = None
with pytest.raises(ValueError, match="not found"):
rename_user_group(mock_session, user_group_id=999, new_name="New Name")
mock_session.commit.assert_not_called()
def test_rename_group_syncing_raises(self) -> None:
mock_session = MagicMock()
mock_group = MagicMock(spec=UserGroup)
mock_group.is_up_to_date = False
mock_session.scalar.return_value = mock_group
with pytest.raises(ValueError, match="currently syncing"):
rename_user_group(mock_session, user_group_id=1, new_name="New Name")
mock_session.commit.assert_not_called()
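Taken together, these cases pin down roughly the following shape for rename_user_group. This is a sketch under assumed names, not the verbatim ee implementation; the exact query and the kwargs passed to the outdated-marking helper are assumptions.

from sqlalchemy import select

def rename_user_group(db_session, user_group_id: int, new_name: str) -> UserGroup:
    # Sketch implied by the tests above.
    group = db_session.scalar(select(UserGroup).where(UserGroup.id == user_group_id))
    if group is None:
        raise ValueError(f"User group with id {user_group_id} not found")
    if not group.is_up_to_date:
        raise ValueError("User group is currently syncing and cannot be renamed")
    group.name = new_name
    group.is_up_to_date = False  # triggers a sync
    if not DISABLE_VECTOR_DB:
        _mark_user_group__cc_pair_relationships_outdated__no_commit(
            user_group_id=user_group_id, db_session=db_session
        )
    db_session.commit()
    return group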

View File

@@ -0,0 +1,216 @@
"""
Unit tests for the check_available_tenants task.
Tests verify:
- Provisioning loop calls pre_provision_tenant the correct number of times
- Batch size is capped at _MAX_TENANTS_PER_RUN
- A failure in one provisioning call does not stop subsequent calls
- No provisioning happens when pool is already full
- TARGET_AVAILABLE_TENANTS is respected
"""
from unittest.mock import MagicMock
import pytest
from ee.onyx.background.celery.tasks.tenant_provisioning.tasks import (
_MAX_TENANTS_PER_RUN,
)
from ee.onyx.background.celery.tasks.tenant_provisioning.tasks import (
check_available_tenants,
)
# Access the underlying function directly, bypassing Celery's task wrapper
# which injects `self` as the first argument when bind=True.
_check_available_tenants = check_available_tenants.run
@pytest.fixture()
def _enable_multi_tenant(monkeypatch: pytest.MonkeyPatch) -> None:
monkeypatch.setattr(
"ee.onyx.background.celery.tasks.tenant_provisioning.tasks.MULTI_TENANT",
True,
)
@pytest.fixture()
def mock_redis(monkeypatch: pytest.MonkeyPatch) -> MagicMock:
mock_lock = MagicMock()
mock_lock.acquire.return_value = True
mock_client = MagicMock()
mock_client.lock.return_value = mock_lock
monkeypatch.setattr(
"ee.onyx.background.celery.tasks.tenant_provisioning.tasks.get_redis_client",
lambda tenant_id: mock_client, # noqa: ARG005
)
return mock_client
@pytest.fixture()
def mock_pre_provision(monkeypatch: pytest.MonkeyPatch) -> MagicMock:
mock = MagicMock(return_value=True)
monkeypatch.setattr(
"ee.onyx.background.celery.tasks.tenant_provisioning.tasks.pre_provision_tenant",
mock,
)
return mock
def _mock_available_count(monkeypatch: pytest.MonkeyPatch, count: int) -> None:
"""Set up the DB session mock to return a specific available tenant count."""
mock_session = MagicMock()
mock_session.__enter__ = MagicMock(return_value=mock_session)
mock_session.__exit__ = MagicMock(return_value=False)
mock_session.query.return_value.count.return_value = count
monkeypatch.setattr(
"ee.onyx.background.celery.tasks.tenant_provisioning.tasks.get_session_with_shared_schema",
lambda: mock_session,
)
@pytest.mark.usefixtures("_enable_multi_tenant", "mock_redis")
class TestCheckAvailableTenants:
def test_provisions_all_needed_tenants(
self,
monkeypatch: pytest.MonkeyPatch,
mock_pre_provision: MagicMock,
) -> None:
"""When pool has 2 and target is 5, should provision 3."""
monkeypatch.setattr(
"ee.onyx.background.celery.tasks.tenant_provisioning.tasks.TARGET_AVAILABLE_TENANTS",
5,
)
_mock_available_count(monkeypatch, 2)
_check_available_tenants()
assert mock_pre_provision.call_count == 3
def test_batch_capped_at_max_per_run(
self,
monkeypatch: pytest.MonkeyPatch,
mock_pre_provision: MagicMock,
) -> None:
"""When pool needs more than _MAX_TENANTS_PER_RUN, cap the batch."""
monkeypatch.setattr(
"ee.onyx.background.celery.tasks.tenant_provisioning.tasks.TARGET_AVAILABLE_TENANTS",
20,
)
_mock_available_count(monkeypatch, 0)
_check_available_tenants()
assert mock_pre_provision.call_count == _MAX_TENANTS_PER_RUN
def test_no_provisioning_when_pool_full(
self,
monkeypatch: pytest.MonkeyPatch,
mock_pre_provision: MagicMock,
) -> None:
"""When pool already meets target, should not provision anything."""
monkeypatch.setattr(
"ee.onyx.background.celery.tasks.tenant_provisioning.tasks.TARGET_AVAILABLE_TENANTS",
5,
)
_mock_available_count(monkeypatch, 5)
_check_available_tenants()
assert mock_pre_provision.call_count == 0
def test_no_provisioning_when_pool_exceeds_target(
self,
monkeypatch: pytest.MonkeyPatch,
mock_pre_provision: MagicMock,
) -> None:
"""When pool exceeds target, should not provision anything."""
monkeypatch.setattr(
"ee.onyx.background.celery.tasks.tenant_provisioning.tasks.TARGET_AVAILABLE_TENANTS",
5,
)
_mock_available_count(monkeypatch, 8)
_check_available_tenants()
assert mock_pre_provision.call_count == 0
def test_failure_does_not_stop_remaining(
self,
monkeypatch: pytest.MonkeyPatch,
mock_pre_provision: MagicMock,
) -> None:
"""If one provisioning fails, the rest should still be attempted."""
monkeypatch.setattr(
"ee.onyx.background.celery.tasks.tenant_provisioning.tasks.TARGET_AVAILABLE_TENANTS",
5,
)
_mock_available_count(monkeypatch, 0)
# Fail on calls 2 and 4 (1-indexed)
call_count = 0
def side_effect() -> bool:
nonlocal call_count
call_count += 1
if call_count in (2, 4):
raise RuntimeError("provisioning failed")
return True
mock_pre_provision.side_effect = side_effect
_check_available_tenants()
# All 5 should be attempted despite 2 failures
assert mock_pre_provision.call_count == 5
def test_skips_when_not_multi_tenant(
self,
monkeypatch: pytest.MonkeyPatch,
mock_pre_provision: MagicMock,
) -> None:
"""Should not provision when multi-tenancy is disabled."""
monkeypatch.setattr(
"ee.onyx.background.celery.tasks.tenant_provisioning.tasks.MULTI_TENANT",
False,
)
_check_available_tenants()
assert mock_pre_provision.call_count == 0
def test_skips_when_lock_not_acquired(
self,
mock_redis: MagicMock,
mock_pre_provision: MagicMock,
) -> None:
"""Should skip when another instance holds the lock."""
mock_redis.lock.return_value.acquire.return_value = False
_check_available_tenants()
assert mock_pre_provision.call_count == 0
def test_lock_release_failure_does_not_raise(
self,
monkeypatch: pytest.MonkeyPatch,
mock_redis: MagicMock,
mock_pre_provision: MagicMock,
) -> None:
"""LockNotOwnedError on release should be caught, not propagated."""
from redis.exceptions import LockNotOwnedError
monkeypatch.setattr(
"ee.onyx.background.celery.tasks.tenant_provisioning.tasks.TARGET_AVAILABLE_TENANTS",
5,
)
_mock_available_count(monkeypatch, 4)
mock_redis.lock.return_value.release.side_effect = LockNotOwnedError("expired")
# Should not raise
_check_available_tenants()
assert mock_pre_provision.call_count == 1
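For orientation, the provisioning loop these tests pin down looks roughly like this. It is a sketch with assumed names (including the logger); the real task also acquires the Redis lock shown in the fixtures.

# Sketch of the batch-capped, failure-tolerant loop the tests exercise.
needed = TARGET_AVAILABLE_TENANTS - available_tenant_count
to_provision = min(max(needed, 0), _MAX_TENANTS_PER_RUN)
for _ in range(to_provision):
    try:
        pre_provision_tenant()
    except Exception:
        # a single failed attempt must not stop the remaining ones
        logger.exception("tenant pre-provisioning failed; continuing")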

View File

@@ -1,4 +1,12 @@
import pytest
from onyx.chat.process_message import _resolve_query_processing_hook_result
from onyx.chat.process_message import remove_answer_citations
from onyx.error_handling.error_codes import OnyxErrorCode
from onyx.error_handling.exceptions import OnyxError
from onyx.hooks.executor import HookSkipped
from onyx.hooks.executor import HookSoftFailed
from onyx.hooks.points.query_processing import QueryProcessingResponse
def test_remove_answer_citations_strips_http_markdown_citation() -> None:
@@ -32,3 +40,81 @@ def test_remove_answer_citations_preserves_non_citation_markdown_links() -> None
remove_answer_citations(answer)
== "See [reference](https://example.com/Function_(mathematics)) for context."
)
# ---------------------------------------------------------------------------
# Query Processing hook response handling (_resolve_query_processing_hook_result)
# ---------------------------------------------------------------------------
def test_hook_skipped_leaves_message_text_unchanged() -> None:
result = _resolve_query_processing_hook_result(HookSkipped(), "original query")
assert result == "original query"
def test_hook_soft_failed_leaves_message_text_unchanged() -> None:
result = _resolve_query_processing_hook_result(HookSoftFailed(), "original query")
assert result == "original query"
def test_null_query_raises_query_rejected() -> None:
with pytest.raises(OnyxError) as exc_info:
_resolve_query_processing_hook_result(
QueryProcessingResponse(query=None), "original query"
)
assert exc_info.value.error_code is OnyxErrorCode.QUERY_REJECTED
def test_empty_string_query_raises_query_rejected() -> None:
"""Empty string is falsy — must be treated as rejection, same as None."""
with pytest.raises(OnyxError) as exc_info:
_resolve_query_processing_hook_result(
QueryProcessingResponse(query=""), "original query"
)
assert exc_info.value.error_code is OnyxErrorCode.QUERY_REJECTED
def test_whitespace_only_query_raises_query_rejected() -> None:
"""Whitespace-only string is truthy but meaningless — must be treated as rejection."""
with pytest.raises(OnyxError) as exc_info:
_resolve_query_processing_hook_result(
QueryProcessingResponse(query=" "), "original query"
)
assert exc_info.value.error_code is OnyxErrorCode.QUERY_REJECTED
def test_absent_query_field_raises_query_rejected() -> None:
"""query defaults to None when not provided."""
with pytest.raises(OnyxError) as exc_info:
_resolve_query_processing_hook_result(
QueryProcessingResponse(), "original query"
)
assert exc_info.value.error_code is OnyxErrorCode.QUERY_REJECTED
def test_rejection_message_surfaced_in_error_when_provided() -> None:
with pytest.raises(OnyxError) as exc_info:
_resolve_query_processing_hook_result(
QueryProcessingResponse(
query=None, rejection_message="Queries about X are not allowed."
),
"original query",
)
assert "Queries about X are not allowed." in str(exc_info.value)
def test_fallback_rejection_message_when_none() -> None:
"""No rejection_message → generic fallback used in OnyxError detail."""
with pytest.raises(OnyxError) as exc_info:
_resolve_query_processing_hook_result(
QueryProcessingResponse(query=None, rejection_message=None),
"original query",
)
assert "No rejection reason was provided." in str(exc_info.value)
def test_nonempty_query_rewrites_message_text() -> None:
result = _resolve_query_processing_hook_result(
QueryProcessingResponse(query="rewritten query"), "original query"
)
assert result == "rewritten query"

View File

@@ -1,226 +0,0 @@
from unittest.mock import MagicMock
from unittest.mock import patch
from onyx.access.models import DocumentAccess
from onyx.configs.constants import DocumentSource
from onyx.connectors.models import Document
from onyx.connectors.models import TextSection
from onyx.document_index.interfaces_new import IndexingMetadata
from onyx.document_index.interfaces_new import TenantState
from onyx.document_index.opensearch.opensearch_document_index import (
OpenSearchDocumentIndex,
)
from onyx.indexing.models import DocMetadataAwareIndexChunk
def _make_chunk(
doc_id: str,
chunk_id: int,
) -> DocMetadataAwareIndexChunk:
"""Creates a minimal DocMetadataAwareIndexChunk for testing."""
doc = Document(
id=doc_id,
sections=[TextSection(text="test", link="http://test.com")],
source=DocumentSource.FILE,
semantic_identifier="test_doc",
metadata={},
)
access = DocumentAccess.build(
user_emails=[],
user_groups=[],
external_user_emails=[],
external_user_group_ids=[],
is_public=True,
)
return DocMetadataAwareIndexChunk(
chunk_id=chunk_id,
blurb="test",
content="test content",
source_links={0: "http://test.com"},
image_file_id=None,
section_continuation=False,
source_document=doc,
title_prefix="",
metadata_suffix_semantic="",
metadata_suffix_keyword="",
mini_chunk_texts=None,
large_chunk_id=None,
doc_summary="",
chunk_context="",
contextual_rag_reserved_tokens=0,
embeddings={"full_embedding": [0.1] * 10, "mini_chunk_embeddings": []},
title_embedding=[0.1] * 10,
tenant_id="test_tenant",
access=access,
document_sets=set(),
user_project=[],
personas=[],
boost=0,
aggregated_chunk_boost_factor=1.0,
ancestor_hierarchy_node_ids=[],
)
def _make_index() -> OpenSearchDocumentIndex:
"""Creates an OpenSearchDocumentIndex with a mocked client."""
mock_client = MagicMock()
mock_client.bulk_index_documents = MagicMock()
tenant_state = TenantState(tenant_id="test_tenant", multitenant=False)
index = OpenSearchDocumentIndex.__new__(OpenSearchDocumentIndex)
index._index_name = "test_index"
index._client = mock_client
index._tenant_state = tenant_state
return index
def _make_metadata(doc_id: str, chunk_count: int) -> IndexingMetadata:
return IndexingMetadata(
doc_id_to_chunk_cnt_diff={
doc_id: IndexingMetadata.ChunkCounts(
old_chunk_cnt=0,
new_chunk_cnt=chunk_count,
),
},
)
@patch(
"onyx.document_index.opensearch.opensearch_document_index.MAX_CHUNKS_PER_DOC_BATCH",
100,
)
def test_single_doc_under_batch_limit_flushes_once() -> None:
"""A document with fewer chunks than MAX_CHUNKS_PER_DOC_BATCH should flush once."""
index = _make_index()
doc_id = "doc_1"
num_chunks = 50
chunks = [_make_chunk(doc_id, i) for i in range(num_chunks)]
metadata = _make_metadata(doc_id, num_chunks)
with patch.object(index, "delete", return_value=0):
index.index(chunks, metadata)
assert index._client.bulk_index_documents.call_count == 1
batch_arg = index._client.bulk_index_documents.call_args_list[0]
assert len(batch_arg.kwargs["documents"]) == num_chunks
@patch(
"onyx.document_index.opensearch.opensearch_document_index.MAX_CHUNKS_PER_DOC_BATCH",
100,
)
def test_single_doc_over_batch_limit_flushes_multiple_times() -> None:
"""A document with more chunks than MAX_CHUNKS_PER_DOC_BATCH should flush multiple times."""
index = _make_index()
doc_id = "doc_1"
num_chunks = 250
chunks = [_make_chunk(doc_id, i) for i in range(num_chunks)]
metadata = _make_metadata(doc_id, num_chunks)
with patch.object(index, "delete", return_value=0):
index.index(chunks, metadata)
# 250 chunks / 100 per batch = 3 flushes (100 + 100 + 50)
assert index._client.bulk_index_documents.call_count == 3
batch_sizes = [
len(call.kwargs["documents"])
for call in index._client.bulk_index_documents.call_args_list
]
assert batch_sizes == [100, 100, 50]
@patch(
"onyx.document_index.opensearch.opensearch_document_index.MAX_CHUNKS_PER_DOC_BATCH",
100,
)
def test_single_doc_exactly_at_batch_limit() -> None:
"""A document with exactly MAX_CHUNKS_PER_DOC_BATCH chunks should flush once
(the flush happens on the next chunk, not at the boundary)."""
index = _make_index()
doc_id = "doc_1"
num_chunks = 100
chunks = [_make_chunk(doc_id, i) for i in range(num_chunks)]
metadata = _make_metadata(doc_id, num_chunks)
with patch.object(index, "delete", return_value=0):
index.index(chunks, metadata)
# The flush check fires only when a chunk arrives while the buffer already
# holds MAX_CHUNKS_PER_DOC_BATCH items. With exactly 100 chunks there is no
# 101st arrival, so the single final flush handles all 100.
assert index._client.bulk_index_documents.call_count == 1
@patch(
"onyx.document_index.opensearch.opensearch_document_index.MAX_CHUNKS_PER_DOC_BATCH",
100,
)
def test_single_doc_one_over_batch_limit() -> None:
"""101 chunks for one doc: first 100 flushed when the 101st arrives, then
the 101st is flushed at the end."""
index = _make_index()
doc_id = "doc_1"
num_chunks = 101
chunks = [_make_chunk(doc_id, i) for i in range(num_chunks)]
metadata = _make_metadata(doc_id, num_chunks)
with patch.object(index, "delete", return_value=0):
index.index(chunks, metadata)
assert index._client.bulk_index_documents.call_count == 2
batch_sizes = [
len(call.kwargs["documents"])
for call in index._client.bulk_index_documents.call_args_list
]
assert batch_sizes == [100, 1]
@patch(
"onyx.document_index.opensearch.opensearch_document_index.MAX_CHUNKS_PER_DOC_BATCH",
100,
)
def test_multiple_docs_each_under_limit_flush_per_doc() -> None:
"""Multiple documents each under the batch limit should flush once per document."""
index = _make_index()
chunks = []
for doc_idx in range(3):
doc_id = f"doc_{doc_idx}"
for chunk_idx in range(50):
chunks.append(_make_chunk(doc_id, chunk_idx))
metadata = IndexingMetadata(
doc_id_to_chunk_cnt_diff={
f"doc_{i}": IndexingMetadata.ChunkCounts(old_chunk_cnt=0, new_chunk_cnt=50)
for i in range(3)
},
)
with patch.object(index, "delete", return_value=0):
index.index(chunks, metadata)
# 3 documents = 3 flushes (two at doc boundaries + one final)
assert index._client.bulk_index_documents.call_count == 3
@patch(
"onyx.document_index.opensearch.opensearch_document_index.MAX_CHUNKS_PER_DOC_BATCH",
100,
)
def test_delete_called_once_per_document() -> None:
"""Even with multiple flushes for a single document, delete should only be
called once per document."""
index = _make_index()
doc_id = "doc_1"
num_chunks = 250
chunks = [_make_chunk(doc_id, i) for i in range(num_chunks)]
metadata = _make_metadata(doc_id, num_chunks)
with patch.object(index, "delete", return_value=0) as mock_delete:
index.index(chunks, metadata)
mock_delete.assert_called_once_with(doc_id, None)
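Taken together, these tests pin down a per-document buffered flush along the following lines. This is a sketch with assumed helper names; the real loop lives in OpenSearchDocumentIndex.index.

def _flush_in_batches(chunks, max_per_batch, flush):
    # Sketch of the flush policy the tests above exercise.
    buffer: list = []
    current_doc_id: str | None = None
    for chunk in chunks:
        doc_id = chunk.source_document.id
        if current_doc_id is not None and doc_id != current_doc_id:
            flush(buffer)  # document boundary: always flush
            buffer = []
        elif len(buffer) >= max_per_batch:
            flush(buffer)  # limit is only hit when the next chunk arrives
            buffer = []
        current_doc_id = doc_id
        buffer.append(chunk)
    if buffer:
        flush(buffer)  # trailing partial batch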

View File

@@ -1,152 +0,0 @@
"""Unit tests for VespaDocumentIndex.index().
These tests mock all external I/O (HTTP calls, thread pools) and verify
the streaming logic, ID cleaning/mapping, and DocumentInsertionRecord
construction.
"""
from unittest.mock import MagicMock
from unittest.mock import patch
from onyx.access.models import DocumentAccess
from onyx.configs.constants import DocumentSource
from onyx.connectors.models import Document
from onyx.connectors.models import TextSection
from onyx.document_index.interfaces import EnrichedDocumentIndexingInfo
from onyx.document_index.interfaces_new import IndexingMetadata
from onyx.document_index.interfaces_new import TenantState
from onyx.document_index.vespa.vespa_document_index import VespaDocumentIndex
from onyx.indexing.models import ChunkEmbedding
from onyx.indexing.models import DocMetadataAwareIndexChunk
from onyx.indexing.models import IndexChunk
def _make_chunk(
doc_id: str,
chunk_id: int = 0,
content: str = "test content",
) -> DocMetadataAwareIndexChunk:
doc = Document(
id=doc_id,
semantic_identifier="test_doc",
sections=[TextSection(text=content, link=None)],
source=DocumentSource.NOT_APPLICABLE,
metadata={},
)
index_chunk = IndexChunk(
chunk_id=chunk_id,
blurb=content[:50],
content=content,
source_links=None,
image_file_id=None,
section_continuation=False,
source_document=doc,
title_prefix="",
metadata_suffix_semantic="",
metadata_suffix_keyword="",
contextual_rag_reserved_tokens=0,
doc_summary="",
chunk_context="",
mini_chunk_texts=None,
large_chunk_id=None,
embeddings=ChunkEmbedding(
full_embedding=[0.1] * 10,
mini_chunk_embeddings=[],
),
title_embedding=None,
)
access = DocumentAccess.build(
user_emails=[],
user_groups=[],
external_user_emails=[],
external_user_group_ids=[],
is_public=True,
)
return DocMetadataAwareIndexChunk.from_index_chunk(
index_chunk=index_chunk,
access=access,
document_sets=set(),
user_project=[],
personas=[],
boost=0,
aggregated_chunk_boost_factor=1.0,
tenant_id="test_tenant",
)
def _make_indexing_metadata(
doc_ids: list[str],
old_counts: list[int],
new_counts: list[int],
) -> IndexingMetadata:
return IndexingMetadata(
doc_id_to_chunk_cnt_diff={
doc_id: IndexingMetadata.ChunkCounts(
old_chunk_cnt=old,
new_chunk_cnt=new,
)
for doc_id, old, new in zip(doc_ids, old_counts, new_counts)
}
)
def _stub_enrich(
doc_id: str,
old_chunk_cnt: int,
) -> EnrichedDocumentIndexingInfo:
"""Build an EnrichedDocumentIndexingInfo that says 'no chunks to delete'
when old_chunk_cnt == 0, or 'has existing chunks' otherwise."""
return EnrichedDocumentIndexingInfo(
doc_id=doc_id,
chunk_start_index=0,
old_version=False,
chunk_end_index=old_chunk_cnt,
)
@patch("onyx.document_index.vespa.vespa_document_index.batch_index_vespa_chunks")
@patch("onyx.document_index.vespa.vespa_document_index.delete_vespa_chunks")
@patch(
"onyx.document_index.vespa.vespa_document_index.get_document_chunk_ids",
return_value=[],
)
@patch("onyx.document_index.vespa.vespa_document_index._enrich_basic_chunk_info")
@patch(
"onyx.document_index.vespa.vespa_document_index.BATCH_SIZE",
3,
)
def test_index_respects_batch_size(
mock_enrich: MagicMock,
mock_get_chunk_ids: MagicMock, # noqa: ARG001
mock_delete: MagicMock, # noqa: ARG001
mock_batch_index: MagicMock,
) -> None:
"""When chunks exceed BATCH_SIZE, batch_index_vespa_chunks is called
multiple times with correctly sized batches."""
mock_enrich.return_value = _stub_enrich("doc1", old_chunk_cnt=0)
index = VespaDocumentIndex(
index_name="test_index",
tenant_state=TenantState(tenant_id="test_tenant", multitenant=False),
large_chunks_enabled=False,
httpx_client=MagicMock(),
)
chunks = [_make_chunk("doc1", chunk_id=i) for i in range(7)]
metadata = _make_indexing_metadata(["doc1"], old_counts=[0], new_counts=[7])
results = index.index(chunks=chunks, indexing_metadata=metadata)
assert len(results) == 1
# With BATCH_SIZE=3 and 7 chunks: batches of 3, 3, 1
assert mock_batch_index.call_count == 3
batch_sizes = [len(c.kwargs["chunks"]) for c in mock_batch_index.call_args_list]
assert batch_sizes == [3, 3, 1]
# Verify all chunks are accounted for and in order
all_indexed = [
chunk for c in mock_batch_index.call_args_list for chunk in c.kwargs["chunks"]
]
assert len(all_indexed) == 7
assert [c.chunk_id for c in all_indexed] == list(range(7))

View File

@@ -7,6 +7,7 @@ from unittest.mock import patch
import httpx
import pytest
from pydantic import BaseModel
from onyx.db.enums import HookFailStrategy
from onyx.db.enums import HookPoint
@@ -15,13 +16,15 @@ from onyx.error_handling.exceptions import OnyxError
from onyx.hooks.executor import execute_hook
from onyx.hooks.executor import HookSkipped
from onyx.hooks.executor import HookSoftFailed
from onyx.hooks.points.query_processing import QueryProcessingResponse
# ---------------------------------------------------------------------------
# Helpers
# ---------------------------------------------------------------------------
_PAYLOAD: dict[str, Any] = {"query": "test", "user_email": "u@example.com"}
_RESPONSE_PAYLOAD: dict[str, Any] = {"rewritten_query": "better test"}
# A valid QueryProcessingResponse payload — used by success-path tests.
_RESPONSE_PAYLOAD: dict[str, Any] = {"query": "better test"}
def _make_hook(
@@ -33,6 +36,7 @@ def _make_hook(
fail_strategy: HookFailStrategy = HookFailStrategy.SOFT,
hook_id: int = 1,
is_reachable: bool | None = None,
hook_point: HookPoint = HookPoint.QUERY_PROCESSING,
) -> MagicMock:
hook = MagicMock()
hook.is_active = is_active
@@ -42,6 +46,7 @@ def _make_hook(
hook.id = hook_id
hook.fail_strategy = fail_strategy
hook.is_reachable = is_reachable
hook.hook_point = hook_point
return hook
@@ -140,6 +145,7 @@ def test_early_exit_returns_skipped_with_no_db_writes(
db_session=db_session,
hook_point=HookPoint.QUERY_PROCESSING,
payload=_PAYLOAD,
response_type=QueryProcessingResponse,
)
assert isinstance(result, HookSkipped)
@@ -152,7 +158,9 @@ def test_early_exit_returns_skipped_with_no_db_writes(
# ---------------------------------------------------------------------------
def test_success_returns_payload_and_sets_reachable(db_session: MagicMock) -> None:
def test_success_returns_validated_model_and_sets_reachable(
db_session: MagicMock,
) -> None:
hook = _make_hook()
with (
@@ -171,9 +179,11 @@ def test_success_returns_payload_and_sets_reachable(db_session: MagicMock) -> No
db_session=db_session,
hook_point=HookPoint.QUERY_PROCESSING,
payload=_PAYLOAD,
response_type=QueryProcessingResponse,
)
assert result == _RESPONSE_PAYLOAD
assert isinstance(result, QueryProcessingResponse)
assert result.query == _RESPONSE_PAYLOAD["query"]
_, update_kwargs = mock_update.call_args
assert update_kwargs["is_reachable"] is True
mock_log.assert_not_called()
@@ -200,9 +210,11 @@ def test_success_skips_reachable_write_when_already_true(db_session: MagicMock)
db_session=db_session,
hook_point=HookPoint.QUERY_PROCESSING,
payload=_PAYLOAD,
response_type=QueryProcessingResponse,
)
assert result == _RESPONSE_PAYLOAD
assert isinstance(result, QueryProcessingResponse)
assert result.query == _RESPONSE_PAYLOAD["query"]
mock_update.assert_not_called()
@@ -230,6 +242,7 @@ def test_non_dict_json_response_is_a_failure(db_session: MagicMock) -> None:
db_session=db_session,
hook_point=HookPoint.QUERY_PROCESSING,
payload=_PAYLOAD,
response_type=QueryProcessingResponse,
)
assert isinstance(result, HookSoftFailed)
@@ -265,6 +278,7 @@ def test_json_decode_failure_is_a_failure(db_session: MagicMock) -> None:
db_session=db_session,
hook_point=HookPoint.QUERY_PROCESSING,
payload=_PAYLOAD,
response_type=QueryProcessingResponse,
)
assert isinstance(result, HookSoftFailed)
@@ -388,6 +402,7 @@ def test_http_failure_paths(
db_session=db_session,
hook_point=HookPoint.QUERY_PROCESSING,
payload=_PAYLOAD,
response_type=QueryProcessingResponse,
)
assert exc_info.value.error_code is OnyxErrorCode.HOOK_EXECUTION_FAILED
else:
@@ -395,6 +410,7 @@ def test_http_failure_paths(
db_session=db_session,
hook_point=HookPoint.QUERY_PROCESSING,
payload=_PAYLOAD,
response_type=QueryProcessingResponse,
)
assert isinstance(result, expected_type)
@@ -442,6 +458,7 @@ def test_authorization_header(
db_session=db_session,
hook_point=HookPoint.QUERY_PROCESSING,
payload=_PAYLOAD,
response_type=QueryProcessingResponse,
)
_, call_kwargs = mock_client.post.call_args
@@ -457,16 +474,16 @@ def test_authorization_header(
@pytest.mark.parametrize(
"http_exception,expected_result",
"http_exception,expect_onyx_error",
[
pytest.param(None, _RESPONSE_PAYLOAD, id="success_path"),
pytest.param(httpx.ConnectError("refused"), OnyxError, id="hard_fail_path"),
pytest.param(None, False, id="success_path"),
pytest.param(httpx.ConnectError("refused"), True, id="hard_fail_path"),
],
)
def test_persist_session_failure_is_swallowed(
db_session: MagicMock,
http_exception: Exception | None,
expected_result: Any,
expect_onyx_error: bool,
) -> None:
"""DB session failure in _persist_result must not mask the real return value or OnyxError."""
hook = _make_hook(fail_strategy=HookFailStrategy.HARD)
@@ -489,12 +506,13 @@ def test_persist_session_failure_is_swallowed(
side_effect=http_exception,
)
if expected_result is OnyxError:
if expect_onyx_error:
with pytest.raises(OnyxError) as exc_info:
execute_hook(
db_session=db_session,
hook_point=HookPoint.QUERY_PROCESSING,
payload=_PAYLOAD,
response_type=QueryProcessingResponse,
)
assert exc_info.value.error_code is OnyxErrorCode.HOOK_EXECUTION_FAILED
else:
@@ -502,8 +520,131 @@ def test_persist_session_failure_is_swallowed(
db_session=db_session,
hook_point=HookPoint.QUERY_PROCESSING,
payload=_PAYLOAD,
response_type=QueryProcessingResponse,
)
assert result == expected_result
assert isinstance(result, QueryProcessingResponse)
assert result.query == _RESPONSE_PAYLOAD["query"]
# ---------------------------------------------------------------------------
# Response model validation
# ---------------------------------------------------------------------------
class _StrictResponse(BaseModel):
"""Strict model used to reliably trigger a ValidationError in tests."""
required_field: str # no default → missing key raises ValidationError
@pytest.mark.parametrize(
"fail_strategy,expected_type",
[
pytest.param(
HookFailStrategy.SOFT, HookSoftFailed, id="validation_failure_soft"
),
pytest.param(HookFailStrategy.HARD, OnyxError, id="validation_failure_hard"),
],
)
def test_response_validation_failure_respects_fail_strategy(
db_session: MagicMock,
fail_strategy: HookFailStrategy,
expected_type: type,
) -> None:
"""A response that fails response_model validation is treated like any other
hook failure: logged, is_reachable left unchanged, fail_strategy respected."""
hook = _make_hook(fail_strategy=fail_strategy)
with (
patch("onyx.hooks.executor.HOOKS_AVAILABLE", True),
patch(
"onyx.hooks.executor.get_non_deleted_hook_by_hook_point",
return_value=hook,
),
patch("onyx.hooks.executor.get_session_with_current_tenant"),
patch("onyx.hooks.executor.update_hook__no_commit") as mock_update,
patch("onyx.hooks.executor.create_hook_execution_log__no_commit") as mock_log,
patch("httpx.Client") as mock_client_cls,
):
# Response payload is missing required_field → ValidationError
_setup_client(mock_client_cls, response=_make_response(json_return={}))
if expected_type is OnyxError:
with pytest.raises(OnyxError) as exc_info:
execute_hook(
db_session=db_session,
hook_point=HookPoint.QUERY_PROCESSING,
payload=_PAYLOAD,
response_type=_StrictResponse,
)
assert exc_info.value.error_code is OnyxErrorCode.HOOK_EXECUTION_FAILED
else:
result = execute_hook(
db_session=db_session,
hook_point=HookPoint.QUERY_PROCESSING,
payload=_PAYLOAD,
response_type=_StrictResponse,
)
assert isinstance(result, HookSoftFailed)
# is_reachable must not be updated — server responded correctly
mock_update.assert_not_called()
# failure must be logged
mock_log.assert_called_once()
_, log_kwargs = mock_log.call_args
assert log_kwargs["is_success"] is False
assert "validation" in (log_kwargs["error_message"] or "").lower()
# ---------------------------------------------------------------------------
# Outer soft-fail guard in execute_hook
# ---------------------------------------------------------------------------
@pytest.mark.parametrize(
"fail_strategy,expected_type",
[
pytest.param(HookFailStrategy.SOFT, HookSoftFailed, id="unexpected_exc_soft"),
pytest.param(HookFailStrategy.HARD, ValueError, id="unexpected_exc_hard"),
],
)
def test_unexpected_exception_in_inner_respects_fail_strategy(
db_session: MagicMock,
fail_strategy: HookFailStrategy,
expected_type: type,
) -> None:
"""An unexpected exception raised by _execute_hook_inner (not an OnyxError from
HARD fail — e.g. a bug or an assertion error) must be swallowed and return
HookSoftFailed for SOFT strategy, or re-raised for HARD strategy."""
hook = _make_hook(fail_strategy=fail_strategy)
with (
patch("onyx.hooks.executor.HOOKS_AVAILABLE", True),
patch(
"onyx.hooks.executor.get_non_deleted_hook_by_hook_point",
return_value=hook,
),
patch(
"onyx.hooks.executor._execute_hook_inner",
side_effect=ValueError("unexpected bug"),
),
):
if expected_type is HookSoftFailed:
result = execute_hook(
db_session=db_session,
hook_point=HookPoint.QUERY_PROCESSING,
payload=_PAYLOAD,
response_type=QueryProcessingResponse,
)
assert isinstance(result, HookSoftFailed)
else:
with pytest.raises(ValueError, match="unexpected bug"):
execute_hook(
db_session=db_session,
hook_point=HookPoint.QUERY_PROCESSING,
payload=_PAYLOAD,
response_type=QueryProcessingResponse,
)
def test_is_reachable_failure_does_not_prevent_log(db_session: MagicMock) -> None:
@@ -535,6 +676,7 @@ def test_is_reachable_failure_does_not_prevent_log(db_session: MagicMock) -> Non
db_session=db_session,
hook_point=HookPoint.QUERY_PROCESSING,
payload=_PAYLOAD,
response_type=QueryProcessingResponse,
)
assert isinstance(result, HookSoftFailed)

View File

@@ -116,7 +116,7 @@ def _run_adapter_build(
project_ids_map: dict[str, list[int]],
persona_ids_map: dict[str, list[int]],
) -> list[DocMetadataAwareIndexChunk]:
"""Helper that runs UserFileIndexingAdapter.prepare_enrichment + enrich_chunk
"""Helper that runs UserFileIndexingAdapter.build_metadata_aware_chunks
with all external dependencies mocked."""
from onyx.indexing.adapters.user_file_indexing_adapter import (
UserFileIndexingAdapter,
@@ -155,16 +155,18 @@ def _run_adapter_build(
side_effect=Exception("no LLM in tests"),
),
):
enricher = adapter.prepare_enrichment(
context=context,
result = adapter.build_metadata_aware_chunks(
chunks_with_embeddings=[chunk],
chunk_content_scores=[1.0],
tenant_id="test_tenant",
chunks=[chunk],
context=context,
)
return [enricher.enrich_chunk(chunk, 1.0)]
return result.chunks
def test_prepare_enrichment_includes_persona_ids() -> None:
"""UserFileIndexingAdapter.prepare_enrichment writes persona IDs
def test_build_metadata_aware_chunks_includes_persona_ids() -> None:
"""UserFileIndexingAdapter.build_metadata_aware_chunks writes persona IDs
fetched from the DB into each chunk's metadata."""
file_id = str(uuid4())
persona_ids = [5, 12]
@@ -181,7 +183,7 @@ def test_prepare_enrichment_includes_persona_ids() -> None:
assert chunks[0].user_project == project_ids
def test_prepare_enrichment_missing_file_defaults_to_empty() -> None:
def test_build_metadata_aware_chunks_missing_file_defaults_to_empty() -> None:
"""When a file has no persona or project associations in the DB, the
adapter should default to empty lists (not KeyError or None)."""
file_id = str(uuid4())

View File

@@ -23,6 +23,12 @@ upstream web_server {
# Conditionally include MCP upstream configuration
include /etc/nginx/conf.d/mcp_upstream.conf.inc;
# WebSocket support: only set Connection "upgrade" for actual upgrade requests
map $http_upgrade $connection_upgrade {
default upgrade;
'' close;
}
server {
listen 80 default_server;
@@ -46,8 +52,10 @@ server {
proxy_set_header X-Forwarded-Port $server_port;
proxy_set_header Host $host;
# need to use 1.1 to support chunked transfers
# need to use 1.1 to support chunked transfers and WebSocket
proxy_http_version 1.1;
proxy_set_header Upgrade $http_upgrade;
proxy_set_header Connection $connection_upgrade;
proxy_buffering off;
# timeout settings

View File

@@ -23,6 +23,12 @@ upstream web_server {
# Conditionally include MCP upstream configuration
include /etc/nginx/conf.d/mcp_upstream.conf.inc;
# WebSocket support: only set Connection "upgrade" for actual upgrade requests
map $http_upgrade $connection_upgrade {
default upgrade;
'' close;
}
server {
listen 80 default_server;
@@ -47,8 +53,10 @@ server {
proxy_set_header X-Forwarded-Port $server_port;
proxy_set_header Host $host;
# need to use 1.1 to support chunked transfers
# need to use 1.1 to support chunked transfers and WebSocket
proxy_http_version 1.1;
proxy_set_header Upgrade $http_upgrade;
proxy_set_header Connection $connection_upgrade;
proxy_buffering off;
# we don't want nginx trying to do something clever with
@@ -92,6 +100,8 @@ server {
proxy_set_header Host $host;
proxy_http_version 1.1;
proxy_set_header Upgrade $http_upgrade;
proxy_set_header Connection $connection_upgrade;
proxy_buffering off;
# we don't want nginx trying to do something clever with
# redirects, we set the Host: header above already.

View File

@@ -23,6 +23,12 @@ upstream web_server {
# Conditionally include MCP upstream configuration
include /etc/nginx/conf.d/mcp_upstream.conf.inc;
# WebSocket support: only set Connection "upgrade" for actual upgrade requests
map $http_upgrade $connection_upgrade {
default upgrade;
'' close;
}
server {
listen 80 default_server;
@@ -47,8 +53,10 @@ server {
proxy_set_header X-Forwarded-Port $server_port;
proxy_set_header Host $host;
# need to use 1.1 to support chunked transfers
# need to use 1.1 to support chunked transfers and WebSocket
proxy_http_version 1.1;
proxy_set_header Upgrade $http_upgrade;
proxy_set_header Connection $connection_upgrade;
proxy_buffering off;
# timeout settings
@@ -106,6 +114,8 @@ server {
proxy_set_header Host $host;
proxy_http_version 1.1;
proxy_set_header Upgrade $http_upgrade;
proxy_set_header Connection $connection_upgrade;
proxy_buffering off;
# timeout settings

View File

@@ -28,6 +28,12 @@ data:
}
{{- end }}
# WebSocket support: only set Connection "upgrade" for actual upgrade requests
map $http_upgrade $connection_upgrade {
default upgrade;
'' close;
}
server.conf: |
server {
listen 1024;
@@ -65,6 +71,8 @@ data:
proxy_set_header X-Forwarded-Host $host;
proxy_set_header Host $host;
proxy_http_version 1.1;
proxy_set_header Upgrade $http_upgrade;
proxy_set_header Connection $connection_upgrade;
proxy_buffering off;
proxy_redirect off;
# timeout settings

View File

@@ -10,7 +10,7 @@ data:
#!/usr/bin/env sh
set -eu
HOST="${POSTGRES_HOST:-localhost}"
HOST="${PGINTO_HOST:-${POSTGRES_HOST:-localhost}}"
PORT="${POSTGRES_PORT:-5432}"
USER="${POSTGRES_USER:-postgres}"
DB="${POSTGRES_DB:-postgres}"

View File

@@ -103,7 +103,7 @@ opensearch:
- name: OPENSEARCH_INITIAL_ADMIN_PASSWORD
valueFrom:
secretKeyRef:
name: onyx-opensearch # Must match auth.opensearch.secretName.
name: onyx-opensearch # Must match auth.opensearch.secretName or auth.opensearch.existingSecret if defined.
key: opensearch_admin_password # Must match auth.opensearch.secretKeys value.
resources:
@@ -282,7 +282,7 @@ nginx:
# The ingress-nginx subchart doesn't auto-detect our custom ConfigMap changes.
# Workaround: Helm upgrade will restart if the following annotation value changes.
podAnnotations:
onyx.app/nginx-config-version: "1"
onyx.app/nginx-config-version: "2"
# Propagate DOMAIN into nginx so server_name continues to use the same env var
extraEnvs:

View File

@@ -83,6 +83,14 @@
"scope": [],
"rule": "Code changes must consider both regular Onyx deployments and Onyx lite deployments. Lite deployments disable the vector DB, Redis, model servers, and background workers by default, use PostgreSQL-backed cache/auth/file storage, and rely on the API server to handle background work. Do not assume those services are available unless the code path is explicitly limited to full deployments."
},
{
"scope": ["web/**"],
"rule": "In Onyx's Next.js app, the `app/ee/admin/` directory is a filesystem convention for Enterprise Edition route overrides — it does NOT add an `/ee/` prefix to the URL. Both `app/admin/groups/page.tsx` and `app/ee/admin/groups/page.tsx` serve the same URL `/admin/groups`. Hardcoded `/admin/...` paths in router.push() calls are correct and do NOT break EE deployments. Do not flag hardcoded admin paths as bugs."
},
{
"scope": ["web/**"],
"rule": "In Onyx, each API key creates a unique user row in the database with a unique `user_id` (UUID). There is a 1:1 mapping between API keys and their backing user records. Multiple API keys do NOT share the same `user_id`. Do not flag potential duplicate row IDs when using `user_id` from API key descriptors."
},
{
"scope": ["backend/**/*.py"],
"rule": "Never raise HTTPException directly in business code. Use `raise OnyxError(OnyxErrorCode.XXX, \"message\")` from `onyx.error_handling.exceptions`. A global FastAPI exception handler converts OnyxError into structured JSON responses with {\"error_code\": \"...\", \"detail\": \"...\"}. Error codes are defined in `onyx.error_handling.error_codes.OnyxErrorCode`. For upstream errors with dynamic HTTP status codes, use `status_code_override`: `raise OnyxError(OnyxErrorCode.BAD_GATEWAY, detail, status_code_override=upstream_status)`."

Binary file not shown (new image, 15 KiB).

BIN  tmp/figma/hooks-card.png (new binary image, 910 KiB; file not shown).
View File

@@ -1,5 +1,9 @@
import "@opal/components/tooltip.css";
import { Interactive, type InteractiveStatelessProps } from "@opal/core";
import {
Disabled,
Interactive,
type InteractiveStatelessProps,
} from "@opal/core";
import type { ContainerSizeVariants, ExtremaSizeVariants } from "@opal/types";
import type { TooltipSide } from "@opal/components";
import type { IconFunctionComponent } from "@opal/types";
@@ -32,9 +36,6 @@ type ButtonProps = InteractiveStatelessProps &
*/
size?: ContainerSizeVariants;
/** HTML button type. When provided, Container renders a `<button>` element. */
type?: "submit" | "button" | "reset";
/** Tooltip text shown on hover. */
tooltip?: string;
@@ -43,6 +44,9 @@ type ButtonProps = InteractiveStatelessProps &
/** Which side the tooltip appears on. */
tooltipSide?: TooltipSide;
/** Wraps the button in a Disabled context. `false` overrides parent contexts. */
disabled?: boolean;
};
// ---------------------------------------------------------------------------
@@ -59,6 +63,7 @@ function Button({
tooltip,
tooltipSide = "top",
responsiveHideText = false,
disabled,
...interactiveProps
}: ButtonProps) {
const isLarge = size === "lg";
@@ -76,7 +81,7 @@ function Button({
) : null;
const button = (
<Interactive.Stateless {...interactiveProps}>
<Interactive.Stateless type={type} {...interactiveProps}>
<Interactive.Container
type={type}
border={interactiveProps.prominence === "secondary"}
@@ -102,9 +107,7 @@ function Button({
</Interactive.Stateless>
);
if (!tooltip) return button;
return (
const result = tooltip ? (
<TooltipPrimitive.Root>
<TooltipPrimitive.Trigger asChild>{button}</TooltipPrimitive.Trigger>
<TooltipPrimitive.Portal>
@@ -117,7 +120,15 @@ function Button({
</TooltipPrimitive.Content>
</TooltipPrimitive.Portal>
</TooltipPrimitive.Root>
) : (
button
);
if (disabled != null) {
return <Disabled disabled={disabled}>{result}</Disabled>;
}
return result;
}
export { Button, type ButtonProps };
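
The new `disabled` prop only wraps the result in a `Disabled` context when it is explicitly provided (`disabled != null`), which is what lets `false` override an ancestor provider. A minimal usage sketch, assuming the `Disabled` provider accepts arbitrary children (the toolbar itself is illustrative):

```tsx
import { Disabled } from "@opal/core";
import { Button } from "@opal/components";

// Illustrative: the group is disabled as a whole, but one button opts out.
function Toolbar() {
  return (
    <Disabled disabled>
      <Button>Save</Button> {/* inherits disabled from the parent context */}
      <Button disabled={false}>Cancel</Button> {/* false overrides the parent */}
    </Disabled>
  );
}
```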

View File

@@ -0,0 +1,8 @@
.opal-button-chevron {
transition: rotate 200ms ease;
}
.interactive[data-interaction="hover"] .opal-button-chevron,
.interactive[data-interaction="active"] .opal-button-chevron {
rotate: -180deg;
}

View File

@@ -0,0 +1,22 @@
import "@opal/components/buttons/chevron.css";
import type { IconProps } from "@opal/types";
import { SvgChevronDownSmall } from "@opal/icons";
import { cn } from "@opal/utils";
/**
* Chevron icon that rotates 180° when its parent `.interactive` enters
* hover / active state. Shared by OpenButton, FilterButton, and any
* future button that needs an animated dropdown indicator.
*
* Stable component identity — never causes React to remount the SVG.
*/
function ChevronIcon({ className, ...props }: IconProps) {
return (
<SvgChevronDownSmall
className={cn(className, "opal-button-chevron")}
{...props}
/>
);
}
export { ChevronIcon };
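
The "stable component identity" note is doing real work here: if the chevron were defined inline inside a render function, each render would produce a new component type, React would unmount and remount the `<svg>`, and the 200ms rotation would restart instead of animating. A contrast sketch (the inline version is the anti-pattern, purely illustrative):

```tsx
import type { IconProps } from "@opal/types";
import { SvgChevronDownSmall } from "@opal/icons";

// Anti-pattern: `InlineChevron` gets a new identity on every render, so
// React remounts the SVG and the CSS transition resets mid-animation.
function DropdownLabel({ open }: { open: boolean }) {
  const InlineChevron = (props: IconProps) => (
    <SvgChevronDownSmall {...props} />
  );
  return <InlineChevron className={open ? "opal-button-chevron" : undefined} />;
}
// ChevronIcon above is declared once at module scope, so its element type is
// stable across renders and the rotation can animate smoothly.
```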

View File

@@ -0,0 +1,107 @@
import type { Meta, StoryObj } from "@storybook/react";
import { FilterButton } from "@opal/components";
import { Disabled as DisabledProvider } from "@opal/core";
import { SvgUser, SvgActions, SvgTag } from "@opal/icons";
import * as TooltipPrimitive from "@radix-ui/react-tooltip";
const meta: Meta<typeof FilterButton> = {
title: "opal/components/FilterButton",
component: FilterButton,
tags: ["autodocs"],
decorators: [
(Story) => (
<TooltipPrimitive.Provider>
<Story />
</TooltipPrimitive.Provider>
),
],
};
export default meta;
type Story = StoryObj<typeof FilterButton>;
export const Empty: Story = {
args: {
icon: SvgUser,
children: "Everyone",
},
};
export const Active: Story = {
args: {
icon: SvgUser,
active: true,
children: "By alice@example.com",
onClear: () => console.log("clear"),
},
};
export const Open: Story = {
args: {
icon: SvgActions,
interaction: "hover",
children: "All Actions",
},
};
export const ActiveOpen: Story = {
args: {
icon: SvgActions,
active: true,
interaction: "hover",
children: "2 selected",
onClear: () => console.log("clear"),
},
};
export const Disabled: Story = {
args: {
icon: SvgTag,
children: "All Tags",
},
decorators: [
(Story) => (
<DisabledProvider disabled>
<Story />
</DisabledProvider>
),
],
};
export const DisabledActive: Story = {
args: {
icon: SvgTag,
active: true,
children: "2 tags",
onClear: () => console.log("clear"),
},
decorators: [
(Story) => (
<DisabledProvider disabled>
<Story />
</DisabledProvider>
),
],
};
export const StateComparison: Story = {
render: () => (
<div style={{ display: "flex", gap: 12, alignItems: "center" }}>
<FilterButton icon={SvgUser} onClear={() => undefined}>
Everyone
</FilterButton>
<FilterButton icon={SvgUser} active onClear={() => console.log("clear")}>
By alice@example.com
</FilterButton>
</div>
),
};
export const WithTooltip: Story = {
args: {
icon: SvgUser,
children: "Everyone",
tooltip: "Filter by creator",
tooltipSide: "bottom",
},
};

View File

@@ -0,0 +1,70 @@
# FilterButton
**Import:** `import { FilterButton, type FilterButtonProps } from "@opal/components";`
A stateful filter trigger with a built-in chevron (when empty) and a clear button (when selected). Hardcodes `variant="select-filter"` and delegates to `Interactive.Stateful`, adding automatic open-state detection from Radix `data-state`. Designed to sit inside a `Popover.Trigger` for filter dropdowns.
## Relationship to OpenButton
FilterButton and `OpenButton` share a similar call stack:
```
Interactive.Stateful → Interactive.Container → content row (icon + label + trailing indicator)
```
FilterButton is a **narrower, filter-specific** variant:
- It hardcodes `variant="select-filter"` (OpenButton uses `"select-heavy"`)
- It exposes `active?: boolean` instead of the raw `state` prop (maps to `"selected"` / `"empty"` internally)
- When active, the chevron is hidden via `visibility` and an absolutely-positioned clear `Button` with `prominence="tertiary"` overlays it — placed as a sibling outside the `<button>` to avoid nesting buttons
- It uses the shared `ChevronIcon` from `buttons/chevron` (same as OpenButton)
- It does not support `foldable`, `size`, or `width` — it is always `"lg"`
## Architecture
```
div.relative <- bounding wrapper
Interactive.Stateful <- variant="select-filter", interaction, state
└─ Interactive.Container (button) <- height="lg", default rounding/padding
└─ div.interactive-foreground
├─ div > Icon (interactive-foreground-icon)
├─ <span> label text
└─ ChevronIcon (when empty)
OR spacer div (when selected — reserves chevron space)
div.absolute <- clear Button overlay (when selected)
└─ Button (SvgX, size="2xs", prominence="tertiary")
```
- **Open-state detection** uses the explicit `interaction` prop when provided, otherwise falls back to reading `data-state="open"` injected by Radix triggers (e.g. `Popover.Trigger`).
- **Chevron rotation** uses the shared `ChevronIcon` component and `buttons/chevron.css`, which rotates it 180deg while `data-interaction` is `"hover"` or `"active"`.
- **Clear button** is absolutely positioned outside the `<button>` element tree to avoid invalid nested `<button>` elements. An invisible spacer inside the button reserves the same space so layout doesn't shift between states.
## Props
| Prop | Type | Default | Description |
|------|------|---------|-------------|
| `icon` | `IconFunctionComponent` | **required** | Left icon component |
| `children` | `string` | **required** | Label text between icon and trailing indicator |
| `active` | `boolean` | `false` | Whether the filter has an active selection |
| `onClear` | `() => void` | **required** | Called when the clear (X) button is clicked |
| `interaction` | `"rest" \| "hover" \| "active"` | auto | JS-controlled interaction override. Falls back to Radix `data-state="open"`. |
| `tooltip` | `string` | — | Tooltip text shown on hover |
| `tooltipSide` | `TooltipSide` | `"top"` | Which side the tooltip appears on |
## Usage
```tsx
import { FilterButton } from "@opal/components";
import { SvgUser } from "@opal/icons";
// Inside a Popover (auto-detects open state)
<Popover.Trigger asChild>
<FilterButton
icon={SvgUser}
active={hasSelection}
onClear={() => clearSelection()}
>
{hasSelection ? selectionLabel : "Everyone"}
</FilterButton>
</Popover.Trigger>
```
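
Outside a Radix trigger there is no `data-state` to detect, so open state can be driven manually through the `interaction` prop, which takes precedence over auto-detection. A small sketch assuming a locally managed `open` flag (the state wiring is illustrative):

```tsx
import { useState } from "react";
import { FilterButton } from "@opal/components";
import { SvgUser } from "@opal/icons";

function ManualFilter() {
  const [open, setOpen] = useState(false); // illustrative local open state
  return (
    <FilterButton
      icon={SvgUser}
      interaction={open ? "hover" : "rest"} // explicit prop beats data-state
      onClear={() => undefined}
      onClick={() => setOpen((o) => !o)}
    >
      Everyone
    </FilterButton>
  );
}
```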

View File

@@ -0,0 +1,120 @@
import {
Interactive,
type InteractiveStatefulInteraction,
type InteractiveStatefulProps,
} from "@opal/core";
import type { TooltipSide } from "@opal/components";
import type { IconFunctionComponent } from "@opal/types";
import { SvgX } from "@opal/icons";
import * as TooltipPrimitive from "@radix-ui/react-tooltip";
import { iconWrapper } from "@opal/components/buttons/icon-wrapper";
import { ChevronIcon } from "@opal/components/buttons/chevron";
import { Button } from "@opal/components/buttons/button/components";
// ---------------------------------------------------------------------------
// Types
// ---------------------------------------------------------------------------
interface FilterButtonProps
extends Omit<InteractiveStatefulProps, "variant" | "state"> {
/** Left icon — always visible. */
icon: IconFunctionComponent;
/** Label text between icon and trailing indicator. */
children: string;
/** Whether the filter has an active selection. @default false */
active?: boolean;
/** Called when the clear (X) button is clicked in active state. */
onClear: () => void;
/** Tooltip text shown on hover. */
tooltip?: string;
/** Which side the tooltip appears on. */
tooltipSide?: TooltipSide;
}
// ---------------------------------------------------------------------------
// FilterButton
// ---------------------------------------------------------------------------
function FilterButton({
icon: Icon,
children,
onClear,
tooltip,
tooltipSide = "top",
active = false,
interaction,
...statefulProps
}: FilterButtonProps) {
// Derive open state: explicit prop > Radix data-state (injected via Slot chain)
const dataState = (statefulProps as Record<string, unknown>)["data-state"] as
| string
| undefined;
const resolvedInteraction: InteractiveStatefulInteraction =
interaction ?? (dataState === "open" ? "hover" : "rest");
const button = (
<div className="relative">
<Interactive.Stateful
{...statefulProps}
variant="select-filter"
interaction={resolvedInteraction}
state={active ? "selected" : "empty"}
>
<Interactive.Container type="button">
<div className="interactive-foreground flex flex-row items-center gap-1">
{iconWrapper(Icon, "lg", true)}
<span className="whitespace-nowrap font-main-ui-action">
{children}
</span>
<div style={{ visibility: active ? "hidden" : "visible" }}>
{iconWrapper(ChevronIcon, "lg", true)}
</div>
</div>
</Interactive.Container>
</Interactive.Stateful>
{active && (
<div className="absolute right-2 top-1/2 -translate-y-1/2">
{/* Force hover state so the X stays visually prominent against
the inverted selected background — without this it renders
dimmed and looks disabled. */}
<Button
icon={SvgX}
size="2xs"
prominence="tertiary"
tooltip="Clear filter"
interaction="hover"
onClick={(e) => {
e.stopPropagation();
onClear();
}}
/>
</div>
)}
</div>
);
if (!tooltip) return button;
return (
<TooltipPrimitive.Root>
<TooltipPrimitive.Trigger asChild>{button}</TooltipPrimitive.Trigger>
<TooltipPrimitive.Portal>
<TooltipPrimitive.Content
className="opal-tooltip"
side={tooltipSide}
sideOffset={4}
>
{tooltip}
</TooltipPrimitive.Content>
</TooltipPrimitive.Portal>
</TooltipPrimitive.Root>
);
}
export { FilterButton, type FilterButtonProps };

View File

@@ -1,8 +1,5 @@
import "@opal/components/tooltip.css";
import {
Interactive,
type InteractiveStatefulState,
type InteractiveStatefulInteraction,
type InteractiveStatefulProps,
InteractiveContainerRoundingVariant,
} from "@opal/core";
@@ -22,40 +19,26 @@ type ContentPassthroughProps = DistributiveOmit<
"paddingVariant" | "widthVariant" | "ref" | "withInteractive"
>;
type LineItemButtonOwnProps = {
type LineItemButtonOwnProps = Pick<
InteractiveStatefulProps,
| "state"
| "interaction"
| "onClick"
| "href"
| "target"
| "group"
| "ref"
| "type"
> & {
/** Interactive select variant. @default "select-light" */
selectVariant?: "select-light" | "select-heavy";
/** Value state. @default "empty" */
state?: InteractiveStatefulState;
/** JS-controllable interaction state override. @default "rest" */
interaction?: InteractiveStatefulInteraction;
/** Click handler. */
onClick?: InteractiveStatefulProps["onClick"];
/** When provided, renders an anchor instead of a div. */
href?: string;
/** Anchor target (e.g. "_blank"). */
target?: string;
/** Interactive group key. */
group?: string;
/** Forwarded ref. */
ref?: React.Ref<HTMLElement>;
/** Corner rounding preset (height is always content-driven). @default "default" */
roundingVariant?: InteractiveContainerRoundingVariant;
/** Container width. @default "full" */
width?: ExtremaSizeVariants;
/** HTML button type. @default "button" */
type?: "submit" | "button" | "reset";
/** Tooltip text shown on hover. */
tooltip?: string;
@@ -79,11 +62,11 @@ function LineItemButton({
target,
group,
ref,
type = "button",
// Sizing
roundingVariant = "default",
width = "full",
type = "button",
tooltip,
tooltipSide = "top",

View File

@@ -40,13 +40,6 @@ export const Open: Story = {
},
};
export const Disabled: Story = {
args: {
disabled: true,
children: "Disabled",
},
};
export const Foldable: Story = {
args: {
foldable: true,

View File

@@ -1,5 +1,3 @@
import "@opal/components/buttons/open-button/styles.css";
import "@opal/components/tooltip.css";
import {
Interactive,
useDisabled,
@@ -9,24 +7,11 @@ import {
import type { ContainerSizeVariants, ExtremaSizeVariants } from "@opal/types";
import type { InteractiveContainerRoundingVariant } from "@opal/core";
import type { TooltipSide } from "@opal/components";
import type { IconFunctionComponent, IconProps } from "@opal/types";
import { SvgChevronDownSmall } from "@opal/icons";
import type { IconFunctionComponent } from "@opal/types";
import * as TooltipPrimitive from "@radix-ui/react-tooltip";
import { cn } from "@opal/utils";
import { iconWrapper } from "@opal/components/buttons/icon-wrapper";
// ---------------------------------------------------------------------------
// Chevron (stable identity — never causes React to remount the SVG)
// ---------------------------------------------------------------------------
function ChevronIcon({ className, ...props }: IconProps) {
return (
<SvgChevronDownSmall
className={cn(className, "opal-open-button-chevron")}
{...props}
/>
);
}
import { ChevronIcon } from "@opal/components/buttons/chevron";
// ---------------------------------------------------------------------------
// Types

View File

@@ -1,8 +0,0 @@
.opal-open-button-chevron {
transition: rotate 200ms ease;
}
.interactive[data-interaction="hover"] .opal-open-button-chevron,
.interactive[data-interaction="active"] .opal-open-button-chevron {
rotate: -180deg;
}

View File

@@ -1,5 +1,4 @@
import "@opal/components/buttons/select-button/styles.css";
import "@opal/components/tooltip.css";
import {
Interactive,
useDisabled,
@@ -50,9 +49,6 @@ type SelectButtonProps = InteractiveStatefulProps &
*/
size?: ContainerSizeVariants;
/** HTML button type. Container renders a `<button>` element. */
type?: "submit" | "button" | "reset";
/** Tooltip text shown on hover. */
tooltip?: string;

View File

@@ -1,3 +1,5 @@
import "@opal/components/tooltip.css";
/* Shared types */
export type TooltipSide = "top" | "bottom" | "left" | "right";
@@ -19,6 +21,12 @@ export {
type OpenButtonProps,
} from "@opal/components/buttons/open-button/components";
/* FilterButton */
export {
FilterButton,
type FilterButtonProps,
} from "@opal/components/buttons/filter-button/components";
/* LineItemButton */
export {
LineItemButton,

View File

@@ -32,7 +32,13 @@ function ColumnVisibilityPopover<TData extends RowData>({
// User-defined columns only (exclude internal qualifier/actions)
const dataColumns = table
.getAllLeafColumns()
.filter((col) => !col.id.startsWith("__") && col.id !== "qualifier");
.filter(
(col) =>
!col.id.startsWith("__") &&
col.id !== "qualifier" &&
typeof col.columnDef.header === "string" &&
col.columnDef.header.trim() !== ""
);
return (
<Popover open={open} onOpenChange={setOpen}>

View File

@@ -88,9 +88,12 @@ function HoverableRoot({
ref,
onMouseEnter: consumerMouseEnter,
onMouseLeave: consumerMouseLeave,
onFocusCapture: consumerFocusCapture,
onBlurCapture: consumerBlurCapture,
...props
}: HoverableRootProps) {
const [hovered, setHovered] = useState(false);
const [focused, setFocused] = useState(false);
const onMouseEnter = useCallback(
(e: React.MouseEvent<HTMLDivElement>) => {
@@ -108,16 +111,40 @@ function HoverableRoot({
[consumerMouseLeave]
);
const onFocusCapture = useCallback(
(e: React.FocusEvent<HTMLDivElement>) => {
setFocused(true);
consumerFocusCapture?.(e);
},
[consumerFocusCapture]
);
const onBlurCapture = useCallback(
(e: React.FocusEvent<HTMLDivElement>) => {
if (
!(e.relatedTarget instanceof Node) ||
!e.currentTarget.contains(e.relatedTarget)
) {
setFocused(false);
}
consumerBlurCapture?.(e);
},
[consumerBlurCapture]
);
const active = hovered || focused;
const GroupContext = getOrCreateContext(group);
return (
<GroupContext.Provider value={hovered}>
<GroupContext.Provider value={active}>
<div
{...props}
ref={ref}
className={cn(widthVariants[widthVariant])}
onMouseEnter={onMouseEnter}
onMouseLeave={onMouseLeave}
onFocusCapture={onFocusCapture}
onBlurCapture={onBlurCapture}
>
{children}
</div>

View File

@@ -16,3 +16,15 @@
.hoverable-item[data-hoverable-variant="opacity-on-hover"][data-hoverable-local="true"]:hover {
opacity: 1;
}
/* Focus — item (or a focusable descendant) receives keyboard focus */
.hoverable-item[data-hoverable-variant="opacity-on-hover"]:has(:focus-visible) {
opacity: 1;
}
/* Focus ring on keyboard focus */
.hoverable-item:focus-visible {
outline: 2px solid var(--border-04);
outline-offset: 2px;
border-radius: 0.25rem;
}

View File

@@ -3,7 +3,7 @@ import type { Route } from "next";
import "@opal/core/interactive/shared.css";
import React from "react";
import { cn } from "@opal/utils";
import type { WithoutStyles } from "@opal/types";
import type { ButtonType, WithoutStyles } from "@opal/types";
import {
containerSizeVariants,
type ContainerSizeVariants,
@@ -52,7 +52,7 @@ interface InteractiveContainerProps
*
* Mutually exclusive with `href`.
*/
type?: "submit" | "button" | "reset";
type?: ButtonType;
/**
* When `true`, applies a 1px border using the theme's border color.

View File

@@ -8,7 +8,7 @@ Stateful interactive surface primitive for elements that maintain a value state
| Prop | Type | Default | Description |
|------|------|---------|-------------|
| `variant` | `"select-light" \| "select-heavy" \| "sidebar"` | `"select-heavy"` | Color variant |
| `variant` | `"select-light" \| "select-heavy" \| "select-tinted" \| "select-filter" \| "sidebar"` | `"select-heavy"` | Color variant |
| `state` | `"empty" \| "filled" \| "selected"` | `"empty"` | Current value state |
| `interaction` | `"rest" \| "hover" \| "active"` | `"rest"` | JS-controlled interaction override |
| `group` | `string` | — | Tailwind group class for `group-hover:*` |

View File

@@ -4,7 +4,7 @@ import React from "react";
import { Slot } from "@radix-ui/react-slot";
import { cn } from "@opal/utils";
import { useDisabled } from "@opal/core/disabled/components";
import type { WithoutStyles } from "@opal/types";
import type { ButtonType, WithoutStyles } from "@opal/types";
// ---------------------------------------------------------------------------
// Types
@@ -14,6 +14,7 @@ type InteractiveStatefulVariant =
| "select-light"
| "select-heavy"
| "select-tinted"
| "select-filter"
| "sidebar";
type InteractiveStatefulState = "empty" | "filled" | "selected";
type InteractiveStatefulInteraction = "rest" | "hover" | "active";
@@ -30,6 +31,8 @@ interface InteractiveStatefulProps
*
* - `"select-light"` — transparent selected background (for inline toggles)
* - `"select-heavy"` — tinted selected background (for list rows, model pickers)
* - `"select-tinted"` — like select-heavy but with a tinted rest background
* - `"select-filter"` — like select-tinted for empty/filled; selected state uses inverted tint backgrounds and inverted text (for filter buttons)
* - `"sidebar"` — for sidebar navigation items
*
* @default "select-heavy"
@@ -63,6 +66,13 @@ interface InteractiveStatefulProps
*/
group?: string;
/**
* HTML button type. When set to `"submit"`, `"button"`, or `"reset"`, the
* element is treated as inherently interactive for cursor styling purposes
* even without an explicit `onClick` or `href`.
*/
type?: ButtonType;
/**
* URL to navigate to when clicked. Passed through Slot to the child.
*/
@@ -94,6 +104,7 @@ function InteractiveStateful({
state = "empty",
interaction = "rest",
group,
type,
href,
target,
...props
@@ -104,7 +115,7 @@ function InteractiveStateful({
// so Radix Slot-injected handlers don't bypass this guard.
const classes = cn(
"interactive",
!props.onClick && !href && "!cursor-default !select-auto",
!props.onClick && !href && !type && "!cursor-default !select-auto",
group
);

View File

@@ -308,6 +308,89 @@
--interactive-foreground-icon: var(--action-link-03);
}
/* ===========================================================================
Select-Filter — empty/filled identical to Select-Tinted;
selected uses inverted tint backgrounds and inverted text
=========================================================================== */
/* ---------------------------------------------------------------------------
Select-Filter — Empty & Filled (identical colors)
--------------------------------------------------------------------------- */
.interactive[data-interactive-variant="select-filter"]:is(
[data-interactive-state="empty"],
[data-interactive-state="filled"]
) {
@apply bg-background-tint-01;
--interactive-foreground: var(--text-02);
--interactive-foreground-icon: var(--text-02);
}
.interactive[data-interactive-variant="select-filter"]:is(
[data-interactive-state="empty"],
[data-interactive-state="filled"]
):hover:not([data-disabled]),
.interactive[data-interactive-variant="select-filter"]:is(
[data-interactive-state="empty"],
[data-interactive-state="filled"]
)[data-interaction="hover"]:not([data-disabled]) {
@apply bg-background-tint-02;
--interactive-foreground: var(--text-04);
--interactive-foreground-icon: var(--text-04);
}
.interactive[data-interactive-variant="select-filter"]:is(
[data-interactive-state="empty"],
[data-interactive-state="filled"]
):active:not([data-disabled]),
.interactive[data-interactive-variant="select-filter"]:is(
[data-interactive-state="empty"],
[data-interactive-state="filled"]
)[data-interaction="active"]:not([data-disabled]) {
@apply bg-background-neutral-00;
--interactive-foreground: var(--text-05);
--interactive-foreground-icon: var(--text-05);
}
.interactive[data-interactive-variant="select-filter"]:is(
[data-interactive-state="empty"],
[data-interactive-state="filled"]
)[data-disabled] {
@apply bg-transparent;
--interactive-foreground: var(--text-01);
--interactive-foreground-icon: var(--text-01);
}
/* ---------------------------------------------------------------------------
Select-Filter — Selected
--------------------------------------------------------------------------- */
.interactive[data-interactive-variant="select-filter"][data-interactive-state="selected"] {
@apply bg-background-tint-inverted-03;
--interactive-foreground: var(--text-inverted-05);
--interactive-foreground-icon: var(--text-inverted-05);
}
.interactive[data-interactive-variant="select-filter"][data-interactive-state="selected"]:hover:not(
[data-disabled]
),
.interactive[data-interactive-variant="select-filter"][data-interactive-state="selected"][data-interaction="hover"]:not(
[data-disabled]
) {
@apply bg-background-tint-inverted-04;
--interactive-foreground: var(--text-inverted-05);
--interactive-foreground-icon: var(--text-inverted-05);
}
.interactive[data-interactive-variant="select-filter"][data-interactive-state="selected"]:active:not(
[data-disabled]
),
.interactive[data-interactive-variant="select-filter"][data-interactive-state="selected"][data-interaction="active"]:not(
[data-disabled]
) {
@apply bg-background-tint-inverted-04;
--interactive-foreground: var(--text-inverted-04);
--interactive-foreground-icon: var(--text-inverted-04);
}
.interactive[data-interactive-variant="select-filter"][data-interactive-state="selected"][data-disabled] {
@apply bg-background-neutral-04;
--interactive-foreground: var(--text-inverted-04);
--interactive-foreground-icon: var(--text-inverted-02);
}
/* ===========================================================================
Sidebar
=========================================================================== */

View File

@@ -4,7 +4,7 @@ import React from "react";
import { Slot } from "@radix-ui/react-slot";
import { cn } from "@opal/utils";
import { useDisabled } from "@opal/core/disabled/components";
import type { WithoutStyles } from "@opal/types";
import type { ButtonType, WithoutStyles } from "@opal/types";
// ---------------------------------------------------------------------------
// Types
@@ -53,6 +53,13 @@ interface InteractiveStatelessProps
*/
group?: string;
/**
* HTML button type. When set to `"submit"`, `"button"`, or `"reset"`, the
* element is treated as inherently interactive for cursor styling purposes
* even without an explicit `onClick` or `href`.
*/
type?: ButtonType;
/**
* URL to navigate to when clicked. Passed through Slot to the child.
*/
@@ -85,6 +92,7 @@ function InteractiveStateless({
prominence = "primary",
interaction = "rest",
group,
type,
href,
target,
...props
@@ -95,7 +103,7 @@ function InteractiveStateless({
// so Radix Slot-injected handlers don't bypass this guard.
const classes = cn(
"interactive",
!props.onClick && !href && "!cursor-default !select-auto",
!props.onClick && !href && !type && "!cursor-default !select-auto",
group
);
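
The practical effect of threading `type` through both primitives: a plain submit button (no `onClick`, no `href`) previously hit the `!cursor-default !select-auto` guard and rendered with a default cursor. A minimal sketch, reusing the same Stateless/Container composition the `Button` component uses (the surrounding form is illustrative):

```tsx
import { Interactive } from "@opal/core";

// With type="submit" the guard above no longer fires, so the element gets
// interactive cursor styling even though it has no onClick or href.
function SubmitRow() {
  return (
    <form>
      <Interactive.Stateless type="submit" prominence="primary">
        <Interactive.Container type="submit">Save</Interactive.Container>
      </Interactive.Stateless>
    </form>
  );
}
```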

View File

@@ -0,0 +1,20 @@
import type { IconProps } from "@opal/types";
const SvgEyeOff = ({ size, ...props }: IconProps) => (
<svg
width={size}
height={size}
viewBox="0 0 16 16"
fill="none"
xmlns="http://www.w3.org/2000/svg"
stroke="currentColor"
{...props}
>
<path
d="M11.78 11.78C10.6922 12.6092 9.36761 13.0685 8 13.0909C3.54545 13.0909 1 8 1 8C1.79157 6.52484 2.88945 5.23602 4.22 4.22M11.78 11.78L9.34909 9.34909M11.78 11.78L15 15M4.22 4.22L1 1M4.22 4.22L6.65091 6.65091M6.66364 3.06182C7.10167 2.95929 7.55013 2.90803 8 2.90909C12.4545 2.90909 15 8 15 8C14.6137 8.72266 14.153 9.40301 13.6255 10.03M9.34909 9.34909L6.65091 6.65091M9.34909 9.34909C8.99954 9.72422 8.49873 9.94737 7.98606 9.95641C6.922 9.97519 6.02481 9.078 6.04358 8.01394C6.05263 7.50127 6.27578 7.00046 6.65091 6.65091"
strokeWidth={1.5}
strokeLinecap="round"
strokeLinejoin="round"
/>
</svg>
);
export default SvgEyeOff;

View File

@@ -0,0 +1,21 @@
import type { IconProps } from "@opal/types";
const SvgFileBroadcast = ({ size, ...props }: IconProps) => (
<svg
width={size}
height={size}
viewBox="0 0 18 18"
fill="none"
xmlns="http://www.w3.org/2000/svg"
stroke="currentColor"
{...props}
>
<path
d="M6.1875 2.25003H2.625C1.808 2.25003 1.125 2.93303 1.125 3.75003L1.125 14.25C1.125 15.067 1.808 15.75 2.625 15.75L9.37125 15.75C10.1883 15.75 10.8713 15.067 10.8713 14.25L10.8713 6.94128M6.1875 2.25003L10.8713 6.94128M6.1875 2.25003V6.94128H10.8713M10.3069 2.25L13.216 5.15914C13.6379 5.5811 13.875 6.15339 13.875 6.75013V13.875C13.875 14.5212 13.737 15.2081 13.4392 15.7538M16.4391 15.7538C16.737 15.2081 16.875 14.5213 16.875 13.8751L16.875 7.02481C16.875 5.53418 16.2833 4.10451 15.23 3.04982L14.4301 2.25003"
strokeWidth={1.5}
strokeLinecap="round"
strokeLinejoin="round"
/>
</svg>
);
export default SvgFileBroadcast;

View File

@@ -0,0 +1,21 @@
import type { IconProps } from "@opal/types";
const SvgHookNodes = ({ size, ...props }: IconProps) => (
<svg
width={size}
height={size}
viewBox="0 0 16 16"
fill="none"
xmlns="http://www.w3.org/2000/svg"
stroke="currentColor"
{...props}
>
<path
d="M10.0002 4C10.0002 3.99708 10.0002 3.99415 10.0001 3.99123C9.99542 2.8907 9.10181 2 8.00016 2C6.89559 2 6.00016 2.89543 6.00016 4C6.00016 4.73701 6.39882 5.38092 6.99226 5.72784L4.67276 9.70412M11.6589 13.7278C11.9549 13.9009 12.2993 14 12.6668 14C13.7714 14 14.6668 13.1046 14.6668 12C14.6668 10.8954 13.7714 10 12.6668 10C12.2993 10 11.9549 10.0991 11.6589 10.2722L9.33943 6.29588M2.33316 10.2678C1.73555 10.6136 1.3335 11.2599 1.3335 12C1.3335 13.1046 2.22893 14 3.3335 14C4.43807 14 5.3335 13.1046 5.3335 12H10.0002"
strokeWidth={1.5}
strokeLinecap="round"
strokeLinejoin="round"
/>
</svg>
);
export default SvgHookNodes;

View File

@@ -68,8 +68,10 @@ export { default as SvgExpand } from "@opal/icons/expand";
export { default as SvgExternalLink } from "@opal/icons/external-link";
export { default as SvgEye } from "@opal/icons/eye";
export { default as SvgEyeClosed } from "@opal/icons/eye-closed";
export { default as SvgFiles } from "@opal/icons/files";
export { default as SvgEyeOff } from "@opal/icons/eye-off";
export { default as SvgFileBraces } from "@opal/icons/file-braces";
export { default as SvgFileBroadcast } from "@opal/icons/file-broadcast";
export { default as SvgFiles } from "@opal/icons/files";
export { default as SvgFileChartPie } from "@opal/icons/file-chart-pie";
export { default as SvgFileSmall } from "@opal/icons/file-small";
export { default as SvgFileText } from "@opal/icons/file-text";
@@ -89,6 +91,7 @@ export { default as SvgHashSmall } from "@opal/icons/hash-small";
export { default as SvgHash } from "@opal/icons/hash";
export { default as SvgHeadsetMic } from "@opal/icons/headset-mic";
export { default as SvgHistory } from "@opal/icons/history";
export { default as SvgHookNodes } from "@opal/icons/hook-nodes";
export { default as SvgHourglass } from "@opal/icons/hourglass";
export { default as SvgImage } from "@opal/icons/image";
export { default as SvgImageSmall } from "@opal/icons/image-small";
@@ -120,7 +123,9 @@ export { default as SvgNetworkGraph } from "@opal/icons/network-graph";
export { default as SvgNotificationBubble } from "@opal/icons/notification-bubble";
export { default as SvgOllama } from "@opal/icons/ollama";
export { default as SvgOnyxLogo } from "@opal/icons/onyx-logo";
export { default as SvgOnyxLogoTyped } from "@opal/icons/onyx-logo-typed";
export { default as SvgOnyxOctagon } from "@opal/icons/onyx-octagon";
export { default as SvgOnyxTyped } from "@opal/icons/onyx-typed";
export { default as SvgOpenai } from "@opal/icons/openai";
export { default as SvgOpenrouter } from "@opal/icons/openrouter";
export { default as SvgOrganization } from "@opal/icons/organization";

View File

@@ -0,0 +1,27 @@
import SvgOnyxLogo from "@opal/icons/onyx-logo";
import SvgOnyxTyped from "@opal/icons/onyx-typed";
import { cn } from "@opal/utils";
interface OnyxLogoTypedProps {
size?: number;
className?: string;
}
// # NOTE(@raunakab):
// This ratio is not some random, magical number; it is available on Figma.
const HEIGHT_TO_GAP_RATIO = 5 / 16;
const SvgOnyxLogoTyped = ({ size: height, className }: OnyxLogoTypedProps) => {
const gap = height != null ? height * HEIGHT_TO_GAP_RATIO : undefined;
return (
<div
className={cn(`flex flex-row items-center`, className)}
style={{ gap }}
>
<SvgOnyxLogo size={height} />
<SvgOnyxTyped size={height} />
</div>
);
};
export default SvgOnyxLogoTyped;
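
Worked through: at `size={32}` the gap resolves to 32 × (5 / 16) = 10px, and at `size={16}` to 5px, so the mark-to-wordmark spacing scales with the logo instead of staying fixed:

```tsx
// Gap scales with the rendered height: 32 * (5 / 16) = 10, 16 * (5 / 16) = 5.
<SvgOnyxLogoTyped size={32} />
```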

View File

@@ -1,19 +1,27 @@
import type { IconProps } from "@opal/types";
const SvgOnyxLogo = ({ size, ...props }: IconProps) => (
<svg
width={size}
height={size}
viewBox="0 0 56 56"
viewBox="0 0 64 64"
fill="none"
xmlns="http://www.w3.org/2000/svg"
stroke="currentColor"
{...props}
>
<path
fillRule="evenodd"
clipRule="evenodd"
d="M28 0 10.869 7.77 28 15.539l17.131-7.77L28 0Zm0 40.461-17.131 7.77L28 56l17.131-7.77L28 40.461Zm20.231-29.592L56 28.001l-7.769 17.131L40.462 28l7.769-17.131ZM15.538 28 7.77 10.869 0 28l7.769 17.131L15.538 28Z"
fill="currentColor"
d="M10.4014 13.25L18.875 32L10.3852 50.75L2 32L10.4014 13.25Z"
fill="var(--theme-primary-05)"
/>
<path
d="M53.5264 13.25L62 32L53.5102 50.75L45.125 32L53.5264 13.25Z"
fill="var(--theme-primary-05)"
/>
<path
d="M32 45.125L50.75 53.5625L32 62L13.25 53.5625L32 45.125Z"
fill="var(--theme-primary-05)"
/>
<path
d="M32 2L50.75 10.4375L32 18.875L13.25 10.4375L32 2Z"
fill="var(--theme-primary-05)"
/>
</svg>
);

View File

@@ -0,0 +1,28 @@
import type { IconProps } from "@opal/types";
const SvgOnyxTyped = ({ size, ...props }: IconProps) => (
<svg
height={size}
viewBox="0 0 152 64"
fill="none"
xmlns="http://www.w3.org/2000/svg"
{...props}
>
<path
d="M19.1795 51.2136C15.6695 51.2136 12.4353 50.3862 9.47691 48.7315C6.56865 47.0768 4.2621 44.8454 2.55726 42.0374C0.85242 39.1793 0 36.0955 0 32.7861C0 30.279 0.451281 27.9223 1.35384 25.716C2.30655 23.4596 3.76068 21.3285 5.71623 19.3228L11.8085 13.08C12.4604 12.6789 13.4131 12.3529 14.6666 12.1022C15.9202 11.8014 17.2991 11.6509 18.8034 11.6509C22.3134 11.6509 25.5225 12.4783 28.4307 14.133C31.3891 15.7877 33.7208 18.0441 35.4256 20.9023C37.1304 23.7103 37.9829 26.794 37.9829 30.1536C37.9829 32.6106 37.5065 34.9673 36.5538 37.2237C35.6512 39.4802 34.147 41.6864 32.041 43.8426L26.3248 49.7845C25.3219 50.2358 24.2188 50.5868 23.0154 50.8375C21.8621 51.0882 20.5835 51.2136 19.1795 51.2136ZM20.1572 43.8426C21.8621 43.8426 23.4917 43.4164 25.0461 42.5639C26.6005 41.6614 27.8541 40.3577 28.8068 38.6528C29.8097 36.948 30.3111 34.9172 30.3111 32.5605C30.3111 30.0032 29.6843 27.6966 28.4307 25.6408C27.2273 23.5849 25.6478 21.9803 23.6923 20.8271C21.7869 19.6236 19.8313 19.0219 17.8256 19.0219C16.0706 19.0219 14.4159 19.4732 12.8615 20.3758C11.3573 21.2282 10.1288 22.5068 9.17606 24.2117C8.22335 25.9166 7.747 27.9473 7.747 30.304C7.747 32.8613 8.34871 35.1679 9.55212 37.2237C10.7555 39.2796 12.31 40.9092 14.2154 42.1127C16.1709 43.2659 18.1515 43.8426 20.1572 43.8426Z"
fill="var(--theme-primary-05)"
/>
<path
d="M42.6413 50.4614V12.4031H50.6891V17.7433L55.5028 12.7039C56.0544 12.4532 56.8065 12.2276 57.7592 12.027C58.7621 11.7763 59.8903 11.6509 61.1438 11.6509C64.0521 11.6509 66.5843 12.3028 68.7404 13.6065C70.9467 14.8601 72.6264 16.6401 73.7797 18.9467C74.9831 21.2533 75.5848 23.961 75.5848 27.0698V50.4614H67.6122V29.1006C67.6122 26.9946 67.2612 25.1895 66.5592 23.6852C65.9074 22.1308 64.9547 20.9775 63.7011 20.2253C62.4977 19.4231 61.0686 19.0219 59.4139 19.0219C56.7564 19.0219 54.6253 19.9245 53.0208 21.7296C51.4663 23.4846 50.6891 25.9416 50.6891 29.1006V50.4614H42.6413Z"
fill="var(--theme-primary-05)"
/>
<path
d="M82.3035 64V56.0273H89.9753C91.2288 56.0273 92.2066 55.7264 92.9086 55.1247C93.6607 54.523 94.2625 53.5452 94.7137 52.1913L108.027 12.4031H116.751L103.664 49.4084C103.062 51.1634 102.461 52.5173 101.859 53.47C101.307 54.4227 100.53 55.4506 99.5274 56.5538L92.4573 64H82.3035ZM90.7274 46.6255L76.9633 12.4031H85.989L99.4522 46.6255H90.7274Z"
fill="var(--theme-primary-05)"
/>
<path
d="M115.657 50.4614L129.045 31.2066L116.033 12.4031H125.435L134.085 24.8134L142.358 12.4031H151.308L138.372 31.0562L151.684 50.4614H142.358L133.332 37.3742L124.683 50.4614H115.657Z"
fill="var(--theme-primary-05)"
/>
</svg>
);
export default SvgOnyxTyped;

View File

@@ -32,6 +32,8 @@ interface ContentMdPresetConfig {
optionalFont: string;
/** Aux icon size = lineHeight 2 × p-0.5. */
auxIconSize: string;
/** Left indent for the description so it aligns with the title (past the icon). */
descriptionIndent: string;
}
interface ContentMdProps {
@@ -85,6 +87,7 @@ const CONTENT_MD_PRESETS: Record<ContentMdSizePreset, ContentMdPresetConfig> = {
editButtonPadding: "p-0",
optionalFont: "font-main-content-muted",
auxIconSize: "1.25rem",
descriptionIndent: "1.625rem",
},
"main-ui": {
iconSize: "1rem",
@@ -97,6 +100,7 @@ const CONTENT_MD_PRESETS: Record<ContentMdSizePreset, ContentMdPresetConfig> = {
editButtonPadding: "p-0",
optionalFont: "font-main-ui-muted",
auxIconSize: "1rem",
descriptionIndent: "1.375rem",
},
secondary: {
iconSize: "0.75rem",
@@ -109,6 +113,7 @@ const CONTENT_MD_PRESETS: Record<ContentMdSizePreset, ContentMdPresetConfig> = {
editButtonPadding: "p-0",
optionalFont: "font-secondary-action",
auxIconSize: "0.75rem",
descriptionIndent: "1.125rem",
},
};
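
The three indent values look like `iconSize` plus a constant 0.375rem icon gap (1.25 + 0.375 = 1.625, 1 + 0.375 = 1.375, 0.75 + 0.375 = 1.125), which would place the description's left edge exactly under the title text. The gap values themselves are outside these hunks, so treat the relationship as an inference:

```ts
// Inferred, not stated in this diff: descriptionIndent = iconSize + icon gap.
const descriptionIndentRem = (iconSizeRem: number, gapRem = 0.375): string =>
  `${iconSizeRem + gapRem}rem`; // e.g. content-md: 1.25 + 0.375 -> "1.625rem"
```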
@@ -163,22 +168,25 @@ function ContentMd({
data-interactive={withInteractive || undefined}
style={{ gap: config.gap }}
>
{Icon && (
<div
className={cn(
"opal-content-md-icon-container shrink-0",
config.iconContainerPadding
)}
style={{ minHeight: config.lineHeight }}
>
<Icon
className={cn("opal-content-md-icon", config.iconColorClass)}
style={{ width: config.iconSize, height: config.iconSize }}
/>
</div>
)}
<div
className="opal-content-md-header"
data-editing={editing || undefined}
>
{Icon && (
<div
className={cn(
"opal-content-md-icon-container shrink-0",
config.iconContainerPadding
)}
style={{ minHeight: config.lineHeight }}
>
<Icon
className={cn("opal-content-md-icon", config.iconColorClass)}
style={{ width: config.iconSize, height: config.iconSize }}
/>
</div>
)}
<div className="opal-content-md-body">
<div className="opal-content-md-title-row">
{editing ? (
<div className="opal-content-md-input-sizer">
@@ -274,13 +282,16 @@ function ContentMd({
</div>
)}
</div>
{description && (
<div className="opal-content-md-description font-secondary-body text-text-03">
{description}
</div>
)}
</div>
{description && (
<div
className="opal-content-md-description font-secondary-body text-text-03"
style={Icon ? { paddingLeft: config.descriptionIndent } : undefined}
>
{description}
</div>
)}
</div>
);
}

View File

@@ -224,7 +224,16 @@
--------------------------------------------------------------------------- */
.opal-content-md {
@apply flex flex-row items-start;
@apply flex flex-col items-start;
}
.opal-content-md-header {
@apply flex flex-row items-center w-full;
}
.opal-content-md-header[data-editing] {
@apply rounded-08;
box-shadow: inset 0 0 0 1px var(--border-02);
}
/* ---------------------------------------------------------------------------
@@ -237,15 +246,6 @@
justify-content: center;
}
/* ---------------------------------------------------------------------------
Body column
--------------------------------------------------------------------------- */
.opal-content-md-body {
@apply flex flex-1 flex-col items-start;
min-width: 0.0625rem;
}
/* ---------------------------------------------------------------------------
Title row — title (or input) + edit button
--------------------------------------------------------------------------- */
@@ -267,6 +267,7 @@
.opal-content-md-input-sizer {
display: inline-grid;
align-items: stretch;
width: 100%;
}
.opal-content-md-input-sizer > * {

View File

@@ -86,6 +86,15 @@ export interface IconProps extends SVGProps<SVGSVGElement> {
/** Strips `className` and `style` from a props type to enforce design-system styling. */
export type WithoutStyles<T> = Omit<T, "className" | "style">;
/**
* HTML button `type` attribute values.
*
* Used by interactive primitives and button-like components to indicate that
* the element is inherently interactive for cursor-styling purposes, even
* without an explicit `onClick` or `href`.
*/
export type ButtonType = "submit" | "button" | "reset";
/** Like `Omit` but distributes over union types, preserving discriminated unions. */
export type DistributiveOmit<T, K extends keyof any> = T extends any
? Omit<T, K>

View File

@@ -1,320 +0,0 @@
"use client";
import Text from "@/refresh-components/texts/Text";
import { Persona } from "./interfaces";
import { useRouter } from "next/navigation";
import Checkbox from "@/refresh-components/inputs/Checkbox";
import { toast } from "@/hooks/useToast";
import { useState, useMemo, useEffect } from "react";
import { UniqueIdentifier } from "@dnd-kit/core";
import { DraggableTable } from "@/components/table/DraggableTable";
import {
deletePersona,
personaComparator,
togglePersonaFeatured,
togglePersonaVisibility,
} from "./lib";
import { FiEdit2 } from "react-icons/fi";
import { useUser } from "@/providers/UserProvider";
import { Button } from "@opal/components";
import ConfirmationModalLayout from "@/refresh-components/layouts/ConfirmationModalLayout";
import { SvgAlertCircle, SvgTrash } from "@opal/icons";
import type { Route } from "next";
function PersonaTypeDisplay({ persona }: { persona: Persona }) {
if (persona.builtin_persona) {
return <Text as="p">Built-In</Text>;
}
if (persona.is_featured) {
return <Text as="p">Featured</Text>;
}
if (persona.is_public) {
return <Text as="p">Public</Text>;
}
if (persona.groups.length > 0 || persona.users.length > 0) {
return <Text as="p">Shared</Text>;
}
return (
<Text as="p">Personal {persona.owner && <>({persona.owner.email})</>}</Text>
);
}
export function PersonasTable({
personas,
refreshPersonas,
currentPage,
pageSize,
}: {
personas: Persona[];
refreshPersonas: () => void;
currentPage: number;
pageSize: number;
}) {
const router = useRouter();
const { refreshUser, isAdmin } = useUser();
const editablePersonas = useMemo(() => {
return personas.filter((p) => !p.builtin_persona);
}, [personas]);
const editablePersonaIds = useMemo(() => {
return new Set(editablePersonas.map((p) => p.id.toString()));
}, [editablePersonas]);
const [finalPersonas, setFinalPersonas] = useState<Persona[]>([]);
const [deleteModalOpen, setDeleteModalOpen] = useState(false);
const [personaToDelete, setPersonaToDelete] = useState<Persona | null>(null);
const [defaultModalOpen, setDefaultModalOpen] = useState(false);
const [personaToToggleDefault, setPersonaToToggleDefault] =
useState<Persona | null>(null);
useEffect(() => {
const editable = editablePersonas.sort(personaComparator);
const nonEditable = personas
.filter((p) => !editablePersonaIds.has(p.id.toString()))
.sort(personaComparator);
setFinalPersonas([...editable, ...nonEditable]);
}, [editablePersonas, personas, editablePersonaIds]);
const updatePersonaOrder = async (orderedPersonaIds: UniqueIdentifier[]) => {
const reorderedPersonas = orderedPersonaIds.map(
(id) => personas.find((persona) => persona.id.toString() === id)!
);
setFinalPersonas(reorderedPersonas);
// Calculate display_priority based on current page.
// Page 1 (items 0-9): priorities 0-9
// Page 2 (items 10-19): priorities 10-19, etc.
const pageStartIndex = (currentPage - 1) * pageSize;
const displayPriorityMap = new Map<UniqueIdentifier, number>();
orderedPersonaIds.forEach((personaId, ind) => {
displayPriorityMap.set(personaId, pageStartIndex + ind);
});
const response = await fetch("/api/admin/agents/display-priorities", {
method: "PATCH",
headers: {
"Content-Type": "application/json",
},
body: JSON.stringify({
display_priority_map: Object.fromEntries(displayPriorityMap),
}),
});
if (!response.ok) {
toast.error(`Failed to update persona order - ${await response.text()}`);
setFinalPersonas(personas);
await refreshPersonas();
return;
}
await refreshPersonas();
await refreshUser();
};
const openDeleteModal = (persona: Persona) => {
setPersonaToDelete(persona);
setDeleteModalOpen(true);
};
const closeDeleteModal = () => {
setDeleteModalOpen(false);
setPersonaToDelete(null);
};
const handleDeletePersona = async () => {
if (personaToDelete) {
const response = await deletePersona(personaToDelete.id);
if (response.ok) {
refreshPersonas();
closeDeleteModal();
} else {
toast.error(`Failed to delete persona - ${await response.text()}`);
}
}
};
const openDefaultModal = (persona: Persona) => {
setPersonaToToggleDefault(persona);
setDefaultModalOpen(true);
};
const closeDefaultModal = () => {
setDefaultModalOpen(false);
setPersonaToToggleDefault(null);
};
const handleToggleDefault = async () => {
if (personaToToggleDefault) {
const response = await togglePersonaFeatured(
personaToToggleDefault.id,
personaToToggleDefault.is_featured
);
if (response.ok) {
refreshPersonas();
closeDefaultModal();
} else {
toast.error(`Failed to update persona - ${await response.text()}`);
}
}
};
return (
<div>
{deleteModalOpen && personaToDelete && (
<ConfirmationModalLayout
icon={SvgAlertCircle}
title="Delete Agent"
onClose={closeDeleteModal}
submit={<Button onClick={handleDeletePersona}>Delete</Button>}
>
{`Are you sure you want to delete ${personaToDelete.name}?`}
</ConfirmationModalLayout>
)}
{defaultModalOpen &&
personaToToggleDefault &&
(() => {
const isDefault = personaToToggleDefault.is_featured;
const title = isDefault
? "Remove Featured Agent"
: "Set Featured Agent";
const buttonText = isDefault ? "Remove Feature" : "Set as Featured";
const text = isDefault
? `Are you sure you want to remove the featured status of ${personaToToggleDefault.name}?`
: `Are you sure you want to set the featured status of ${personaToToggleDefault.name}?`;
const additionalText = isDefault
? `Removing "${personaToToggleDefault.name}" as a featured agent will not affect its visibility or accessibility.`
: `Setting "${personaToToggleDefault.name}" as a featured agent will make it public and visible to all users. This action cannot be undone.`;
return (
<ConfirmationModalLayout
icon={SvgAlertCircle}
title={title}
onClose={closeDefaultModal}
submit={
<Button onClick={handleToggleDefault}>{buttonText}</Button>
}
>
<div className="flex flex-col gap-2">
<Text as="p">{text}</Text>
<Text as="p" text03>
{additionalText}
</Text>
</div>
</ConfirmationModalLayout>
);
})()}
<DraggableTable
headers={[
"Name",
"Description",
"Type",
"Featured Agent",
"Is Visible",
"Delete",
]}
isAdmin={isAdmin}
rows={finalPersonas.map((persona) => {
const isEditable = editablePersonas.includes(persona);
return {
id: persona.id.toString(),
cells: [
<div key="name" className="flex">
{!persona.builtin_persona && (
<FiEdit2
className="mr-1 my-auto cursor-pointer"
onClick={() =>
router.push(
`/app/agents/edit/${
persona.id
}?u=${Date.now()}&admin=true` as Route
)
}
/>
)}
<p className="text font-medium whitespace-normal break-none">
{persona.name}
</p>
</div>,
<p
key="description"
className="whitespace-normal break-all max-w-2xl"
>
{persona.description}
</p>,
<PersonaTypeDisplay key={persona.id} persona={persona} />,
<div
key="featured"
onClick={() => {
openDefaultModal(persona);
}}
className={`
px-1 py-0.5 rounded flex hover:bg-accent-background-hovered cursor-pointer select-none w-fit items-center gap-2
`}
>
<div className="my-auto flex-none w-22">
{!persona.is_featured ? (
<div className="text-error">Not Featured</div>
) : (
"Featured"
)}
</div>
<Checkbox checked={persona.is_featured} />
</div>,
<div
key="is_visible"
onClick={async () => {
const response = await togglePersonaVisibility(
persona.id,
persona.is_listed
);
if (response.ok) {
refreshPersonas();
} else {
toast.error(
`Failed to update persona - ${await response.text()}`
);
}
}}
className={`
px-1 py-0.5 rounded flex hover:bg-accent-background-hovered cursor-pointer select-none w-fit items-center gap-2
`}
>
<div className="my-auto w-fit">
{!persona.is_listed ? (
<div className="text-error">Hidden</div>
) : (
"Visible"
)}
</div>
<Checkbox checked={persona.is_listed} />
</div>,
<div key="edit" className="flex">
<div className="mr-auto my-auto">
{!persona.builtin_persona && isEditable ? (
<Button
icon={SvgTrash}
prominence="tertiary"
onClick={() => openDeleteModal(persona)}
/>
) : (
<Text as="p">-</Text>
)}
</div>
</div>,
],
staticModifiers: [[1, "lg:w-[250px] xl:w-[400px] 2xl:w-[550px]"]],
};
})}
setRows={updatePersonaOrder}
/>
</div>
);
}

View File

@@ -1,160 +1 @@
"use client";
import { PersonasTable } from "./PersonaTable";
import Text from "@/components/ui/text";
import Title from "@/components/ui/title";
import Separator from "@/refresh-components/Separator";
import { SubLabel } from "@/components/Field";
import * as SettingsLayouts from "@/layouts/settings-layouts";
import CreateButton from "@/refresh-components/buttons/CreateButton";
import { useAdminPersonas } from "@/hooks/useAdminPersonas";
import { Persona } from "./interfaces";
import { ThreeDotsLoader } from "@/components/Loading";
import { ErrorCallout } from "@/components/ErrorCallout";
import { ADMIN_ROUTES } from "@/lib/admin-routes";
import { useState, useEffect } from "react";
import { Pagination } from "@opal/components";
const route = ADMIN_ROUTES.AGENTS;
const PAGE_SIZE = 20;
function MainContent({
personas,
totalItems,
currentPage,
onPageChange,
refreshPersonas,
}: {
personas: Persona[];
totalItems: number;
currentPage: number;
onPageChange: (page: number) => void;
refreshPersonas: () => void;
}) {
// Filter out default/unified assistants.
// NOTE: The backend should already exclude them if includeDefault = false is
// provided. That change was made with the introduction of pagination; we keep
// this filter here for now for backwards compatibility.
const customPersonas = personas.filter((persona) => !persona.builtin_persona);
const totalPages = Math.ceil(totalItems / PAGE_SIZE);
// Clamp currentPage when totalItems shrinks (e.g., deleting the last item on a page)
useEffect(() => {
if (currentPage > totalPages && totalPages > 0) {
onPageChange(totalPages);
}
}, [currentPage, totalPages, onPageChange]);
return (
<div>
<Text className="mb-2">
Agents are a way to build custom search/question-answering experiences
for different use cases.
</Text>
<Text className="mt-2">They allow you to customize:</Text>
<div className="text-sm">
<ul className="list-disc mt-2 ml-4">
<li>
The prompt used by your LLM of choice to respond to the user query
</li>
<li>The documents that are used as context</li>
</ul>
</div>
<div>
<Separator />
<Title>Create an Agent</Title>
<CreateButton href="/app/agents/create?admin=true">
New Agent
</CreateButton>
<Separator />
<Title>Existing Agents</Title>
{totalItems > 0 ? (
<>
<SubLabel>
Agents will be displayed as options on the Chat / Search
interfaces in the order they are displayed below. Agents marked as
hidden will not be displayed. Editable agents are shown at the
top.
</SubLabel>
<PersonasTable
personas={customPersonas}
refreshPersonas={refreshPersonas}
currentPage={currentPage}
pageSize={PAGE_SIZE}
/>
{totalPages > 1 && (
<Pagination
currentPage={currentPage}
totalPages={totalPages}
onChange={onPageChange}
/>
)}
</>
) : (
<div className="mt-6 p-8 border border-border rounded-lg bg-background-weak text-center">
<Text className="text-lg font-medium mb-2">
No custom agents yet
</Text>
<Text className="text-subtle mb-3">
Create your first agent to:
</Text>
<ul className="text-subtle text-sm list-disc text-left inline-block mb-3">
<li>Build department-specific knowledge bases</li>
<li>Create specialized research agents</li>
<li>Set up compliance and policy advisors</li>
</ul>
<Text className="text-subtle text-sm mb-4">
...and so much more!
</Text>
<CreateButton href="/app/agents/create?admin=true">
Create Your First Agent
</CreateButton>
</div>
)}
</div>
</div>
);
}
export default function Page() {
const [currentPage, setCurrentPage] = useState(1);
const { personas, totalItems, isLoading, error, refresh } = useAdminPersonas({
pageNum: currentPage - 1, // Backend uses 0-indexed pages
pageSize: PAGE_SIZE,
});
return (
<SettingsLayouts.Root>
<SettingsLayouts.Header icon={route.icon} title={route.title} separator />
<SettingsLayouts.Body>
{isLoading && <ThreeDotsLoader />}
{error && (
<ErrorCallout
errorTitle="Failed to load agents"
errorMsg={
error?.info?.message ||
error?.info?.detail ||
"An unknown error occurred"
}
/>
)}
{!isLoading && !error && (
<MainContent
personas={personas}
totalItems={totalItems}
currentPage={currentPage}
onPageChange={setCurrentPage}
refreshPersonas={refresh}
/>
)}
</SettingsLayouts.Body>
</SettingsLayouts.Root>
);
}
export { default } from "@/refresh-pages/admin/AgentsPage";

View File

@@ -395,7 +395,7 @@ function SeatsCard({
<InputLayouts.Vertical title="Seats">
<InputNumber
value={newSeatCount}
onChange={setNewSeatCount}
onChange={(v) => setNewSeatCount(v ?? 1)}
min={1}
defaultValue={totalSeats}
showReset

View File

@@ -230,7 +230,7 @@ export default function CheckoutView({ onAdjustPlan }: CheckoutViewProps) {
>
<InputNumber
value={seats}
onChange={setSeats}
onChange={(v) => setSeats(v ?? minRequiredSeats)}
min={minRequiredSeats}
defaultValue={minRequiredSeats}
showReset

View File

@@ -260,7 +260,7 @@ export default function VoiceProviderSetupModal({
<SvgArrowExchange className="size-3 text-text-04" />
</div>
<div className="flex items-center justify-center size-7 p-0.5 shrink-0 overflow-clip">
<SvgOnyxLogo size={24} className="text-text-04 shrink-0" />
<SvgOnyxLogo size={24} className="shrink-0" />
</div>
</div>
);

View File

@@ -69,7 +69,7 @@ export const WebProviderSetupModal = memo(
<SvgArrowExchange className="size-3 text-text-04" />
</div>
<div className="flex items-center justify-center size-7 p-0.5 shrink-0 overflow-clip">
<SvgOnyxLogo size={24} className="text-text-04 shrink-0" />
<SvgOnyxLogo size={24} className="shrink-0" />
</div>
</div>
);

View File

@@ -1372,7 +1372,7 @@ export default function Page() {
} logo`,
fallback:
selectedContentProviderType === "onyx_web_crawler" ? (
<SvgOnyxLogo size={24} className="text-text-05" />
<SvgOnyxLogo size={24} />
) : undefined,
size: 24,
containerSize: 28,

View File

@@ -98,7 +98,7 @@ export default function IndexAttemptErrorsModal({
return (
<Modal open onOpenChange={onClose}>
<Modal.Content width="lg" height="full">
<Modal.Content width="full" height="full">
<Modal.Header
icon={SvgAlertTriangle}
title="Indexing Errors"

View File

@@ -0,0 +1 @@
export { default } from "@/refresh-pages/admin/GroupsPage/CreateGroupPage";

View File

@@ -0,0 +1 @@
export { default } from "@/refresh-pages/admin/HooksPage";

View File

@@ -28,7 +28,12 @@ export default function Layout({ children }: LayoutProps) {
<SettingsLayouts.Header icon={SvgSliders} title="Settings" separator />
<SettingsLayouts.Body>
<Section flexDirection="row" alignItems="start" gap={1.5}>
<Section
flexDirection="row"
justifyContent="start"
alignItems="start"
gap={1.5}
>
{/* Left: Tab Navigation */}
<div
data-testid="settings-left-tab-navigation"

View File

@@ -7,8 +7,11 @@ import { processRawChatHistory } from "@/app/app/services/lib";
import { getLatestMessageChain } from "@/app/app/services/messageTree";
import HumanMessage from "@/app/app/message/HumanMessage";
import AgentMessage from "@/app/app/message/messageComponents/AgentMessage";
import { Callout } from "@/components/ui/callout";
import OnyxInitializingLoader from "@/components/OnyxInitializingLoader";
import { Section } from "@/layouts/general-layouts";
import { IllustrationContent } from "@opal/layouts";
import SvgNotFound from "@opal/illustrations/not-found";
import { Button } from "@opal/components";
import { Persona } from "@/app/admin/agents/interfaces";
import { MinimalOnyxDocument } from "@/lib/search/interfaces";
import PreviewModal from "@/sections/modals/PreviewModal";
@@ -33,12 +36,17 @@ export default function SharedChatDisplay({
if (!chatSession) {
return (
<div className="min-h-full w-full">
<div className="mx-auto w-fit pt-8">
<Callout type="danger" title="Shared Chat Not Found">
Did not find a shared chat with the specified ID.
</Callout>
</div>
<div className="h-full w-full flex flex-col items-center justify-center">
<Section flexDirection="column" alignItems="center" gap={1}>
<IllustrationContent
illustration={SvgNotFound}
title="Shared chat not found"
description="Did not find a shared chat with the specified ID."
/>
<Button href="/app" prominence="secondary">
Start a new chat
</Button>
</Section>
</div>
);
}
@@ -51,12 +59,17 @@ export default function SharedChatDisplay({
if (firstMessage === undefined) {
return (
<div className="min-h-full w-full">
<div className="mx-auto w-fit pt-8">
<Callout type="danger" title="Shared Chat Not Found">
No messages found in shared chat.
</Callout>
</div>
<div className="h-full w-full flex flex-col items-center justify-center">
<Section flexDirection="column" alignItems="center" gap={1}>
<IllustrationContent
illustration={SvgNotFound}
title="Shared chat not found"
description="No messages found in shared chat."
/>
<Button href="/app" prominence="secondary">
Start a new chat
</Button>
</Section>
</div>
);
}

View File

@@ -13,6 +13,7 @@ import {
type KeyboardEvent,
} from "react";
import { useRouter } from "next/navigation";
import { getPastedFilesIfNoText } from "@/lib/clipboard";
import { cn, isImageFile } from "@/lib/utils";
import { Disabled } from "@opal/core";
import {
@@ -230,21 +231,11 @@ const InputBar = memo(
const handlePaste = useCallback(
(event: ClipboardEvent) => {
const items = event.clipboardData?.items;
if (items) {
const pastedFiles: File[] = [];
for (let i = 0; i < items.length; i++) {
const item = items[i];
if (item && item.kind === "file") {
const file = item.getAsFile();
if (file) pastedFiles.push(file);
}
}
if (pastedFiles.length > 0) {
event.preventDefault();
// Context handles session binding internally
uploadFiles(pastedFiles);
}
const pastedFiles = getPastedFilesIfNoText(event.clipboardData);
if (pastedFiles.length > 0) {
event.preventDefault();
// Context handles session binding internally
uploadFiles(pastedFiles);
}
},
[uploadFiles]
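
The body of `getPastedFilesIfNoText` is not part of this diff; a plausible sketch of its contract, assuming it returns pasted files only when the clipboard carries no plain text (so a paste that includes text, e.g. a copied slide text box that also ships an image rendition, stays on the text path):

```ts
// Hypothetical reconstruction of @/lib/clipboard — not shown in this diff.
export function getPastedFilesIfNoText(data: DataTransfer | null): File[] {
  if (!data) return [];
  // Any plain text present means the user is pasting text; defer to it.
  if (data.getData("text/plain").trim().length > 0) return [];
  const files: File[] = [];
  for (let i = 0; i < data.items.length; i++) {
    const item = data.items[i];
    if (item && item.kind === "file") {
      const file = item.getAsFile();
      if (file) files.push(file);
    }
  }
  return files;
}
```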

View File

@@ -413,7 +413,7 @@ const MemoizedBuildSidebarInner = memo(
return (
<SidebarWrapper folded={folded} onFoldClick={onFoldClick}>
<SidebarBody
actionButtons={
pinnedContent={
<div className="flex flex-col gap-0.5">
{newBuildButton}
{buildConfigurePanel}

View File

@@ -151,7 +151,7 @@ export default function ConfigureConnectorModal({
return (
<>
<Modal open={open} onOpenChange={onClose}>
<Modal.Content width="md" height="fit">
<Modal.Content width="xl" height="fit">
<Modal.Header
icon={SvgPlug}
title={getStepTitle()}

View File

@@ -263,7 +263,7 @@ export default function CredentialStep({
open
onOpenChange={() => setCreateCredentialFormToggle(false)}
>
<Modal.Content width="md" height="fit">
<Modal.Content width="xl" height="fit">
<Modal.Header
icon={SvgKey}
title={`Create a ${getSourceDisplayName(

View File

@@ -215,7 +215,7 @@ export default function UserLibraryModal({
return (
<>
<Modal open={open} onOpenChange={(isOpen) => !isOpen && onClose()}>
<Modal.Content width="md" height="fit">
<Modal.Content width="xl" height="fit">
<Modal.Header
icon={SvgFileText}
title="Your Files"

View File

@@ -12,7 +12,7 @@
.input-normal:focus:not(:active),
.input-normal:focus-within:not(:active) {
border-color: var(--border-05);
box-shadow: inset 0px 0px 0px 2px var(--background-tint-04);
box-shadow: 0px 0px 0px 2px rgba(204, 204, 207, 1);
}
.input-error {

View File

@@ -2,7 +2,9 @@
/* Base layers */
--z-base: 0;
--z-content: 1;
--z-settings-header: 8;
/* Settings header must sit above sticky table headers (--z-sticky: 10) so
the page header scrolls over pinned columns without being obscured. */
--z-settings-header: 11;
--z-app-layout: 9;
--z-sticky: 10;

Some files were not shown because too many files have changed in this diff.