Compare commits

..

7 Commits

Author SHA1 Message Date
Evan Lohn
5b45b7fc87 lite stuff 2026-03-04 18:20:17 -08:00
Evan Lohn
f46afd70fb chore: update install script 2026-03-04 18:12:22 -08:00
acaprau
c5c236d098 chore(opensearch): Fix and consolidate the dev script used to start OpenSearch locally (#9036) 2026-03-05 01:54:02 +00:00
Danelegend
b18baff4d0 fix: Correct file_id for docs (#9058) 2026-03-05 01:43:58 +00:00
SubashMohan
eb3e15c195 feat(table): add ColumnVisibilityPopover, Footer, Pagination, and SortingPopover components (#9019)
Co-authored-by: Nik <nikolas.garza5@gmail.com>
2026-03-05 01:43:37 +00:00
acaprau
47d9a9e1ac feat(document index): Re-enable search settings swap (#9005) 2026-03-05 01:41:03 +00:00
Evan Lohn
aca466b35d fix: doc to hierarchynode connection in pruning (#9046) 2026-03-05 01:30:36 +00:00
38 changed files with 2215 additions and 388 deletions

15
.vscode/launch.json vendored
View File

@@ -485,21 +485,6 @@
"group": "3"
}
},
{
"name": "Clear and Restart OpenSearch Container",
// Generic debugger type, required arg but has no bearing on bash.
"type": "node",
"request": "launch",
"runtimeExecutable": "bash",
"runtimeArgs": [
"${workspaceFolder}/backend/scripts/restart_opensearch_container.sh"
],
"cwd": "${workspaceFolder}",
"console": "integratedTerminal",
"presentation": {
"group": "3"
}
},
{
"name": "Eval CLI",
"type": "debugpy",

View File

@@ -2,6 +2,7 @@ from collections.abc import Sequence
from operator import and_
from uuid import UUID
from fastapi import HTTPException
from sqlalchemy import delete
from sqlalchemy import func
from sqlalchemy import Select
@@ -36,8 +37,6 @@ from onyx.db.models import UserGroup
from onyx.db.models import UserGroup__ConnectorCredentialPair
from onyx.db.models import UserRole
from onyx.db.users import fetch_user_by_id
from onyx.error_handling.error_codes import OnyxErrorCode
from onyx.error_handling.exceptions import OnyxError
from onyx.utils.logger import setup_logger
logger = setup_logger()
@@ -167,12 +166,18 @@ def validate_object_creation_for_user(
if object_is_public and user.role == UserRole.BASIC:
detail = "User does not have permission to create public objects"
logger.error(detail)
raise OnyxError(OnyxErrorCode.INSUFFICIENT_PERMISSIONS, detail)
raise HTTPException(
status_code=400,
detail=detail,
)
if not target_group_ids:
detail = "Curators must specify 1+ groups"
logger.error(detail)
raise OnyxError(OnyxErrorCode.VALIDATION_ERROR, detail)
raise HTTPException(
status_code=400,
detail=detail,
)
user_curated_groups = fetch_user_groups_for_user(
db_session=db_session,
@@ -185,7 +190,10 @@ def validate_object_creation_for_user(
if not target_group_ids_set.issubset(user_curated_group_ids):
detail = "Curators cannot control groups they don't curate"
logger.error(detail)
raise OnyxError(OnyxErrorCode.INSUFFICIENT_PERMISSIONS, detail)
raise HTTPException(
status_code=400,
detail=detail,
)
def fetch_user_group(db_session: Session, user_group_id: int) -> UserGroup | None:

View File

@@ -39,9 +39,13 @@ CT = TypeVar("CT", bound=ConnectorCheckpoint)
class SlimConnectorExtractionResult(BaseModel):
"""Result of extracting document IDs and hierarchy nodes from a connector."""
"""Result of extracting document IDs and hierarchy nodes from a connector.
doc_ids: set[str]
raw_id_to_parent maps document ID → parent_hierarchy_raw_node_id (or None).
Use raw_id_to_parent.keys() wherever the old set of IDs was needed.
"""
raw_id_to_parent: dict[str, str | None]
hierarchy_nodes: list[HierarchyNode]
@@ -93,30 +97,37 @@ def _get_failure_id(failure: ConnectorFailure) -> str | None:
return None
# Result of splitting one connector batch into document IDs and hierarchy
# nodes; this is what _extract_from_batch builds and returns.
class BatchResult(BaseModel):
# Maps each ID seen in the batch to its parent_hierarchy_raw_node_id, or
# None when no parent is recorded for it.
raw_id_to_parent: dict[str, str | None]
# The HierarchyNode items found in the batch.
hierarchy_nodes: list[HierarchyNode]
def _extract_from_batch(
doc_list: Sequence[Document | SlimDocument | HierarchyNode | ConnectorFailure],
) -> tuple[set[str], list[HierarchyNode]]:
"""Separate a batch into document IDs and hierarchy nodes.
) -> BatchResult:
"""Separate a batch into document IDs (with parent mapping) and hierarchy nodes.
ConnectorFailure items have their failed document/entity IDs added to the
ID set so that failed-to-retrieve documents are not accidentally pruned.
ID dict so that failed-to-retrieve documents are not accidentally pruned.
"""
ids: set[str] = set()
ids: dict[str, str | None] = {}
hierarchy_nodes: list[HierarchyNode] = []
for item in doc_list:
if isinstance(item, HierarchyNode):
hierarchy_nodes.append(item)
ids.add(item.raw_node_id)
if item.raw_node_id not in ids:
ids[item.raw_node_id] = None
elif isinstance(item, ConnectorFailure):
failed_id = _get_failure_id(item)
if failed_id:
ids.add(failed_id)
ids[failed_id] = None
logger.warning(
f"Failed to retrieve document {failed_id}: " f"{item.failure_message}"
)
else:
ids.add(item.id)
return ids, hierarchy_nodes
parent_raw = getattr(item, "parent_hierarchy_raw_node_id", None)
ids[item.id] = parent_raw
return BatchResult(raw_id_to_parent=ids, hierarchy_nodes=hierarchy_nodes)
def extract_ids_from_runnable_connector(
@@ -132,7 +143,7 @@ def extract_ids_from_runnable_connector(
Optionally, a callback can be passed to handle the length of each document batch.
"""
all_connector_doc_ids: set[str] = set()
all_raw_id_to_parent: dict[str, str | None] = {}
all_hierarchy_nodes: list[HierarchyNode] = []
# Sequence (covariant) lets all the specific list[...] iterator types unify here
@@ -177,15 +188,20 @@ def extract_ids_from_runnable_connector(
"extract_ids_from_runnable_connector: Stop signal detected"
)
batch_ids, batch_nodes = _extract_from_batch(doc_list)
all_connector_doc_ids.update(doc_batch_processing_func(batch_ids))
batch_result = _extract_from_batch(doc_list)
batch_ids = batch_result.raw_id_to_parent
batch_nodes = batch_result.hierarchy_nodes
doc_batch_processing_func(batch_ids)
for k, v in batch_ids.items():
if v is not None or k not in all_raw_id_to_parent:
all_raw_id_to_parent[k] = v
all_hierarchy_nodes.extend(batch_nodes)
if callback:
callback.progress("extract_ids_from_runnable_connector", len(batch_ids))
return SlimConnectorExtractionResult(
doc_ids=all_connector_doc_ids,
raw_id_to_parent=all_raw_id_to_parent,
hierarchy_nodes=all_hierarchy_nodes,
)

View File

@@ -29,6 +29,7 @@ from onyx.configs.constants import CELERY_GENERIC_BEAT_LOCK_TIMEOUT
from onyx.configs.constants import CELERY_PRUNING_LOCK_TIMEOUT
from onyx.configs.constants import CELERY_TASK_WAIT_FOR_FENCE_TIMEOUT
from onyx.configs.constants import DANSWER_REDIS_FUNCTION_LOCK_PREFIX
from onyx.configs.constants import DocumentSource
from onyx.configs.constants import OnyxCeleryPriority
from onyx.configs.constants import OnyxCeleryQueues
from onyx.configs.constants import OnyxCeleryTask
@@ -47,6 +48,8 @@ from onyx.db.enums import AccessType
from onyx.db.enums import ConnectorCredentialPairStatus
from onyx.db.enums import SyncStatus
from onyx.db.enums import SyncType
from onyx.db.hierarchy import link_hierarchy_nodes_to_documents
from onyx.db.hierarchy import update_document_parent_hierarchy_nodes
from onyx.db.hierarchy import upsert_hierarchy_nodes_batch
from onyx.db.models import ConnectorCredentialPair
from onyx.db.sync_record import insert_sync_record
@@ -57,6 +60,8 @@ from onyx.redis.redis_connector_prune import RedisConnectorPrune
from onyx.redis.redis_connector_prune import RedisConnectorPrunePayload
from onyx.redis.redis_hierarchy import cache_hierarchy_nodes_batch
from onyx.redis.redis_hierarchy import ensure_source_node_exists
from onyx.redis.redis_hierarchy import get_node_id_from_raw_id
from onyx.redis.redis_hierarchy import get_source_node_id_from_cache
from onyx.redis.redis_hierarchy import HierarchyNodeCacheEntry
from onyx.redis.redis_pool import get_redis_client
from onyx.redis.redis_pool import get_redis_replica_client
@@ -113,6 +118,38 @@ class PruneCallback(IndexingCallbackBase):
super().progress(tag, amount)
def _resolve_and_update_document_parents(
    db_session: Session,
    redis_client: Redis,
    source: DocumentSource,
    raw_id_to_parent: dict[str, str | None],
) -> None:
    """Turn raw parent IDs into hierarchy node IDs and persist them on documents.

    For every document that carries a raw parent ID, look the parent up via
    ``get_node_id_from_raw_id``. When the lookup reports the parent as not
    found, the document is attached to the source node obtained from
    ``get_source_node_id_from_cache`` instead. Documents whose raw parent ID
    is None are left untouched. Mirrors the resolution logic in
    run_docfetching.py.

    Args:
        db_session: SQLAlchemy session used for the lookup and the bulk update.
        redis_client: Redis client used for the node-ID lookups.
        source: Document source the hierarchy nodes belong to.
        raw_id_to_parent: Mapping of document ID to its raw parent node ID
            (or None when there is no parent to resolve).
    """
    fallback_node_id = get_source_node_id_from_cache(redis_client, db_session, source)

    resolved: dict[str, int | None] = {}
    for document_id, parent_raw_id in raw_id_to_parent.items():
        if parent_raw_id is not None:
            parent_node_id, was_found = get_node_id_from_raw_id(
                redis_client, source, parent_raw_id
            )
            # Fall back to the source node when the parent is not found.
            resolved[document_id] = parent_node_id if was_found else fallback_node_id

    # Only touch the DB (and log) when at least one parent was resolved.
    if resolved:
        update_document_parent_hierarchy_nodes(
            db_session=db_session,
            doc_parent_map=resolved,
            commit=True,
        )
        task_logger.info(
            f"Pruning: resolved and updated parent hierarchy for "
            f"{len(resolved)} documents (source={source.value})"
        )
"""Jobs / utils for kicking off pruning tasks."""
@@ -535,22 +572,22 @@ def connector_pruning_generator_task(
extraction_result = extract_ids_from_runnable_connector(
runnable_connector, callback
)
all_connector_doc_ids = extraction_result.doc_ids
all_connector_doc_ids = extraction_result.raw_id_to_parent
# Process hierarchy nodes (same as docfetching):
# upsert to Postgres and cache in Redis
source = cc_pair.connector.source
redis_client = get_redis_client(tenant_id=tenant_id)
if extraction_result.hierarchy_nodes:
is_connector_public = cc_pair.access_type == AccessType.PUBLIC
redis_client = get_redis_client(tenant_id=tenant_id)
ensure_source_node_exists(
redis_client, db_session, cc_pair.connector.source
)
ensure_source_node_exists(redis_client, db_session, source)
upserted_nodes = upsert_hierarchy_nodes_batch(
db_session=db_session,
nodes=extraction_result.hierarchy_nodes,
source=cc_pair.connector.source,
source=source,
commit=True,
is_connector_public=is_connector_public,
)
@@ -561,7 +598,7 @@ def connector_pruning_generator_task(
]
cache_hierarchy_nodes_batch(
redis_client=redis_client,
source=cc_pair.connector.source,
source=source,
entries=cache_entries,
)
@@ -570,6 +607,26 @@ def connector_pruning_generator_task(
f"hierarchy nodes for cc_pair={cc_pair_id}"
)
ensure_source_node_exists(redis_client, db_session, source)
# Resolve parent_hierarchy_raw_node_id → parent_hierarchy_node_id
# and bulk-update documents, mirroring the docfetching resolution
_resolve_and_update_document_parents(
db_session=db_session,
redis_client=redis_client,
source=source,
raw_id_to_parent=all_connector_doc_ids,
)
# Link hierarchy nodes to documents for sources where pages can be
# both hierarchy nodes AND documents (e.g. Notion, Confluence)
all_doc_id_list = list(all_connector_doc_ids.keys())
link_hierarchy_nodes_to_documents(
db_session=db_session,
document_ids=all_doc_id_list,
source=source,
commit=True,
)
# a list of docs in our local index
all_indexed_document_ids = {
doc.id
@@ -581,7 +638,9 @@ def connector_pruning_generator_task(
}
# generate list of docs to remove (no longer in the source)
doc_ids_to_remove = list(all_indexed_document_ids - all_connector_doc_ids)
doc_ids_to_remove = list(
all_indexed_document_ids - all_connector_doc_ids.keys()
)
task_logger.info(
"Pruning set collected: "

View File

@@ -943,6 +943,9 @@ class ConfluenceConnector(
if include_permissions
else None
),
parent_hierarchy_raw_node_id=self._get_parent_hierarchy_raw_id(
page
),
)
)
@@ -992,6 +995,7 @@ class ConfluenceConnector(
if include_permissions
else None
),
parent_hierarchy_raw_node_id=page_id,
)
)

View File

@@ -781,4 +781,5 @@ def build_slim_document(
return SlimDocument(
id=onyx_document_id_from_drive_file(file),
external_access=external_access,
parent_hierarchy_raw_node_id=(file.get("parents") or [None])[0],
)

View File

@@ -902,6 +902,11 @@ class JiraConnector(
external_access=self._get_project_permissions(
project_key, add_prefix=False
),
parent_hierarchy_raw_node_id=(
self._get_parent_hierarchy_raw_node_id(issue, project_key)
if project_key
else None
),
)
)
current_offset += 1

View File

@@ -385,6 +385,7 @@ class IndexingDocument(Document):
class SlimDocument(BaseModel):
id: str
external_access: ExternalAccess | None = None
parent_hierarchy_raw_node_id: str | None = None
class HierarchyNode(BaseModel):

View File

@@ -772,6 +772,7 @@ def _convert_driveitem_to_slim_document(
drive_name: str,
ctx: ClientContext,
graph_client: GraphClient,
parent_hierarchy_raw_node_id: str | None = None,
) -> SlimDocument:
if driveitem.id is None:
raise ValueError("DriveItem ID is required")
@@ -787,11 +788,15 @@ def _convert_driveitem_to_slim_document(
return SlimDocument(
id=driveitem.id,
external_access=external_access,
parent_hierarchy_raw_node_id=parent_hierarchy_raw_node_id,
)
def _convert_sitepage_to_slim_document(
site_page: dict[str, Any], ctx: ClientContext | None, graph_client: GraphClient
site_page: dict[str, Any],
ctx: ClientContext | None,
graph_client: GraphClient,
parent_hierarchy_raw_node_id: str | None = None,
) -> SlimDocument:
"""Convert a SharePoint site page to a SlimDocument object."""
if site_page.get("id") is None:
@@ -808,6 +813,7 @@ def _convert_sitepage_to_slim_document(
return SlimDocument(
id=id,
external_access=external_access,
parent_hierarchy_raw_node_id=parent_hierarchy_raw_node_id,
)
@@ -1594,12 +1600,22 @@ class SharepointConnector(
)
)
parent_hierarchy_url: str | None = None
if drive_web_url:
parent_hierarchy_url = self._get_parent_hierarchy_url(
site_url, drive_web_url, drive_name, driveitem
)
try:
logger.debug(f"Processing: {driveitem.web_url}")
ctx = self._create_rest_client_context(site_descriptor.url)
doc_batch.append(
_convert_driveitem_to_slim_document(
driveitem, drive_name, ctx, self.graph_client
driveitem,
drive_name,
ctx,
self.graph_client,
parent_hierarchy_raw_node_id=parent_hierarchy_url,
)
)
except Exception as e:
@@ -1619,7 +1635,10 @@ class SharepointConnector(
ctx = self._create_rest_client_context(site_descriptor.url)
doc_batch.append(
_convert_sitepage_to_slim_document(
site_page, ctx, self.graph_client
site_page,
ctx,
self.graph_client,
parent_hierarchy_raw_node_id=site_descriptor.url,
)
)
if len(doc_batch) >= SLIM_BATCH_SIZE:

View File

@@ -565,6 +565,7 @@ def _get_all_doc_ids(
channel_id=channel_id, thread_ts=message["ts"]
),
external_access=external_access,
parent_hierarchy_raw_node_id=channel_id,
)
)

View File

@@ -5,6 +5,7 @@ from datetime import timezone
from typing import Tuple
from uuid import UUID
from fastapi import HTTPException
from sqlalchemy import delete
from sqlalchemy import desc
from sqlalchemy import exists
@@ -31,8 +32,6 @@ from onyx.db.models import SearchDoc as DBSearchDoc
from onyx.db.models import ToolCall
from onyx.db.models import User
from onyx.db.persona import get_best_persona_id_for_user
from onyx.error_handling.error_codes import OnyxErrorCode
from onyx.error_handling.exceptions import OnyxError
from onyx.file_store.file_store import get_default_file_store
from onyx.file_store.models import FileDescriptor
from onyx.llm.override_models import LLMOverride
@@ -228,9 +227,7 @@ def duplicate_chat_session_for_user_from_slack(
db_session=db_session,
)
if not chat_session:
raise OnyxError(
OnyxErrorCode.SESSION_NOT_FOUND, "Invalid Chat Session ID provided"
)
raise HTTPException(status_code=400, detail="Invalid Chat Session ID provided")
# This enforces permissions and sets a default
new_persona_id = get_best_persona_id_for_user(

View File

@@ -2,6 +2,7 @@ from datetime import datetime
from enum import Enum
from typing import TypeVarTuple
from fastapi import HTTPException
from sqlalchemy import delete
from sqlalchemy import desc
from sqlalchemy import exists
@@ -31,8 +32,6 @@ from onyx.db.models import User
from onyx.db.models import User__UserGroup
from onyx.db.models import UserGroup__ConnectorCredentialPair
from onyx.db.models import UserRole
from onyx.error_handling.error_codes import OnyxErrorCode
from onyx.error_handling.exceptions import OnyxError
from onyx.server.models import StatusResponse
from onyx.utils.logger import setup_logger
from onyx.utils.variable_functionality import fetch_ee_implementation_or_noop
@@ -540,7 +539,7 @@ def add_credential_to_connector(
)
if connector is None:
raise OnyxError(OnyxErrorCode.CONNECTOR_NOT_FOUND, "Connector does not exist")
raise HTTPException(status_code=404, detail="Connector does not exist")
if access_type == AccessType.SYNC:
if not fetch_ee_implementation_or_noop(
@@ -548,9 +547,9 @@ def add_credential_to_connector(
"check_if_valid_sync_source",
noop_return_value=True,
)(connector.source):
raise OnyxError(
OnyxErrorCode.VALIDATION_ERROR,
f"Connector of type {connector.source} does not support SYNC access type",
raise HTTPException(
status_code=400,
detail=f"Connector of type {connector.source} does not support SYNC access type",
)
if credential is None:
@@ -558,9 +557,9 @@ def add_credential_to_connector(
f"Credential {credential_id} does not exist or does not belong to user"
)
logger.error(error_msg)
raise OnyxError(
OnyxErrorCode.CREDENTIAL_NOT_FOUND,
error_msg,
raise HTTPException(
status_code=401,
detail=error_msg,
)
existing_association = (
@@ -623,12 +622,12 @@ def remove_credential_from_connector(
)
if connector is None:
raise OnyxError(OnyxErrorCode.CONNECTOR_NOT_FOUND, "Connector does not exist")
raise HTTPException(status_code=404, detail="Connector does not exist")
if credential is None:
raise OnyxError(
OnyxErrorCode.CREDENTIAL_NOT_FOUND,
"Credential does not exist or does not belong to user",
raise HTTPException(
status_code=404,
detail="Credential does not exist or does not belong to user",
)
association = get_connector_credential_pair_for_user(

View File

@@ -4,6 +4,7 @@ from typing import Any
from typing import AsyncContextManager
import asyncpg # type: ignore
from fastapi import HTTPException
from sqlalchemy import event
from sqlalchemy import pool
from sqlalchemy.ext.asyncio import AsyncEngine
@@ -27,8 +28,6 @@ from onyx.db.engine.sql_engine import build_connection_string
from onyx.db.engine.sql_engine import is_valid_schema_name
from onyx.db.engine.sql_engine import SqlEngine
from onyx.db.engine.sql_engine import USE_IAM_AUTH
from onyx.error_handling.error_codes import OnyxErrorCode
from onyx.error_handling.exceptions import OnyxError
from shared_configs.configs import MULTI_TENANT
from shared_configs.configs import POSTGRES_DEFAULT_SCHEMA_STANDARD_VALUE
from shared_configs.contextvars import get_current_tenant_id
@@ -115,7 +114,7 @@ async def get_async_session(
tenant_id = get_current_tenant_id()
if not is_valid_schema_name(tenant_id):
raise OnyxError(OnyxErrorCode.VALIDATION_ERROR, "Invalid tenant ID")
raise HTTPException(status_code=400, detail="Invalid tenant ID")
engine = get_sqlalchemy_async_engine()

View File

@@ -6,6 +6,7 @@ from collections.abc import Generator
from contextlib import contextmanager
from typing import Any
from fastapi import HTTPException
from sqlalchemy import event
from sqlalchemy import pool
from sqlalchemy.engine import create_engine
@@ -26,8 +27,6 @@ from onyx.configs.app_configs import POSTGRES_USE_NULL_POOL
from onyx.configs.app_configs import POSTGRES_USER
from onyx.configs.constants import POSTGRES_UNKNOWN_APP_NAME
from onyx.db.engine.iam_auth import provide_iam_token
from onyx.error_handling.error_codes import OnyxErrorCode
from onyx.error_handling.exceptions import OnyxError
from onyx.server.utils import BasicAuthenticationError
from onyx.utils.logger import setup_logger
from shared_configs.configs import MULTI_TENANT
@@ -345,7 +344,7 @@ def get_session_with_tenant(*, tenant_id: str) -> Generator[Session, None, None]
engine = get_sqlalchemy_engine()
if not is_valid_schema_name(tenant_id):
raise OnyxError(OnyxErrorCode.VALIDATION_ERROR, "Invalid tenant ID")
raise HTTPException(status_code=400, detail="Invalid tenant ID")
# no need to use the schema translation map for self-hosted + default schema
if not MULTI_TENANT and tenant_id == POSTGRES_DEFAULT_SCHEMA_STANDARD_VALUE:
@@ -372,7 +371,7 @@ def get_session() -> Generator[Session, None, None]:
raise BasicAuthenticationError(detail="User must authenticate")
if not is_valid_schema_name(tenant_id):
raise OnyxError(OnyxErrorCode.VALIDATION_ERROR, "Invalid tenant ID")
raise HTTPException(status_code=400, detail="Invalid tenant ID")
with get_session_with_current_tenant() as db_session:
yield db_session
@@ -391,7 +390,7 @@ def get_db_readonly_user_session_with_current_tenant() -> (
readonly_engine = get_readonly_sqlalchemy_engine()
if not is_valid_schema_name(tenant_id):
raise OnyxError(OnyxErrorCode.VALIDATION_ERROR, "Invalid tenant ID")
raise HTTPException(status_code=400, detail="Invalid tenant ID")
# no need to use the schema translation map for self-hosted + default schema
if not MULTI_TENANT and tenant_id == POSTGRES_DEFAULT_SCHEMA_STANDARD_VALUE:

View File

@@ -2,6 +2,7 @@ from datetime import datetime
from datetime import timezone
from uuid import UUID
from fastapi import HTTPException
from sqlalchemy import and_
from sqlalchemy import asc
from sqlalchemy import delete
@@ -25,8 +26,6 @@ from onyx.db.models import User
from onyx.db.models import User__UserGroup
from onyx.db.models import UserGroup__ConnectorCredentialPair
from onyx.db.models import UserRole
from onyx.error_handling.error_codes import OnyxErrorCode
from onyx.error_handling.exceptions import OnyxError
from onyx.utils.logger import setup_logger
logger = setup_logger()
@@ -135,9 +134,8 @@ def update_document_boost_for_user(
stmt = _add_user_filters(stmt, user, get_editable=True)
result: DbDocument | None = db_session.execute(stmt).scalar_one_or_none()
if result is None:
raise OnyxError(
OnyxErrorCode.UNAUTHORIZED,
"Document is not editable by this user",
raise HTTPException(
status_code=400, detail="Document is not editable by this user"
)
result.boost = boost
@@ -158,9 +156,8 @@ def update_document_hidden_for_user(
stmt = _add_user_filters(stmt, user, get_editable=True)
result = db_session.execute(stmt).scalar_one_or_none()
if result is None:
raise OnyxError(
OnyxErrorCode.UNAUTHORIZED,
"Document is not editable by this user",
raise HTTPException(
status_code=400, detail="Document is not editable by this user"
)
result.hidden = hidden

View File

@@ -1,5 +1,7 @@
"""CRUD operations for HierarchyNode."""
from collections import defaultdict
from sqlalchemy import select
from sqlalchemy.orm import Session
@@ -525,6 +527,53 @@ def get_document_parent_hierarchy_node_ids(
return {doc_id: parent_id for doc_id, parent_id in results}
def update_document_parent_hierarchy_nodes(
    db_session: Session,
    doc_parent_map: dict[str, int | None],
    commit: bool = True,
) -> int:
    """Set Document.parent_hierarchy_node_id for many documents at once.

    Documents are grouped by their desired parent so that a single UPDATE is
    issued per distinct parent value. A document is skipped when its stored
    parent already equals the desired one, or when the current-parent lookup
    returns no entry for it (presumably because no such Document row exists;
    verify against get_document_parent_hierarchy_node_ids).

    Args:
        db_session: SQLAlchemy session.
        doc_parent_map: Mapping of document ID to the desired
            parent_hierarchy_node_id (None is a valid desired value).
        commit: When True the transaction is committed, even if nothing
            changed; when False the session is flushed only if rows were
            updated.

    Returns:
        Number of documents selected for update.
    """
    if not doc_parent_map:
        return 0

    current_parents = get_document_parent_hierarchy_node_ids(
        db_session, list(doc_parent_map.keys())
    )

    # desired parent id -> document ids that need to be moved under it
    ids_by_target: dict[int | None, list[str]] = defaultdict(list)
    for document_id, target_parent in doc_parent_map.items():
        if document_id not in current_parents:
            continue
        if current_parents[document_id] == target_parent:
            continue
        ids_by_target[target_parent].append(document_id)

    for target_parent, document_ids in ids_by_target.items():
        db_session.query(Document).filter(Document.id.in_(document_ids)).update(
            {Document.parent_hierarchy_node_id: target_parent},
            synchronize_session=False,
        )

    changed = sum(len(document_ids) for document_ids in ids_by_target.values())

    if commit:
        db_session.commit()
    elif changed:
        db_session.flush()

    return changed
def update_hierarchy_node_permissions(
db_session: Session,
raw_node_id: str,

View File

@@ -1,5 +1,6 @@
from uuid import UUID
from fastapi import HTTPException
from sqlalchemy import or_
from sqlalchemy import select
from sqlalchemy.dialects.postgresql import insert as pg_insert
@@ -10,8 +11,6 @@ from sqlalchemy.orm import Session
from onyx.db.models import InputPrompt
from onyx.db.models import InputPrompt__User
from onyx.db.models import User
from onyx.error_handling.error_codes import OnyxErrorCode
from onyx.error_handling.exceptions import OnyxError
from onyx.server.features.input_prompt.models import InputPromptSnapshot
from onyx.server.manage.models import UserInfo
from onyx.utils.logger import setup_logger
@@ -55,9 +54,9 @@ def insert_input_prompt(
input_prompt = result.scalar_one_or_none()
if input_prompt is None:
raise OnyxError(
OnyxErrorCode.DUPLICATE_RESOURCE,
f"A prompt shortcut with the name '{prompt}' already exists",
raise HTTPException(
status_code=409,
detail=f"A prompt shortcut with the name '{prompt}' already exists",
)
db_session.commit()
@@ -79,7 +78,7 @@ def update_input_prompt(
raise ValueError(f"No input prompt with id {input_prompt_id}")
if not validate_user_prompt_authorization(user, input_prompt):
raise OnyxError(OnyxErrorCode.UNAUTHORIZED, "You don't own this prompt")
raise HTTPException(status_code=401, detail="You don't own this prompt")
input_prompt.prompt = prompt
input_prompt.content = content
@@ -89,9 +88,9 @@ def update_input_prompt(
db_session.commit()
except IntegrityError:
db_session.rollback()
raise OnyxError(
OnyxErrorCode.DUPLICATE_RESOURCE,
f"A prompt shortcut with the name '{prompt}' already exists",
raise HTTPException(
status_code=409,
detail=f"A prompt shortcut with the name '{prompt}' already exists",
)
return input_prompt
@@ -122,7 +121,7 @@ def remove_public_input_prompt(input_prompt_id: int, db_session: Session) -> Non
raise ValueError(f"No input prompt with id {input_prompt_id}")
if not input_prompt.is_public:
raise OnyxError(OnyxErrorCode.VALIDATION_ERROR, "This prompt is not public")
raise HTTPException(status_code=400, detail="This prompt is not public")
db_session.delete(input_prompt)
db_session.commit()
@@ -141,13 +140,12 @@ def remove_input_prompt(
raise ValueError(f"No input prompt with id {input_prompt_id}")
if input_prompt.is_public and not delete_public:
raise OnyxError(
OnyxErrorCode.VALIDATION_ERROR,
"Cannot delete public prompts with this method",
raise HTTPException(
status_code=400, detail="Cannot delete public prompts with this method"
)
if not validate_user_prompt_authorization(user, input_prompt):
raise OnyxError(OnyxErrorCode.UNAUTHORIZED, "You do not own this prompt")
raise HTTPException(status_code=401, detail="You do not own this prompt")
db_session.delete(input_prompt)
db_session.commit()
@@ -169,7 +167,7 @@ def fetch_input_prompt_by_id(
result = db_session.scalar(query)
if result is None:
raise OnyxError(OnyxErrorCode.NOT_FOUND, "No input prompt found")
raise HTTPException(422, "No input prompt found")
return result

View File

@@ -3,6 +3,7 @@ from datetime import datetime
from enum import Enum
from uuid import UUID
from fastapi import HTTPException
from sqlalchemy import exists
from sqlalchemy import func
from sqlalchemy import not_
@@ -37,8 +38,6 @@ from onyx.db.models import User__UserGroup
from onyx.db.models import UserFile
from onyx.db.models import UserGroup
from onyx.db.notification import create_notification
from onyx.error_handling.error_codes import OnyxErrorCode
from onyx.error_handling.exceptions import OnyxError
from onyx.server.features.persona.models import FullPersonaSnapshot
from onyx.server.features.persona.models import MinimalPersonaSnapshot
from onyx.server.features.persona.models import PersonaSharedNotificationData
@@ -145,9 +144,9 @@ def fetch_persona_by_id_for_user(
stmt = _add_user_filters(stmt=stmt, user=user, get_editable=get_editable)
persona = db_session.scalars(stmt).one_or_none()
if not persona:
raise OnyxError(
OnyxErrorCode.INSUFFICIENT_PERMISSIONS,
f"Persona with ID {persona_id} does not exist or user is not authorized to access it",
raise HTTPException(
status_code=403,
detail=f"Persona with ID {persona_id} does not exist or user is not authorized to access it",
)
return persona
@@ -316,7 +315,7 @@ def create_update_persona(
except ValueError as e:
logger.exception("Failed to create persona")
raise OnyxError(OnyxErrorCode.VALIDATION_ERROR, str(e))
raise HTTPException(status_code=400, detail=str(e))
return FullPersonaSnapshot.from_model(persona)

View File

@@ -3,6 +3,7 @@ import uuid
from typing import List
from uuid import UUID
from fastapi import HTTPException
from fastapi import UploadFile
from pydantic import BaseModel
from pydantic import ConfigDict
@@ -19,8 +20,6 @@ from onyx.db.models import Project__UserFile
from onyx.db.models import User
from onyx.db.models import UserFile
from onyx.db.models import UserProject
from onyx.error_handling.error_codes import OnyxErrorCode
from onyx.error_handling.exceptions import OnyxError
from onyx.server.documents.connector import upload_files
from onyx.server.features.projects.projects_file_utils import categorize_uploaded_files
from onyx.server.features.projects.projects_file_utils import RejectedFile
@@ -111,7 +110,7 @@ def upload_files_to_user_files_with_indexing(
) -> CategorizedFilesResult:
if project_id is not None and user is not None:
if not check_project_ownership(project_id, user.id, db_session):
raise OnyxError(OnyxErrorCode.NOT_FOUND, "Project not found")
raise HTTPException(status_code=404, detail="Project not found")
categorized_files_result = create_user_files(
files,

View File

@@ -129,7 +129,7 @@ def get_current_search_settings(db_session: Session) -> SearchSettings:
latest_settings = result.scalars().first()
if not latest_settings:
raise RuntimeError("No search settings specified, DB is not in a valid state")
raise RuntimeError("No search settings specified; DB is not in a valid state.")
return latest_settings

View File

@@ -2,6 +2,7 @@ from collections.abc import Sequence
from typing import Any
from uuid import UUID
from fastapi import HTTPException
from fastapi_users.password import PasswordHelper
from sqlalchemy import func
from sqlalchemy import select
@@ -23,8 +24,6 @@ from onyx.db.models import Persona__User
from onyx.db.models import SamlAccount
from onyx.db.models import User
from onyx.db.models import User__UserGroup
from onyx.error_handling.error_codes import OnyxErrorCode
from onyx.error_handling.exceptions import OnyxError
from onyx.utils.variable_functionality import fetch_ee_implementation_or_noop
@@ -45,22 +44,22 @@ def validate_user_role_update(
"""
if current_role == UserRole.SLACK_USER:
raise OnyxError(
OnyxErrorCode.VALIDATION_ERROR,
"To change a Slack User's role, they must first login to Onyx via the web app.",
raise HTTPException(
status_code=400,
detail="To change a Slack User's role, they must first login to Onyx via the web app.",
)
if current_role == UserRole.EXT_PERM_USER:
# This shouldn't happen, but just in case
raise OnyxError(
OnyxErrorCode.VALIDATION_ERROR,
"To change an External Permissioned User's role, they must first login to Onyx via the web app.",
raise HTTPException(
status_code=400,
detail="To change an External Permissioned User's role, they must first login to Onyx via the web app.",
)
if current_role == UserRole.LIMITED:
raise OnyxError(
OnyxErrorCode.VALIDATION_ERROR,
"To change a Limited User's role, they must first login to Onyx via the web app.",
raise HTTPException(
status_code=400,
detail="To change a Limited User's role, they must first login to Onyx via the web app.",
)
if explicit_override:
@@ -68,34 +67,40 @@ def validate_user_role_update(
if requested_role == UserRole.CURATOR:
# This shouldn't happen, but just in case
raise OnyxError(
OnyxErrorCode.VALIDATION_ERROR,
"Curator role must be set via the User Group Menu",
raise HTTPException(
status_code=400,
detail="Curator role must be set via the User Group Menu",
)
if requested_role == UserRole.LIMITED:
# This shouldn't happen, but just in case
raise OnyxError(
OnyxErrorCode.VALIDATION_ERROR,
"A user cannot be set to a Limited User role. "
"This role is automatically assigned to users through certain endpoints in the API.",
raise HTTPException(
status_code=400,
detail=(
"A user cannot be set to a Limited User role. "
"This role is automatically assigned to users through certain endpoints in the API."
),
)
if requested_role == UserRole.SLACK_USER:
# This shouldn't happen, but just in case
raise OnyxError(
OnyxErrorCode.VALIDATION_ERROR,
"A user cannot be set to a Slack User role. "
"This role is automatically assigned to users who only use Onyx via Slack.",
raise HTTPException(
status_code=400,
detail=(
"A user cannot be set to a Slack User role. "
"This role is automatically assigned to users who only use Onyx via Slack."
),
)
if requested_role == UserRole.EXT_PERM_USER:
# This shouldn't happen, but just in case
raise OnyxError(
OnyxErrorCode.VALIDATION_ERROR,
"A user cannot be set to an External Permissioned User role. "
"This role is automatically assigned to users who have been "
"pulled in to the system via an external permissions system.",
raise HTTPException(
status_code=400,
detail=(
"A user cannot be set to an External Permissioned User role. "
"This role is automatically assigned to users who have been "
"pulled in to the system via an external permissions system."
),
)

View File

@@ -32,9 +32,6 @@ def get_multipass_config(search_settings: SearchSettings) -> MultipassConfig:
Determines whether to enable multipass and large chunks by examining
the current search settings and the embedder configuration.
"""
if not search_settings:
return MultipassConfig(multipass_indexing=False, enable_large_chunks=False)
multipass = should_use_multipass(search_settings)
enable_large_chunks = SearchSettings.can_use_large_chunks(
multipass, search_settings.model_name, search_settings.provider_type

View File

@@ -26,11 +26,10 @@ def get_default_document_index(
To be used for retrieval only. Indexing should be done through both indices
until Vespa is deprecated.
Pre-existing docstring for this function, although secondary indices are not
currently supported:
Primary index is the index that is used for querying/updating etc. Secondary
index is for when both the currently used index and the upcoming index both
need to be updated, updates are applied to both indices.
need to be updated. Updates are applied to both indices.
WARNING: In that case, get_all_document_indices should be used.
"""
if DISABLE_VECTOR_DB:
return DisabledDocumentIndex(
@@ -51,11 +50,26 @@ def get_default_document_index(
opensearch_retrieval_enabled = get_opensearch_retrieval_state(db_session)
if opensearch_retrieval_enabled:
indexing_setting = IndexingSetting.from_db_model(search_settings)
secondary_indexing_setting = (
IndexingSetting.from_db_model(secondary_search_settings)
if secondary_search_settings
else None
)
return OpenSearchOldDocumentIndex(
index_name=search_settings.index_name,
embedding_dim=indexing_setting.final_embedding_dim,
embedding_precision=indexing_setting.embedding_precision,
secondary_index_name=secondary_index_name,
secondary_embedding_dim=(
secondary_indexing_setting.final_embedding_dim
if secondary_indexing_setting
else None
),
secondary_embedding_precision=(
secondary_indexing_setting.embedding_precision
if secondary_indexing_setting
else None
),
large_chunks_enabled=search_settings.large_chunks_enabled,
secondary_large_chunks_enabled=secondary_large_chunks_enabled,
multitenant=MULTI_TENANT,
@@ -86,8 +100,7 @@ def get_all_document_indices(
Used for indexing only. Until Vespa is deprecated we will index into both
document indices. Retrieval is done through only one index however.
Large chunks and secondary indices are not currently supported so we
hardcode appropriate values.
Large chunks are not currently supported so we hardcode appropriate values.
NOTE: Make sure the Vespa index object is returned first. In the rare event
that there is some conflict between indexing and the migration task, it is
@@ -123,13 +136,36 @@ def get_all_document_indices(
opensearch_document_index: OpenSearchOldDocumentIndex | None = None
if ENABLE_OPENSEARCH_INDEXING_FOR_ONYX:
indexing_setting = IndexingSetting.from_db_model(search_settings)
secondary_indexing_setting = (
IndexingSetting.from_db_model(secondary_search_settings)
if secondary_search_settings
else None
)
opensearch_document_index = OpenSearchOldDocumentIndex(
index_name=search_settings.index_name,
embedding_dim=indexing_setting.final_embedding_dim,
embedding_precision=indexing_setting.embedding_precision,
secondary_index_name=None,
large_chunks_enabled=False,
secondary_large_chunks_enabled=None,
secondary_index_name=(
secondary_search_settings.index_name
if secondary_search_settings
else None
),
secondary_embedding_dim=(
secondary_indexing_setting.final_embedding_dim
if secondary_indexing_setting
else None
),
secondary_embedding_precision=(
secondary_indexing_setting.embedding_precision
if secondary_indexing_setting
else None
),
large_chunks_enabled=search_settings.large_chunks_enabled,
secondary_large_chunks_enabled=(
secondary_search_settings.large_chunks_enabled
if secondary_search_settings
else None
),
multitenant=MULTI_TENANT,
httpx_client=httpx_client,
)

View File

@@ -271,6 +271,9 @@ class OpenSearchOldDocumentIndex(OldDocumentIndex):
embedding_dim: int,
embedding_precision: EmbeddingPrecision,
secondary_index_name: str | None,
secondary_embedding_dim: int | None,
secondary_embedding_precision: EmbeddingPrecision | None,
# NOTE: We do not support large chunks right now.
large_chunks_enabled: bool, # noqa: ARG002
secondary_large_chunks_enabled: bool | None, # noqa: ARG002
multitenant: bool = False,
@@ -286,12 +289,25 @@ class OpenSearchOldDocumentIndex(OldDocumentIndex):
f"Expected {MULTI_TENANT}, got {multitenant}."
)
tenant_id = get_current_tenant_id()
tenant_state = TenantState(tenant_id=tenant_id, multitenant=multitenant)
self._real_index = OpenSearchDocumentIndex(
tenant_state=TenantState(tenant_id=tenant_id, multitenant=multitenant),
tenant_state=tenant_state,
index_name=index_name,
embedding_dim=embedding_dim,
embedding_precision=embedding_precision,
)
self._secondary_real_index: OpenSearchDocumentIndex | None = None
if self.secondary_index_name:
if secondary_embedding_dim is None or secondary_embedding_precision is None:
raise ValueError(
"Bug: Secondary index embedding dimension and precision are not set."
)
self._secondary_real_index = OpenSearchDocumentIndex(
tenant_state=tenant_state,
index_name=self.secondary_index_name,
embedding_dim=secondary_embedding_dim,
embedding_precision=secondary_embedding_precision,
)
@staticmethod
def register_multitenant_indices(
@@ -307,19 +323,38 @@ class OpenSearchOldDocumentIndex(OldDocumentIndex):
self,
primary_embedding_dim: int,
primary_embedding_precision: EmbeddingPrecision,
secondary_index_embedding_dim: int | None, # noqa: ARG002
secondary_index_embedding_precision: EmbeddingPrecision | None, # noqa: ARG002
secondary_index_embedding_dim: int | None,
secondary_index_embedding_precision: EmbeddingPrecision | None,
) -> None:
# Only handle primary index for now, ignore secondary.
return self._real_index.verify_and_create_index_if_necessary(
self._real_index.verify_and_create_index_if_necessary(
primary_embedding_dim, primary_embedding_precision
)
if self.secondary_index_name:
if (
secondary_index_embedding_dim is None
or secondary_index_embedding_precision is None
):
raise ValueError(
"Bug: Secondary index embedding dimension and precision are not set."
)
assert (
self._secondary_real_index is not None
), "Bug: Secondary index is not initialized."
self._secondary_real_index.verify_and_create_index_if_necessary(
secondary_index_embedding_dim, secondary_index_embedding_precision
)
def index(
self,
chunks: list[DocMetadataAwareIndexChunk],
index_batch_params: IndexBatchParams,
) -> set[OldDocumentInsertionRecord]:
"""
NOTE: Do NOT consider the secondary index here. A separate indexing
pipeline will be responsible for indexing to the secondary index. This
design is not ideal and we should reconsider this when revamping index
swapping.
"""
# Convert IndexBatchParams to IndexingMetadata.
chunk_counts: dict[str, IndexingMetadata.ChunkCounts] = {}
for doc_id in index_batch_params.doc_id_to_new_chunk_cnt:
@@ -351,7 +386,20 @@ class OpenSearchOldDocumentIndex(OldDocumentIndex):
tenant_id: str, # noqa: ARG002
chunk_count: int | None,
) -> int:
return self._real_index.delete(doc_id, chunk_count)
"""
NOTE: Remember to handle the secondary index here. There is no separate
pipeline for deleting chunks in the secondary index. This design is not
ideal and we should reconsider this when revamping index swapping.
"""
total_chunks_deleted = self._real_index.delete(doc_id, chunk_count)
if self.secondary_index_name:
assert (
self._secondary_real_index is not None
), "Bug: Secondary index is not initialized."
total_chunks_deleted += self._secondary_real_index.delete(
doc_id, chunk_count
)
return total_chunks_deleted
def update_single(
self,
@@ -362,6 +410,11 @@ class OpenSearchOldDocumentIndex(OldDocumentIndex):
fields: VespaDocumentFields | None,
user_fields: VespaDocumentUserFields | None,
) -> None:
"""
NOTE: Remember to handle the secondary index here. There is no separate
pipeline for updating chunks in the secondary index. This design is not
ideal and we should reconsider this when revamping index swapping.
"""
if fields is None and user_fields is None:
logger.warning(
f"Tried to update document {doc_id} with no updated fields or user fields."
@@ -392,6 +445,11 @@ class OpenSearchOldDocumentIndex(OldDocumentIndex):
try:
self._real_index.update([update_request])
if self.secondary_index_name:
assert (
self._secondary_real_index is not None
), "Bug: Secondary index is not initialized."
self._secondary_real_index.update([update_request])
except NotFoundError:
logger.exception(
f"Tried to update document {doc_id} but at least one of its chunks was not found in OpenSearch. "

View File

@@ -465,6 +465,12 @@ class VespaIndex(DocumentIndex):
chunks: list[DocMetadataAwareIndexChunk],
index_batch_params: IndexBatchParams,
) -> set[OldDocumentInsertionRecord]:
"""
NOTE: Do NOT consider the secondary index here. A separate indexing
pipeline will be responsible for indexing to the secondary index. This
design is not ideal and we should reconsider this when revamping index
swapping.
"""
if len(index_batch_params.doc_id_to_previous_chunk_cnt) != len(
index_batch_params.doc_id_to_new_chunk_cnt
):
@@ -659,6 +665,10 @@ class VespaIndex(DocumentIndex):
"""Note: if the document id does not exist, the update will be a no-op and the
function will complete with no errors or exceptions.
Handle other exceptions if you wish to implement retry behavior
NOTE: Remember to handle the secondary index here. There is no separate
pipeline for updating chunks in the secondary index. This design is not
ideal and we should reconsider this when revamping index swapping.
"""
if fields is None and user_fields is None:
logger.warning(
@@ -679,13 +689,6 @@ class VespaIndex(DocumentIndex):
f"Bug: Tenant ID mismatch. Expected {tenant_state.tenant_id}, got {tenant_id}."
)
vespa_document_index = VespaDocumentIndex(
index_name=self.index_name,
tenant_state=tenant_state,
large_chunks_enabled=self.large_chunks_enabled,
httpx_client=self.httpx_client,
)
project_ids: set[int] | None = None
if user_fields is not None and user_fields.user_projects is not None:
project_ids = set(user_fields.user_projects)
@@ -705,7 +708,20 @@ class VespaIndex(DocumentIndex):
persona_ids=persona_ids,
)
vespa_document_index.update([update_request])
indices = [self.index_name]
if self.secondary_index_name:
indices.append(self.secondary_index_name)
for index_name in indices:
vespa_document_index = VespaDocumentIndex(
index_name=index_name,
tenant_state=tenant_state,
large_chunks_enabled=self.index_to_large_chunks_enabled.get(
index_name, False
),
httpx_client=self.httpx_client,
)
vespa_document_index.update([update_request])
def delete_single(
self,
@@ -714,6 +730,11 @@ class VespaIndex(DocumentIndex):
tenant_id: str,
chunk_count: int | None,
) -> int:
"""
NOTE: Remember to handle the secondary index here. There is no separate
pipeline for deleting chunks in the secondary index. This design is not
ideal and we should reconsider this when revamping index swapping.
"""
tenant_state = TenantState(
tenant_id=get_current_tenant_id(),
multitenant=MULTI_TENANT,
@@ -726,13 +747,25 @@ class VespaIndex(DocumentIndex):
raise ValueError(
f"Bug: Tenant ID mismatch. Expected {tenant_state.tenant_id}, got {tenant_id}."
)
vespa_document_index = VespaDocumentIndex(
index_name=self.index_name,
tenant_state=tenant_state,
large_chunks_enabled=self.large_chunks_enabled,
httpx_client=self.httpx_client,
)
return vespa_document_index.delete(document_id=doc_id, chunk_count=chunk_count)
indices = [self.index_name]
if self.secondary_index_name:
indices.append(self.secondary_index_name)
total_chunks_deleted = 0
for index_name in indices:
vespa_document_index = VespaDocumentIndex(
index_name=index_name,
tenant_state=tenant_state,
large_chunks_enabled=self.index_to_large_chunks_enabled.get(
index_name, False
),
httpx_client=self.httpx_client,
)
total_chunks_deleted += vespa_document_index.delete(
document_id=doc_id, chunk_count=chunk_count
)
return total_chunks_deleted
def id_based_retrieval(
self,

View File

@@ -6,8 +6,11 @@ from sqlalchemy.orm import Session
from onyx.auth.users import current_admin_user
from onyx.auth.users import current_user
from onyx.configs.app_configs import DISABLE_INDEX_UPDATE_ON_SWAP
from onyx.context.search.models import SavedSearchSettings
from onyx.context.search.models import SearchSettingsCreationRequest
from onyx.db.connector_credential_pair import get_connector_credential_pairs
from onyx.db.connector_credential_pair import resync_cc_pair
from onyx.db.engine.sql_engine import get_session
from onyx.db.index_attempt import expire_index_attempts
from onyx.db.llm import fetch_existing_llm_provider
@@ -15,20 +18,25 @@ from onyx.db.llm import update_default_contextual_model
from onyx.db.llm import update_no_default_contextual_rag_provider
from onyx.db.models import IndexModelStatus
from onyx.db.models import User
from onyx.db.search_settings import create_search_settings
from onyx.db.search_settings import delete_search_settings
from onyx.db.search_settings import get_current_search_settings
from onyx.db.search_settings import get_embedding_provider_from_provider_type
from onyx.db.search_settings import get_secondary_search_settings
from onyx.db.search_settings import update_current_search_settings
from onyx.db.search_settings import update_search_settings_status
from onyx.document_index.factory import get_all_document_indices
from onyx.document_index.factory import get_default_document_index
from onyx.file_processing.unstructured import delete_unstructured_api_key
from onyx.file_processing.unstructured import get_unstructured_api_key
from onyx.file_processing.unstructured import update_unstructured_api_key
from onyx.natural_language_processing.search_nlp_models import clean_model_name
from onyx.server.manage.embedding.models import SearchSettingsDeleteRequest
from onyx.server.manage.models import FullModelVersionResponse
from onyx.server.models import IdReturn
from onyx.server.utils_vector_db import require_vector_db
from onyx.utils.logger import setup_logger
from shared_configs.configs import ALT_INDEX_SUFFIX
from shared_configs.configs import MULTI_TENANT
router = APIRouter(prefix="/search-settings")
@@ -41,110 +49,99 @@ def set_new_search_settings(
_: User = Depends(current_admin_user),
db_session: Session = Depends(get_session), # noqa: ARG001
) -> IdReturn:
"""Creates a new EmbeddingModel row and cancels the previous secondary indexing if any
Gives an error if the same model name is used as the current or secondary index
"""
# TODO(andrei): Re-enable.
# NOTE Enable integration external dependency tests in test_search_settings.py
# when this is reenabled. They are currently skipped
logger.error("Setting new search settings is temporarily disabled.")
raise HTTPException(
status_code=status.HTTP_501_NOT_IMPLEMENTED,
detail="Setting new search settings is temporarily disabled.",
Creates a new SearchSettings row and cancels the previous secondary indexing
if any exists.
"""
if search_settings_new.index_name:
logger.warning("Index name was specified by request, this is not suggested")
# Disallow contextual RAG for cloud deployments.
if MULTI_TENANT and search_settings_new.enable_contextual_rag:
raise HTTPException(
status_code=status.HTTP_400_BAD_REQUEST,
detail="Contextual RAG disabled in Onyx Cloud",
)
# Validate cloud provider exists or create new LiteLLM provider.
if search_settings_new.provider_type is not None:
cloud_provider = get_embedding_provider_from_provider_type(
db_session, provider_type=search_settings_new.provider_type
)
if cloud_provider is None:
raise HTTPException(
status_code=status.HTTP_400_BAD_REQUEST,
detail=f"No embedding provider exists for cloud embedding type {search_settings_new.provider_type}",
)
validate_contextual_rag_model(
provider_name=search_settings_new.contextual_rag_llm_provider,
model_name=search_settings_new.contextual_rag_llm_name,
db_session=db_session,
)
# if search_settings_new.index_name:
# logger.warning("Index name was specified by request, this is not suggested")
# # Disallow contextual RAG for cloud deployments
# if MULTI_TENANT and search_settings_new.enable_contextual_rag:
# raise HTTPException(
# status_code=status.HTTP_400_BAD_REQUEST,
# detail="Contextual RAG disabled in Onyx Cloud",
# )
search_settings = get_current_search_settings(db_session)
# # Validate cloud provider exists or create new LiteLLM provider
# if search_settings_new.provider_type is not None:
# cloud_provider = get_embedding_provider_from_provider_type(
# db_session, provider_type=search_settings_new.provider_type
# )
if search_settings_new.index_name is None:
# We define index name here.
index_name = f"danswer_chunk_{clean_model_name(search_settings_new.model_name)}"
if (
search_settings_new.model_name == search_settings.model_name
and not search_settings.index_name.endswith(ALT_INDEX_SUFFIX)
):
index_name += ALT_INDEX_SUFFIX
search_values = search_settings_new.model_dump()
search_values["index_name"] = index_name
new_search_settings_request = SavedSearchSettings(**search_values)
else:
new_search_settings_request = SavedSearchSettings(
**search_settings_new.model_dump()
)
# if cloud_provider is None:
# raise HTTPException(
# status_code=status.HTTP_400_BAD_REQUEST,
# detail=f"No embedding provider exists for cloud embedding type {search_settings_new.provider_type}",
# )
secondary_search_settings = get_secondary_search_settings(db_session)
# validate_contextual_rag_model(
# provider_name=search_settings_new.contextual_rag_llm_provider,
# model_name=search_settings_new.contextual_rag_llm_name,
# db_session=db_session,
# )
if secondary_search_settings:
# Cancel any background indexing jobs.
expire_index_attempts(
search_settings_id=secondary_search_settings.id, db_session=db_session
)
# search_settings = get_current_search_settings(db_session)
# Mark previous model as a past model directly.
update_search_settings_status(
search_settings=secondary_search_settings,
new_status=IndexModelStatus.PAST,
db_session=db_session,
)
# if search_settings_new.index_name is None:
# # We define index name here
# index_name = f"danswer_chunk_{clean_model_name(search_settings_new.model_name)}"
# if (
# search_settings_new.model_name == search_settings.model_name
# and not search_settings.index_name.endswith(ALT_INDEX_SUFFIX)
# ):
# index_name += ALT_INDEX_SUFFIX
# search_values = search_settings_new.model_dump()
# search_values["index_name"] = index_name
# new_search_settings_request = SavedSearchSettings(**search_values)
# else:
# new_search_settings_request = SavedSearchSettings(
# **search_settings_new.model_dump()
# )
new_search_settings = create_search_settings(
search_settings=new_search_settings_request, db_session=db_session
)
# secondary_search_settings = get_secondary_search_settings(db_session)
# Ensure the document indices have the new index immediately.
document_indices = get_all_document_indices(search_settings, new_search_settings)
for document_index in document_indices:
document_index.ensure_indices_exist(
primary_embedding_dim=search_settings.final_embedding_dim,
primary_embedding_precision=search_settings.embedding_precision,
secondary_index_embedding_dim=new_search_settings.final_embedding_dim,
secondary_index_embedding_precision=new_search_settings.embedding_precision,
)
# if secondary_search_settings:
# # Cancel any background indexing jobs
# expire_index_attempts(
# search_settings_id=secondary_search_settings.id, db_session=db_session
# )
# Pause index attempts for the currently in-use index to preserve resources.
if DISABLE_INDEX_UPDATE_ON_SWAP:
expire_index_attempts(
search_settings_id=search_settings.id, db_session=db_session
)
for cc_pair in get_connector_credential_pairs(db_session):
resync_cc_pair(
cc_pair=cc_pair,
search_settings_id=new_search_settings.id,
db_session=db_session,
)
# # Mark previous model as a past model directly
# update_search_settings_status(
# search_settings=secondary_search_settings,
# new_status=IndexModelStatus.PAST,
# db_session=db_session,
# )
# new_search_settings = create_search_settings(
# search_settings=new_search_settings_request, db_session=db_session
# )
# # Ensure Vespa has the new index immediately
# get_multipass_config(search_settings)
# get_multipass_config(new_search_settings)
# document_index = get_default_document_index(
# search_settings, new_search_settings, db_session
# )
# document_index.ensure_indices_exist(
# primary_embedding_dim=search_settings.final_embedding_dim,
# primary_embedding_precision=search_settings.embedding_precision,
# secondary_index_embedding_dim=new_search_settings.final_embedding_dim,
# secondary_index_embedding_precision=new_search_settings.embedding_precision,
# )
# # Pause index attempts for the currently in use index to preserve resources
# if DISABLE_INDEX_UPDATE_ON_SWAP:
# expire_index_attempts(
# search_settings_id=search_settings.id, db_session=db_session
# )
# for cc_pair in get_connector_credential_pairs(db_session):
# resync_cc_pair(
# cc_pair=cc_pair,
# search_settings_id=new_search_settings.id,
# db_session=db_session,
# )
# db_session.commit()
# return IdReturn(id=new_search_settings.id)
db_session.commit()
return IdReturn(id=new_search_settings.id)
@router.post("/cancel-new-embedding", dependencies=[Depends(require_vector_db)])

View File

@@ -1,6 +1,5 @@
import datetime
import json
import os
from collections.abc import Generator
from datetime import timedelta
from uuid import UUID
@@ -61,7 +60,6 @@ from onyx.db.persona import get_persona_by_id
from onyx.db.usage import increment_usage
from onyx.db.usage import UsageType
from onyx.db.user_file import get_file_id_by_user_file_id
from onyx.file_processing.extract_file_text import docx_to_txt_filename
from onyx.file_store.file_store import get_default_file_store
from onyx.llm.constants import LlmProviderNames
from onyx.llm.factory import get_default_llm
@@ -812,18 +810,6 @@ def fetch_chat_file(
if not file_record:
raise HTTPException(status_code=404, detail="File not found")
original_file_name = file_record.display_name
if file_record.file_type.startswith(
"application/vnd.openxmlformats-officedocument.wordprocessingml.document"
):
# Check if a converted text file exists for .docx files
txt_file_name = docx_to_txt_filename(original_file_name)
txt_file_id = os.path.join(os.path.dirname(file_id), txt_file_name)
txt_file_record = file_store.read_file_record(txt_file_id)
if txt_file_record:
file_record = txt_file_record
file_id = txt_file_id
media_type = file_record.file_type
file_io = file_store.read_file(file_id, mode="b")

View File

@@ -1,10 +1,20 @@
#!/bin/bash
set -e
cleanup() {
echo "Error occurred. Cleaning up..."
SCRIPT_DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" &> /dev/null && pwd )"
COMPOSE_FILE="$SCRIPT_DIR/../../deployment/docker_compose/docker-compose.yml"
COMPOSE_DEV_FILE="$SCRIPT_DIR/../../deployment/docker_compose/docker-compose.dev.yml"
# Stop and remove every dev dependency container started by this script.
# Each command is best-effort: its stderr is discarded and a failure (for
# example, a container that does not exist) is ignored so the remaining
# steps still run.
stop_and_remove_containers() {
    # Containers started directly with `docker run`.
    local standalone_containers=(onyx_postgres onyx_vespa onyx_redis onyx_minio onyx_code_interpreter)
    # OpenSearch is managed through docker compose rather than `docker run`.
    local compose_cmd=(docker compose -f "$COMPOSE_FILE" -f "$COMPOSE_DEV_FILE" --profile opensearch-enabled)

    docker stop "${standalone_containers[@]}" 2>/dev/null || true
    docker rm "${standalone_containers[@]}" 2>/dev/null || true
    "${compose_cmd[@]}" stop opensearch 2>/dev/null || true
    "${compose_cmd[@]}" rm -f opensearch 2>/dev/null || true
}
# Error handler invoked from the ERR trap: announce the failure, then tear
# down all containers so a failed run does not leave a partial environment.
cleanup() {
    printf '%s\n' "Error occurred. Cleaning up..."
    stop_and_remove_containers
}
# Trap errors and output a message, then cleanup
@@ -12,16 +22,26 @@ trap 'echo "Error occurred on line $LINENO. Exiting script." >&2; cleanup' ERR
# Usage of the script with optional volume arguments
# ./restart_containers.sh [vespa_volume] [postgres_volume] [redis_volume]
# [minio_volume] [--keep-opensearch-data]
VESPA_VOLUME=${1:-""} # Default is empty if not provided
POSTGRES_VOLUME=${2:-""} # Default is empty if not provided
REDIS_VOLUME=${3:-""} # Default is empty if not provided
MINIO_VOLUME=${4:-""} # Default is empty if not provided
# Separate the --keep-opensearch-data flag from the positional volume
# arguments so the flag may appear anywhere on the command line.
KEEP_OPENSEARCH_DATA=false
POSITIONAL_ARGS=()
for arg in "$@"; do
if [[ "$arg" == "--keep-opensearch-data" ]]; then
KEEP_OPENSEARCH_DATA=true
else
# Anything that is not the flag is kept, in order, as a positional argument.
POSITIONAL_ARGS+=("$arg")
fi
done
# Volumes are taken by position from the non-flag arguments; each one
# defaults to the empty string when it was not supplied.
VESPA_VOLUME=${POSITIONAL_ARGS[0]:-""}
POSTGRES_VOLUME=${POSITIONAL_ARGS[1]:-""}
REDIS_VOLUME=${POSITIONAL_ARGS[2]:-""}
MINIO_VOLUME=${POSITIONAL_ARGS[3]:-""}
# Stop and remove the existing containers
echo "Stopping and removing existing containers..."
docker stop onyx_postgres onyx_vespa onyx_redis onyx_minio onyx_code_interpreter 2>/dev/null || true
docker rm onyx_postgres onyx_vespa onyx_redis onyx_minio onyx_code_interpreter 2>/dev/null || true
stop_and_remove_containers
# Start the PostgreSQL container with optional volume
echo "Starting PostgreSQL container..."
@@ -39,6 +59,29 @@ else
docker run --detach --name onyx_vespa --hostname vespa-container --publish 8081:8081 --publish 19071:19071 vespaengine/vespa:8
fi
# If OPENSEARCH_ADMIN_PASSWORD is not already set, try loading it from
# .vscode/.env so existing dev setups that stored it there aren't silently
# broken.
VSCODE_ENV="$SCRIPT_DIR/../../.vscode/.env"
if [[ -z "${OPENSEARCH_ADMIN_PASSWORD:-}" && -f "$VSCODE_ENV" ]]; then
set -a
# shellcheck source=/dev/null
source "$VSCODE_ENV"
set +a
fi
# Start the OpenSearch container using the same service from docker-compose that
# our users use, setting OPENSEARCH_INITIAL_ADMIN_PASSWORD from the env's
# OPENSEARCH_ADMIN_PASSWORD if it exists, else defaulting to StrongPassword123!.
# Pass --keep-opensearch-data to preserve the opensearch-data volume across
# restarts, else the volume is deleted so the container starts fresh.
if [[ "$KEEP_OPENSEARCH_DATA" == "false" ]]; then
echo "Deleting opensearch-data volume..."
docker volume rm onyx_opensearch-data 2>/dev/null || true
fi
echo "Starting OpenSearch container..."
docker compose -f "$COMPOSE_FILE" -f "$COMPOSE_DEV_FILE" --profile opensearch-enabled up --force-recreate -d opensearch
# Start the Redis container with optional volume
echo "Starting Redis container..."
if [[ -n "$REDIS_VOLUME" ]]; then
@@ -60,7 +103,6 @@ echo "Starting Code Interpreter container..."
docker run --detach --name onyx_code_interpreter --publish 8000:8000 --user root -v /var/run/docker.sock:/var/run/docker.sock onyxdotapp/code-interpreter:latest bash ./entrypoint.sh code-interpreter-api
# Ensure alembic runs in the correct directory (backend/)
SCRIPT_DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" &> /dev/null && pwd )"
PARENT_DIR="$(dirname "$SCRIPT_DIR")"
cd "$PARENT_DIR"

View File

@@ -1,10 +0,0 @@
#!/bin/bash
# We get OPENSEARCH_ADMIN_PASSWORD from the repo .env file.
source "$(dirname "$0")/../../.vscode/.env"
cd "$(dirname "$0")/../../deployment/docker_compose"
# Start OpenSearch.
echo "Forcefully starting fresh OpenSearch container..."
docker compose -f docker-compose.opensearch.yml up --force-recreate -d opensearch

View File

@@ -5,6 +5,8 @@ Verifies that:
1. extract_ids_from_runnable_connector correctly separates hierarchy nodes from doc IDs
2. Extracted hierarchy nodes are correctly upserted to Postgres via upsert_hierarchy_nodes_batch
3. Upserting is idempotent (running twice doesn't duplicate nodes)
4. Document-to-hierarchy-node linkage is updated during pruning
5. link_hierarchy_nodes_to_documents links nodes that are also documents
Uses a mock SlimConnectorWithPermSync that yields known hierarchy nodes and slim documents,
combined with a real PostgreSQL database for verifying persistence.
@@ -27,9 +29,13 @@ from onyx.db.enums import HierarchyNodeType
from onyx.db.hierarchy import ensure_source_node_exists
from onyx.db.hierarchy import get_all_hierarchy_nodes_for_source
from onyx.db.hierarchy import get_hierarchy_node_by_raw_id
from onyx.db.hierarchy import link_hierarchy_nodes_to_documents
from onyx.db.hierarchy import update_document_parent_hierarchy_nodes
from onyx.db.hierarchy import upsert_hierarchy_nodes_batch
from onyx.db.models import Document as DbDocument
from onyx.db.models import HierarchyNode as DBHierarchyNode
from onyx.indexing.indexing_heartbeat import IndexingHeartbeatInterface
from onyx.kg.models import KGStage
# ---------------------------------------------------------------------------
# Constants
@@ -89,8 +95,18 @@ def _make_hierarchy_nodes() -> list[PydanticHierarchyNode]:
]
# Maps each slim document ID to the raw ID of the hierarchy node used as its
# parent. _make_slim_docs reads this with .get(), so a document ID that is
# absent from the map is created with a parent of None.
DOC_PARENT_MAP = {
"msg-001": CHANNEL_A_ID,
"msg-002": CHANNEL_A_ID,
"msg-003": CHANNEL_B_ID,
}
def _make_slim_docs() -> list[SlimDocument | PydanticHierarchyNode]:
return [SlimDocument(id=doc_id) for doc_id in SLIM_DOC_IDS]
return [
SlimDocument(id=doc_id, parent_hierarchy_raw_node_id=DOC_PARENT_MAP.get(doc_id))
for doc_id in SLIM_DOC_IDS
]
class MockSlimConnectorWithPermSync(SlimConnectorWithPermSync):
@@ -126,14 +142,31 @@ class MockSlimConnectorWithPermSync(SlimConnectorWithPermSync):
# ---------------------------------------------------------------------------
def _cleanup_test_hierarchy_nodes(db_session: Session) -> None:
"""Remove all hierarchy nodes for TEST_SOURCE to isolate tests."""
def _cleanup_test_data(db_session: Session) -> None:
    """Delete the rows these tests create so each test starts from a clean slate.

    Removes the Document rows whose IDs are listed in SLIM_DOC_IDS and every
    HierarchyNode row whose source is TEST_SOURCE, then commits the session.
    """
    # One DELETE per test document ID.
    for stale_doc_id in SLIM_DOC_IDS:
        matching_docs = db_session.query(DbDocument).filter(
            DbDocument.id == stale_doc_id
        )
        matching_docs.delete()

    # A single DELETE covers all hierarchy nodes belonging to the test source.
    test_source_nodes = db_session.query(DBHierarchyNode).filter(
        DBHierarchyNode.source == TEST_SOURCE
    )
    test_source_nodes.delete()

    db_session.commit()
def _create_test_documents(db_session: Session) -> list[DbDocument]:
    """Persist one minimal Document row per ID in SLIM_DOC_IDS.

    Each row uses the document ID as its semantic_id and starts with
    kg_stage set to KGStage.NOT_STARTED. The session is committed before
    returning.

    Returns:
        The created Document objects, in SLIM_DOC_IDS order.
    """
    created_docs = [
        DbDocument(
            id=doc_id,
            semantic_id=doc_id,
            kg_stage=KGStage.NOT_STARTED,
        )
        for doc_id in SLIM_DOC_IDS
    ]
    db_session.add_all(created_docs)
    db_session.commit()
    return created_docs
# ---------------------------------------------------------------------------
# Tests
# ---------------------------------------------------------------------------
@@ -147,14 +180,14 @@ def test_pruning_extracts_hierarchy_nodes(db_session: Session) -> None: # noqa:
result = extract_ids_from_runnable_connector(connector, callback=None)
# Doc IDs should include both slim doc IDs and hierarchy node raw_node_ids
# (hierarchy node IDs are added to doc_ids so they aren't pruned)
# (hierarchy node IDs are added to raw_id_to_parent so they aren't pruned)
expected_ids = {
CHANNEL_A_ID,
CHANNEL_B_ID,
CHANNEL_C_ID,
*SLIM_DOC_IDS,
}
assert result.doc_ids == expected_ids
assert result.raw_id_to_parent.keys() == expected_ids
# Hierarchy nodes should be the 3 channels
assert len(result.hierarchy_nodes) == 3
@@ -165,7 +198,7 @@ def test_pruning_extracts_hierarchy_nodes(db_session: Session) -> None: # noqa:
def test_pruning_upserts_hierarchy_nodes_to_db(db_session: Session) -> None:
"""Full flow: extract hierarchy nodes from mock connector, upsert to Postgres,
then verify the DB state (node count, parent relationships, permissions)."""
_cleanup_test_hierarchy_nodes(db_session)
_cleanup_test_data(db_session)
# Step 1: ensure the SOURCE node exists (mirrors what the pruning task does)
source_node = ensure_source_node_exists(db_session, TEST_SOURCE, commit=True)
@@ -230,7 +263,7 @@ def test_pruning_upserts_hierarchy_nodes_public_connector(
) -> None:
"""When the connector's access type is PUBLIC, all hierarchy nodes must be
marked is_public=True regardless of their external_access settings."""
_cleanup_test_hierarchy_nodes(db_session)
_cleanup_test_data(db_session)
ensure_source_node_exists(db_session, TEST_SOURCE, commit=True)
@@ -257,7 +290,7 @@ def test_pruning_upserts_hierarchy_nodes_public_connector(
def test_pruning_hierarchy_node_upsert_idempotency(db_session: Session) -> None:
"""Upserting the same hierarchy nodes twice must not create duplicates.
The second call should update existing rows in place."""
_cleanup_test_hierarchy_nodes(db_session)
_cleanup_test_data(db_session)
ensure_source_node_exists(db_session, TEST_SOURCE, commit=True)
@@ -295,7 +328,7 @@ def test_pruning_hierarchy_node_upsert_idempotency(db_session: Session) -> None:
def test_pruning_hierarchy_node_upsert_updates_fields(db_session: Session) -> None:
"""Upserting a hierarchy node with changed fields should update the existing row."""
_cleanup_test_hierarchy_nodes(db_session)
_cleanup_test_data(db_session)
ensure_source_node_exists(db_session, TEST_SOURCE, commit=True)
@@ -342,3 +375,193 @@ def test_pruning_hierarchy_node_upsert_updates_fields(db_session: Session) -> No
assert db_node.is_public is True
assert db_node.external_user_emails is not None
assert set(db_node.external_user_emails) == {"new_user@example.com"}
# ---------------------------------------------------------------------------
# Document-to-hierarchy-node linkage tests
# ---------------------------------------------------------------------------
def test_extraction_preserves_parent_hierarchy_raw_node_id(
    db_session: Session,  # noqa: ARG001
) -> None:
    """The parent_hierarchy_raw_node_id on each SlimDocument must survive
    extract_ids_from_runnable_connector and appear in raw_id_to_parent."""
    extraction = extract_ids_from_runnable_connector(
        MockSlimConnectorWithPermSync(), callback=None
    )
    parents = extraction.raw_id_to_parent

    # Every document maps to the parent declared in DOC_PARENT_MAP
    for doc_id in DOC_PARENT_MAP:
        expected_parent = DOC_PARENT_MAP[doc_id]
        assert (
            parents[doc_id] == expected_parent
        ), f"raw_id_to_parent[{doc_id}] should be {expected_parent}"

    # Channel (hierarchy node) entries are present but carry no parent
    for channel_id in (CHANNEL_A_ID, CHANNEL_B_ID, CHANNEL_C_ID):
        assert parents[channel_id] is None
def test_update_document_parent_hierarchy_nodes(db_session: Session) -> None:
    """After update_document_parent_hierarchy_nodes runs, every document in the
    mapping must have Document.parent_hierarchy_node_id set to its node's ID."""
    _cleanup_test_data(db_session)

    source_node = ensure_source_node_exists(db_session, TEST_SOURCE, commit=True)
    upserted_nodes = upsert_hierarchy_nodes_batch(
        db_session=db_session,
        nodes=_make_hierarchy_nodes(),
        source=TEST_SOURCE,
        commit=True,
        is_connector_public=False,
    )
    node_id_by_raw = {node.raw_node_id: node.id for node in upserted_nodes}

    # Freshly created documents start out without a parent
    for fresh_doc in _create_test_documents(db_session):
        assert fresh_doc.parent_hierarchy_node_id is None

    # Resolve raw parent IDs to DB node IDs, falling back to the source node
    # (same logic as _resolve_and_update_document_parents)
    resolved: dict[str, int | None] = {
        doc_id: node_id_by_raw.get(raw_parent, source_node.id)
        for doc_id, raw_parent in DOC_PARENT_MAP.items()
    }

    updated = update_document_parent_hierarchy_nodes(
        db_session=db_session,
        doc_parent_map=resolved,
        commit=True,
    )
    assert updated == len(SLIM_DOC_IDS)

    # Re-read from the database so the assertions see the committed state
    db_session.expire_all()
    for doc_id, raw_parent in DOC_PARENT_MAP.items():
        doc = db_session.get(DbDocument, doc_id)
        assert doc is not None
        assert (
            doc.parent_hierarchy_node_id == node_id_by_raw[raw_parent]
        ), f"Document {doc_id} should point to node for {raw_parent}"
def test_update_document_parent_is_idempotent(db_session: Session) -> None:
    """Applying the same document-to-parent mapping twice must touch every
    document the first time and no rows the second time."""
    _cleanup_test_data(db_session)

    ensure_source_node_exists(db_session, TEST_SOURCE, commit=True)
    upserted_nodes = upsert_hierarchy_nodes_batch(
        db_session=db_session,
        nodes=_make_hierarchy_nodes(),
        source=TEST_SOURCE,
        commit=True,
        is_connector_public=False,
    )
    node_id_by_raw = {node.raw_node_id: node.id for node in upserted_nodes}
    _create_test_documents(db_session)

    resolved: dict[str, int | None] = {}
    for doc_id, raw_parent in DOC_PARENT_MAP.items():
        resolved[doc_id] = node_id_by_raw[raw_parent]

    def _apply_mapping() -> int:
        return update_document_parent_hierarchy_nodes(
            db_session=db_session,
            doc_parent_map=resolved,
            commit=True,
        )

    # First pass links every document; the repeat finds nothing left to change
    assert _apply_mapping() == len(SLIM_DOC_IDS)
    assert _apply_mapping() == 0
def test_link_hierarchy_nodes_to_documents_for_confluence(
    db_session: Session,
) -> None:
    """For sources in SOURCES_WITH_HIERARCHY_NODE_DOCUMENTS (e.g. Confluence),
    link_hierarchy_nodes_to_documents should set HierarchyNode.document_id
    when a hierarchy node's raw_node_id matches a document ID.

    The Confluence fixtures are purged both before the test body and in a
    ``finally`` block, so a failed assertion cannot leak rows into later tests
    and a row leaked by an earlier aborted run cannot break this one.
    """
    _cleanup_test_data(db_session)

    confluence_source = DocumentSource.CONFLUENCE
    page_node_id = "confluence-page-123"

    def _purge_confluence_fixtures() -> None:
        """Remove the test page document and all Confluence hierarchy nodes."""
        # Discard any half-finished transaction left behind by a failed step
        # so the deletes below run on a usable session.
        db_session.rollback()
        db_session.query(DbDocument).filter(DbDocument.id == page_node_id).delete()
        db_session.query(DBHierarchyNode).filter(
            DBHierarchyNode.source == confluence_source
        ).delete()
        db_session.commit()

    # Clean up any existing Confluence hierarchy nodes (and a leftover test
    # document, which would otherwise collide with the insert below)
    _purge_confluence_fixtures()

    try:
        ensure_source_node_exists(db_session, confluence_source, commit=True)

        # Create a hierarchy node whose raw_node_id matches a document ID
        nodes = [
            PydanticHierarchyNode(
                raw_node_id=page_node_id,
                raw_parent_id=None,
                display_name="Test Page",
                link="https://wiki.example.com/page/123",
                node_type=HierarchyNodeType.PAGE,
            ),
        ]
        upsert_hierarchy_nodes_batch(
            db_session=db_session,
            nodes=nodes,
            source=confluence_source,
            commit=True,
            is_connector_public=False,
        )

        # Verify the node exists but has no document_id yet
        db_node = get_hierarchy_node_by_raw_id(
            db_session, page_node_id, confluence_source
        )
        assert db_node is not None
        assert db_node.document_id is None

        # Create a document with the same ID as the hierarchy node
        doc = DbDocument(
            id=page_node_id,
            semantic_id="Test Page",
            kg_stage=KGStage.NOT_STARTED,
        )
        db_session.add(doc)
        db_session.commit()

        # Link nodes to documents
        linked = link_hierarchy_nodes_to_documents(
            db_session=db_session,
            document_ids=[page_node_id],
            source=confluence_source,
            commit=True,
        )
        assert linked == 1

        # Verify the hierarchy node now has document_id set
        db_session.expire_all()
        db_node = get_hierarchy_node_by_raw_id(
            db_session, page_node_id, confluence_source
        )
        assert db_node is not None
        assert db_node.document_id == page_node_id
    finally:
        # Cleanup runs whether or not the assertions above passed
        _purge_confluence_fixtures()
def test_link_hierarchy_nodes_skips_non_hierarchy_sources(
    db_session: Session,
) -> None:
    """For a source that does not support hierarchy-node-as-document
    (e.g. Slack, Google Drive), link_hierarchy_nodes_to_documents links nothing
    and reports 0."""
    assert (
        link_hierarchy_nodes_to_documents(
            db_session=db_session,
            document_ids=SLIM_DOC_IDS,
            source=TEST_SOURCE,  # Slack — not in SOURCES_WITH_HIERARCHY_NODE_DOCUMENTS
            commit=False,
        )
        == 0
    )

View File

@@ -11,6 +11,7 @@ from onyx.context.search.models import SavedSearchSettings
from onyx.context.search.models import SearchSettingsCreationRequest
from onyx.db.enums import EmbeddingPrecision
from onyx.db.llm import fetch_default_contextual_rag_model
from onyx.db.llm import fetch_existing_llm_provider
from onyx.db.llm import update_default_contextual_model
from onyx.db.llm import upsert_llm_provider
from onyx.db.models import IndexModelStatus
@@ -37,6 +38,8 @@ def _create_llm_provider_and_model(
model_name: str,
) -> None:
"""Insert an LLM provider with a single visible model configuration."""
if fetch_existing_llm_provider(name=provider_name, db_session=db_session):
return
upsert_llm_provider(
LLMProviderUpsertRequest(
name=provider_name,
@@ -146,8 +149,8 @@ def baseline_search_settings(
)
@pytest.mark.skip(reason="Set new search settings is temporarily disabled.")
@patch("onyx.db.swap_index.get_all_document_indices")
@patch("onyx.server.manage.search_settings.get_all_document_indices")
@patch("onyx.server.manage.search_settings.get_default_document_index")
@patch("onyx.indexing.indexing_pipeline.get_llm_for_contextual_rag")
@patch("onyx.indexing.indexing_pipeline.index_doc_batch_with_handler")
@@ -155,6 +158,7 @@ def test_indexing_pipeline_uses_contextual_rag_settings_from_create(
mock_index_handler: MagicMock,
mock_get_llm: MagicMock,
mock_get_doc_index: MagicMock, # noqa: ARG001
mock_get_all_doc_indices_search_settings: MagicMock, # noqa: ARG001
mock_get_all_doc_indices: MagicMock,
baseline_search_settings: None, # noqa: ARG001
db_session: Session,
@@ -196,8 +200,8 @@ def test_indexing_pipeline_uses_contextual_rag_settings_from_create(
)
@pytest.mark.skip(reason="Set new search settings is temporarily disabled.")
@patch("onyx.db.swap_index.get_all_document_indices")
@patch("onyx.server.manage.search_settings.get_all_document_indices")
@patch("onyx.server.manage.search_settings.get_default_document_index")
@patch("onyx.indexing.indexing_pipeline.get_llm_for_contextual_rag")
@patch("onyx.indexing.indexing_pipeline.index_doc_batch_with_handler")
@@ -205,6 +209,7 @@ def test_indexing_pipeline_uses_updated_contextual_rag_settings(
mock_index_handler: MagicMock,
mock_get_llm: MagicMock,
mock_get_doc_index: MagicMock, # noqa: ARG001
mock_get_all_doc_indices_search_settings: MagicMock, # noqa: ARG001
mock_get_all_doc_indices: MagicMock,
baseline_search_settings: None, # noqa: ARG001
db_session: Session,
@@ -266,7 +271,7 @@ def test_indexing_pipeline_uses_updated_contextual_rag_settings(
)
@pytest.mark.skip(reason="Set new search settings is temporarily disabled.")
@patch("onyx.server.manage.search_settings.get_all_document_indices")
@patch("onyx.server.manage.search_settings.get_default_document_index")
@patch("onyx.indexing.indexing_pipeline.get_llm_for_contextual_rag")
@patch("onyx.indexing.indexing_pipeline.index_doc_batch_with_handler")
@@ -274,6 +279,7 @@ def test_indexing_pipeline_skips_llm_when_contextual_rag_disabled(
mock_index_handler: MagicMock,
mock_get_llm: MagicMock,
mock_get_doc_index: MagicMock, # noqa: ARG001
mock_get_all_doc_indices_search_settings: MagicMock, # noqa: ARG001
baseline_search_settings: None, # noqa: ARG001
db_session: Session,
) -> None:

View File

@@ -1,4 +1,3 @@
import pytest
import requests
from tests.integration.common_utils.constants import API_SERVER_URL
@@ -365,7 +364,6 @@ def test_update_contextual_rag_missing_model_name(
assert "Provider name and model name are required" in response.json()["detail"]
@pytest.mark.skip(reason="Set new search settings is temporarily disabled.")
def test_set_new_search_settings_with_contextual_rag(
reset: None, # noqa: ARG001
admin_user: DATestUser,
@@ -394,7 +392,6 @@ def test_set_new_search_settings_with_contextual_rag(
_cancel_new_embedding(admin_user)
@pytest.mark.skip(reason="Set new search settings is temporarily disabled.")
def test_set_new_search_settings_without_contextual_rag(
reset: None, # noqa: ARG001
admin_user: DATestUser,
@@ -419,7 +416,6 @@ def test_set_new_search_settings_without_contextual_rag(
_cancel_new_embedding(admin_user)
@pytest.mark.skip(reason="Set new search settings is temporarily disabled.")
def test_set_new_then_update_inference_settings(
reset: None, # noqa: ARG001
admin_user: DATestUser,
@@ -457,7 +453,6 @@ def test_set_new_then_update_inference_settings(
_cancel_new_embedding(admin_user)
@pytest.mark.skip(reason="Set new search settings is temporarily disabled.")
def test_set_new_search_settings_replaces_previous_secondary(
reset: None, # noqa: ARG001
admin_user: DATestUser,

View File

@@ -1,8 +1,8 @@
#!/bin/bash
set -e
set -euo pipefail
# Expected resource requirements
# Expected resource requirements (overridden below if --lite)
EXPECTED_DOCKER_RAM_GB=10
EXPECTED_DISK_GB=32
@@ -10,6 +10,10 @@ EXPECTED_DISK_GB=32
SHUTDOWN_MODE=false
DELETE_DATA_MODE=false
INCLUDE_CRAFT=false # Disabled by default, use --include-craft to enable
LITE_MODE=false # Disabled by default, use --lite to enable
NO_PROMPT=false
DRY_RUN=false
VERBOSE=false
while [[ $# -gt 0 ]]; do
case $1 in
@@ -25,6 +29,22 @@ while [[ $# -gt 0 ]]; do
INCLUDE_CRAFT=true
shift
;;
--lite)
LITE_MODE=true
shift
;;
--no-prompt)
NO_PROMPT=true
shift
;;
--dry-run)
DRY_RUN=true
shift
;;
--verbose)
VERBOSE=true
shift
;;
--help|-h)
echo "Onyx Installation Script"
echo ""
@@ -32,15 +52,21 @@ while [[ $# -gt 0 ]]; do
echo ""
echo "Options:"
echo " --include-craft Enable Onyx Craft (AI-powered web app building)"
echo " --lite Deploy Onyx Lite (no Vespa, Redis, or model servers)"
echo " --shutdown Stop (pause) Onyx containers"
echo " --delete-data Remove all Onyx data (containers, volumes, and files)"
echo " --no-prompt Run non-interactively with defaults (for CI/automation)"
echo " --dry-run Show what would be done without making changes"
echo " --verbose Show detailed output for debugging"
echo " --help, -h Show this help message"
echo ""
echo "Examples:"
echo " $0 # Install Onyx"
echo " $0 --lite # Install Onyx Lite (minimal deployment)"
echo " $0 --include-craft # Install Onyx with Craft enabled"
echo " $0 --shutdown # Pause Onyx services"
echo " $0 --delete-data # Completely remove Onyx and all data"
echo " $0 --no-prompt # Non-interactive install with defaults"
exit 0
;;
*)
@@ -51,8 +77,116 @@ while [[ $# -gt 0 ]]; do
esac
done
if [[ "$VERBOSE" = true ]]; then
set -x
fi
if [[ "$LITE_MODE" = true ]] && [[ "$INCLUDE_CRAFT" = true ]]; then
echo "ERROR: --lite and --include-craft cannot be used together."
echo "Craft requires services (Vespa, Redis, background workers) that lite mode disables."
exit 1
fi
# Lite mode needs far fewer resources (no Vespa, Redis, or model servers)
if [[ "$LITE_MODE" = true ]]; then
EXPECTED_DOCKER_RAM_GB=4
EXPECTED_DISK_GB=16
fi
INSTALL_ROOT="${INSTALL_PREFIX:-onyx_data}"
LITE_COMPOSE_FILE="docker-compose.onyx-lite.yml"
# Build the -f flags for docker compose. For shutdown/delete-data we auto-detect
# whether the lite overlay was previously downloaded; for install we use --lite.
compose_file_args() {
    # Prints the `-f` flags to pass to docker compose. The lite overlay is
    # appended when LITE_MODE is true or when the overlay file already exists
    # under ${INSTALL_ROOT}/deployment.
    local overlay_path="${INSTALL_ROOT}/deployment/${LITE_COMPOSE_FILE}"

    if [[ "$LITE_MODE" != true ]] && [[ ! -f "$overlay_path" ]]; then
        echo "-f docker-compose.yml"
        return
    fi

    echo "-f docker-compose.yml -f ${LITE_COMPOSE_FILE}"
}
# --- Temp file cleanup ---
TMPFILES=()
cleanup_tmpfiles() {
    # Best-effort removal of every path recorded in TMPFILES; failures are
    # ignored so cleanup never changes the script's outcome.
    local tmp_path
    for tmp_path in "${TMPFILES[@]:-}"; do
        if ! rm -rf "$tmp_path" 2>/dev/null; then
            :
        fi
    done
}
trap cleanup_tmpfiles EXIT
mktempfile() {
    # Creates a temp file with mktemp, records its path in TMPFILES (which
    # cleanup_tmpfiles iterates), and prints the path on stdout.
    # Returns 1 without printing anything if mktemp fails.
    #
    # NOTE(review): when invoked as "$(mktempfile)" the TMPFILES append happens
    # inside the command-substitution subshell and is not visible to the parent
    # shell, so the file would not be cleaned up — verify against callers.
    local f
    # Propagate failure explicitly: bash clears errexit inside command
    # substitutions, so without this an empty path would be printed with
    # status 0.
    f="$(mktemp)" || return 1
    TMPFILES+=("$f")
    echo "$f"
}
# --- Downloader detection (curl with wget fallback) ---
DOWNLOADER=""
detect_downloader() {
    # Sets DOWNLOADER to the first available tool, preferring curl over wget.
    # Prints an error and exits the script with status 1 if neither is found.
    local candidate
    for candidate in curl wget; do
        if command -v "$candidate" &> /dev/null; then
            DOWNLOADER="$candidate"
            return 0
        fi
    done

    echo "ERROR: Neither curl nor wget found. Please install one and retry."
    exit 1
}
detect_downloader
download_file() {
    # Downloads URL ($1) to the output path ($2) using the tool named in
    # DOWNLOADER: curl when it is "curl", wget otherwise. Both are configured
    # for three attempts; the function's status is the tool's exit status.
    local url="$1"
    local output="$2"

    case "$DOWNLOADER" in
        curl)
            curl -fsSL --retry 3 --retry-delay 2 --retry-connrefused -o "$output" "$url"
            ;;
        *)
            wget -q --tries=3 --timeout=20 -O "$output" "$url"
            ;;
    esac
}
# --- Interactive prompt helpers ---
is_interactive() {
    # Succeeds only when NO_PROMPT is false and stdin (fd 0) is a terminal.
    if [[ "$NO_PROMPT" != false ]]; then
        return 1
    fi
    [[ -t 0 ]]
}
prompt_or_default() {
    # Stores the user's answer to prompt $1 in REPLY. Falls back to the
    # default $2 when the session is not interactive or the answer is empty.
    local prompt_text="$1"
    local default_value="$2"

    if ! is_interactive; then
        REPLY="$default_value"
        return
    fi

    read -p "$prompt_text" -r REPLY
    if [[ -z "$REPLY" ]]; then
        REPLY="$default_value"
    fi
}
prompt_yn_or_default() {
    # Reads a single keypress for prompt $1 into REPLY when interactive;
    # otherwise sets REPLY to the default $2. An empty interactive answer is
    # left as-is (the default is NOT substituted in that case).
    local prompt_text="$1"
    local default_value="$2"

    if ! is_interactive; then
        REPLY="$default_value"
        return
    fi

    read -p "$prompt_text" -n 1 -r
    echo ""
}
prompt_enter_or_skip() {
    # When interactive, prints prompt $1 (with escape sequences interpreted)
    # and waits for a line of input; does nothing otherwise.
    local prompt_text="$1"

    is_interactive || return 0

    echo -e "$prompt_text"
    read -r
}
# Colors for output
RED='\033[0;31m'
GREEN='\033[0;32m'
@@ -111,7 +245,7 @@ if [ "$SHUTDOWN_MODE" = true ]; then
fi
# Stop containers (without removing them)
(cd "${INSTALL_ROOT}/deployment" && $COMPOSE_CMD -f docker-compose.yml stop)
(cd "${INSTALL_ROOT}/deployment" && $COMPOSE_CMD $(compose_file_args) stop)
if [ $? -eq 0 ]; then
print_success "Onyx containers stopped (paused)"
else
@@ -140,12 +274,17 @@ if [ "$DELETE_DATA_MODE" = true ]; then
echo " • All downloaded files and configurations"
echo " • All user data and documents"
echo ""
read -p "Are you sure you want to continue? Type 'DELETE' to confirm: " -r
echo ""
if [ "$REPLY" != "DELETE" ]; then
print_info "Operation cancelled."
exit 0
if is_interactive; then
read -p "Are you sure you want to continue? Type 'DELETE' to confirm: " -r
echo ""
if [ "$REPLY" != "DELETE" ]; then
print_info "Operation cancelled."
exit 0
fi
else
print_error "Cannot confirm destructive operation in non-interactive mode."
print_info "Run interactively or remove the ${INSTALL_ROOT} directory manually."
exit 1
fi
print_info "Removing Onyx containers and volumes..."
@@ -164,7 +303,7 @@ if [ "$DELETE_DATA_MODE" = true ]; then
fi
# Stop and remove containers with volumes
(cd "${INSTALL_ROOT}/deployment" && $COMPOSE_CMD -f docker-compose.yml down -v)
(cd "${INSTALL_ROOT}/deployment" && $COMPOSE_CMD $(compose_file_args) down -v)
if [ $? -eq 0 ]; then
print_success "Onyx containers and volumes removed"
else
@@ -198,8 +337,13 @@ echo " \____/|_| |_|\__, /_/\_\ "
echo " __/ | "
echo " |___/ "
echo -e "${NC}"
echo "Welcome to Onyx Installation Script"
echo "===================================="
if [[ "$LITE_MODE" = true ]]; then
echo "Welcome to Onyx Lite Installation Script"
echo "========================================="
else
echo "Welcome to Onyx Installation Script"
echo "===================================="
fi
echo ""
# User acknowledgment section
@@ -207,10 +351,14 @@ echo -e "${YELLOW}${BOLD}This script will:${NC}"
echo "1. Download deployment files for Onyx into a new '${INSTALL_ROOT}' directory"
echo "2. Check your system resources (Docker, memory, disk space)"
echo "3. Guide you through deployment options (version, authentication)"
if [[ "$LITE_MODE" = true ]]; then
echo ""
echo -e "${YELLOW}${BOLD}Lite mode:${NC} Vespa, Redis, and model servers will NOT be started."
echo "This gives you the core chat experience with lower resource requirements."
fi
echo ""
# Only prompt for acknowledgment if running interactively
if [ -t 0 ]; then
if is_interactive; then
echo -e "${YELLOW}${BOLD}Please acknowledge and press Enter to continue...${NC}"
read -r
echo ""
@@ -219,6 +367,26 @@ else
echo ""
fi
# Detect OS (including WSL)
IS_WSL=false
if [[ -n "${WSL_DISTRO_NAME:-}" ]] || grep -qi microsoft /proc/version 2>/dev/null; then
IS_WSL=true
fi
# Dry-run: show plan and exit
if [[ "$DRY_RUN" = true ]]; then
print_info "Dry run mode — showing what would happen:"
echo " • Install root: ${INSTALL_ROOT}"
echo " • Lite mode: ${LITE_MODE}"
echo " • Include Craft: ${INCLUDE_CRAFT}"
echo " • OS type: ${OSTYPE:-unknown} (WSL: ${IS_WSL})"
echo " • Downloader: ${DOWNLOADER}"
echo " • Min RAM: ${EXPECTED_DOCKER_RAM_GB}GB, Min disk: ${EXPECTED_DISK_GB}GB"
echo ""
print_success "Dry run complete (no changes made)"
exit 0
fi
# GitHub repo base URL - using main branch
GITHUB_RAW_URL="https://raw.githubusercontent.com/onyx-dot-app/onyx/main/deployment/docker_compose"
@@ -260,41 +428,35 @@ else
exit 1
fi
# Function to compare version numbers
# Returns 0 if $1 <= $2, 1 if $1 > $2
# Handles missing or non-numeric parts gracefully (treats them as 0)
version_compare() {
    # Compares two dotted versions (major.minor.patch).
    # Returns 0 if $1 <= $2, 1 if $1 > $2.
    # Missing, empty, or non-numeric components are treated as 0, so "2" is
    # read as 2.0.0. Components beyond the third are ignored.
    local version1="${1:-0.0.0}"
    local version2="${2:-0.0.0}"

    local v1_major v1_minor v1_patch v1_rest
    local v2_major v2_minor v2_patch v2_rest

    # Split on dots with read rather than cut: `cut -d. -fN` prints the whole
    # string when it contains no dot, which would turn "2" into 2.2.2.
    IFS=. read -r v1_major v1_minor v1_patch v1_rest <<< "$version1"
    IFS=. read -r v2_major v2_minor v2_patch v2_rest <<< "$version2"

    # Default non-numeric or empty parts to 0
    [[ "$v1_major" =~ ^[0-9]+$ ]] || v1_major=0
    [[ "$v1_minor" =~ ^[0-9]+$ ]] || v1_minor=0
    [[ "$v1_patch" =~ ^[0-9]+$ ]] || v1_patch=0
    [[ "$v2_major" =~ ^[0-9]+$ ]] || v2_major=0
    [[ "$v2_minor" =~ ^[0-9]+$ ]] || v2_minor=0
    [[ "$v2_patch" =~ ^[0-9]+$ ]] || v2_patch=0

    # Major decides unless equal
    if [ "$v1_major" -lt "$v2_major" ]; then return 0
    elif [ "$v1_major" -gt "$v2_major" ]; then return 1; fi

    # Then minor
    if [ "$v1_minor" -lt "$v2_minor" ]; then return 0
    elif [ "$v1_minor" -gt "$v2_minor" ]; then return 1; fi

    # Patch is the final tie-breaker; its test result is the return status
    [ "$v1_patch" -le "$v2_patch" ]
}
# Check Docker daemon
@@ -371,8 +533,7 @@ if [ "$RESOURCE_WARNING" = true ]; then
echo ""
print_warning "Onyx recommends at least ${EXPECTED_DOCKER_RAM_GB}GB RAM and ${EXPECTED_DISK_GB}GB disk space for optimal performance."
echo ""
read -p "Do you want to continue anyway? (y/N): " -n 1 -r
echo ""
prompt_yn_or_default "Do you want to continue anyway? (y/N): " "y"
if [[ ! $REPLY =~ ^[Yy]$ ]]; then
print_info "Installation cancelled. Please allocate more resources and try again."
exit 1
@@ -397,6 +558,9 @@ print_info "This step downloads all necessary configuration files from GitHub...
echo ""
print_info "Downloading the following files:"
echo " • docker-compose.yml - Main Docker Compose configuration"
if [[ "$LITE_MODE" = true ]]; then
echo "${LITE_COMPOSE_FILE} - Lite mode overlay"
fi
echo " • env.template - Environment variables template"
echo " • nginx/app.conf.template - Nginx web server configuration"
echo " • nginx/run-nginx.sh - Nginx startup script"
@@ -406,7 +570,7 @@ echo ""
# Download Docker Compose file
COMPOSE_FILE="${INSTALL_ROOT}/deployment/docker-compose.yml"
print_info "Downloading docker-compose.yml..."
if curl -fsSL -o "$COMPOSE_FILE" "${GITHUB_RAW_URL}/docker-compose.yml" 2>/dev/null; then
if download_file "${GITHUB_RAW_URL}/docker-compose.yml" "$COMPOSE_FILE" 2>/dev/null; then
print_success "Docker Compose file downloaded successfully"
# Check if Docker Compose version is older than 2.24.0 and show warning
@@ -431,8 +595,7 @@ if curl -fsSL -o "$COMPOSE_FILE" "${GITHUB_RAW_URL}/docker-compose.yml" 2>/dev/n
echo ""
print_warning "The installation will continue, but may fail if Docker Compose cannot parse the file."
echo ""
read -p "Do you want to continue anyway? (y/N): " -n 1 -r
echo ""
prompt_yn_or_default "Do you want to continue anyway? (y/N): " "y"
if [[ ! $REPLY =~ ^[Yy]$ ]]; then
print_info "Installation cancelled. Please upgrade Docker Compose or manually edit the docker-compose.yml file."
exit 1
@@ -445,10 +608,23 @@ else
exit 1
fi
# Download lite overlay if --lite was requested
if [[ "$LITE_MODE" = true ]]; then
LITE_FILE="${INSTALL_ROOT}/deployment/${LITE_COMPOSE_FILE}"
print_info "Downloading ${LITE_COMPOSE_FILE} (lite overlay)..."
if download_file "${GITHUB_RAW_URL}/${LITE_COMPOSE_FILE}" "$LITE_FILE" 2>/dev/null; then
print_success "Lite overlay downloaded successfully"
else
print_error "Failed to download lite overlay"
print_info "Please ensure you have internet connection and try again"
exit 1
fi
fi
# Download env.template file
ENV_TEMPLATE="${INSTALL_ROOT}/deployment/env.template"
print_info "Downloading env.template..."
if curl -fsSL -o "$ENV_TEMPLATE" "${GITHUB_RAW_URL}/env.template" 2>/dev/null; then
if download_file "${GITHUB_RAW_URL}/env.template" "$ENV_TEMPLATE" 2>/dev/null; then
print_success "Environment template downloaded successfully"
else
print_error "Failed to download env.template"
@@ -462,7 +638,7 @@ NGINX_BASE_URL="https://raw.githubusercontent.com/onyx-dot-app/onyx/main/deploym
# Download app.conf.template
NGINX_CONFIG="${INSTALL_ROOT}/data/nginx/app.conf.template"
print_info "Downloading nginx configuration template..."
if curl -fsSL -o "$NGINX_CONFIG" "$NGINX_BASE_URL/app.conf.template" 2>/dev/null; then
if download_file "$NGINX_BASE_URL/app.conf.template" "$NGINX_CONFIG" 2>/dev/null; then
print_success "Nginx configuration template downloaded"
else
print_error "Failed to download nginx configuration template"
@@ -473,7 +649,7 @@ fi
# Download run-nginx.sh script
NGINX_RUN_SCRIPT="${INSTALL_ROOT}/data/nginx/run-nginx.sh"
print_info "Downloading nginx startup script..."
if curl -fsSL -o "$NGINX_RUN_SCRIPT" "$NGINX_BASE_URL/run-nginx.sh" 2>/dev/null; then
if download_file "$NGINX_BASE_URL/run-nginx.sh" "$NGINX_RUN_SCRIPT" 2>/dev/null; then
chmod +x "$NGINX_RUN_SCRIPT"
print_success "Nginx startup script downloaded and made executable"
else
@@ -485,7 +661,7 @@ fi
# Download README file
README_FILE="${INSTALL_ROOT}/README.md"
print_info "Downloading README.md..."
if curl -fsSL -o "$README_FILE" "${GITHUB_RAW_URL}/README.md" 2>/dev/null; then
if download_file "${GITHUB_RAW_URL}/README.md" "$README_FILE" 2>/dev/null; then
print_success "README.md downloaded successfully"
else
print_error "Failed to download README.md"
@@ -513,7 +689,7 @@ if [ -d "${INSTALL_ROOT}/deployment" ] && [ -f "${INSTALL_ROOT}/deployment/docke
if [ -n "$COMPOSE_CMD" ]; then
# Check if any containers are running
RUNNING_CONTAINERS=$(cd "${INSTALL_ROOT}/deployment" && $COMPOSE_CMD -f docker-compose.yml ps -q 2>/dev/null | wc -l)
RUNNING_CONTAINERS=$(cd "${INSTALL_ROOT}/deployment" && $COMPOSE_CMD $(compose_file_args) ps -q 2>/dev/null | wc -l)
if [ "$RUNNING_CONTAINERS" -gt 0 ]; then
print_error "Onyx services are currently running!"
echo ""
@@ -534,7 +710,7 @@ if [ -f "$ENV_FILE" ]; then
echo "• Press Enter to restart with current configuration"
echo "• Type 'update' to update to a newer version"
echo ""
read -p "Choose an option [default: restart]: " -r
prompt_or_default "Choose an option [default: restart]: " ""
echo ""
if [ "$REPLY" = "update" ]; then
@@ -543,22 +719,19 @@ if [ -f "$ENV_FILE" ]; then
echo "• Press Enter for latest (recommended)"
echo "• Type a specific tag (e.g., v0.1.0)"
echo ""
# If --include-craft was passed, default to craft-latest
if [ "$INCLUDE_CRAFT" = true ]; then
read -p "Enter tag [default: craft-latest]: " -r VERSION
prompt_or_default "Enter tag [default: craft-latest]: " "craft-latest"
VERSION="$REPLY"
else
read -p "Enter tag [default: latest]: " -r VERSION
prompt_or_default "Enter tag [default: latest]: " "latest"
VERSION="$REPLY"
fi
echo ""
if [ -z "$VERSION" ]; then
if [ "$INCLUDE_CRAFT" = true ]; then
VERSION="craft-latest"
print_info "Selected: craft-latest (Craft enabled)"
else
VERSION="latest"
print_info "Selected: Latest version"
fi
if [ "$INCLUDE_CRAFT" = true ] && [ "$VERSION" = "craft-latest" ]; then
print_info "Selected: craft-latest (Craft enabled)"
elif [ "$VERSION" = "latest" ]; then
print_info "Selected: Latest version"
else
print_info "Selected: $VERSION"
fi
@@ -595,23 +768,21 @@ else
echo "• Press Enter for craft-latest (recommended for Craft)"
echo "• Type a specific tag (e.g., craft-v1.0.0)"
echo ""
read -p "Enter tag [default: craft-latest]: " -r VERSION
prompt_or_default "Enter tag [default: craft-latest]: " "craft-latest"
VERSION="$REPLY"
else
echo "• Press Enter for latest (recommended)"
echo "• Type a specific tag (e.g., v0.1.0)"
echo ""
read -p "Enter tag [default: latest]: " -r VERSION
prompt_or_default "Enter tag [default: latest]: " "latest"
VERSION="$REPLY"
fi
echo ""
if [ -z "$VERSION" ]; then
if [ "$INCLUDE_CRAFT" = true ]; then
VERSION="craft-latest"
print_info "Selected: craft-latest (Craft enabled)"
else
VERSION="latest"
print_info "Selected: Latest tag"
fi
if [ "$INCLUDE_CRAFT" = true ] && [ "$VERSION" = "craft-latest" ]; then
print_info "Selected: craft-latest (Craft enabled)"
elif [ "$VERSION" = "latest" ]; then
print_info "Selected: Latest tag"
else
print_info "Selected: $VERSION"
fi
@@ -686,6 +857,13 @@ else
echo ""
fi
# Reject craft image tags when running in lite mode
if [[ "$LITE_MODE" = true ]] && [[ "${VERSION:-}" == craft-* ]]; then
print_error "Cannot use a craft image tag (${VERSION}) with --lite."
print_info "Craft requires services (Vespa, Redis, background workers) that lite mode disables."
exit 1
fi
# Function to check if a port is available
is_port_available() {
local port=$1
@@ -771,7 +949,7 @@ print_step "Pulling Docker images"
print_info "This may take several minutes depending on your internet connection..."
echo ""
print_info "Downloading Docker images (this may take a while)..."
(cd "${INSTALL_ROOT}/deployment" && $COMPOSE_CMD -f docker-compose.yml pull --quiet)
(cd "${INSTALL_ROOT}/deployment" && $COMPOSE_CMD $(compose_file_args) pull --quiet)
if [ $? -eq 0 ]; then
print_success "Docker images downloaded successfully"
else
@@ -785,9 +963,9 @@ print_info "Launching containers..."
echo ""
if [ "$USE_LATEST" = true ]; then
print_info "Force pulling latest images and recreating containers..."
(cd "${INSTALL_ROOT}/deployment" && $COMPOSE_CMD -f docker-compose.yml up -d --pull always --force-recreate)
(cd "${INSTALL_ROOT}/deployment" && $COMPOSE_CMD $(compose_file_args) up -d --pull always --force-recreate)
else
(cd "${INSTALL_ROOT}/deployment" && $COMPOSE_CMD -f docker-compose.yml up -d)
(cd "${INSTALL_ROOT}/deployment" && $COMPOSE_CMD $(compose_file_args) up -d)
fi
if [ $? -ne 0 ]; then
print_error "Failed to start Onyx services"
@@ -809,7 +987,7 @@ echo ""
# Check for restart loops
print_info "Checking container health status..."
RESTART_ISSUES=false
CONTAINERS=$(cd "${INSTALL_ROOT}/deployment" && $COMPOSE_CMD -f docker-compose.yml ps -q 2>/dev/null)
CONTAINERS=$(cd "${INSTALL_ROOT}/deployment" && $COMPOSE_CMD $(compose_file_args) ps -q 2>/dev/null)
for CONTAINER in $CONTAINERS; do
PROJECT_NAME="$(basename "$INSTALL_ROOT")_deployment_"
@@ -838,7 +1016,7 @@ if [ "$RESTART_ISSUES" = true ]; then
print_error "Some containers are experiencing issues!"
echo ""
print_info "Please check the logs for more information:"
echo " (cd \"${INSTALL_ROOT}/deployment\" && $COMPOSE_CMD -f docker-compose.yml logs)"
echo " (cd \"${INSTALL_ROOT}/deployment\" && $COMPOSE_CMD $(compose_file_args) logs)"
echo ""
print_info "If the issue persists, please contact: founders@onyx.app"
@@ -857,8 +1035,12 @@ check_onyx_health() {
echo ""
while [ $attempt -le $max_attempts ]; do
# Check for successful HTTP responses (200, 301, 302, etc.)
local http_code=$(curl -s -o /dev/null -w "%{http_code}" "http://localhost:$port")
local http_code=""
if [[ "$DOWNLOADER" == "curl" ]]; then
http_code=$(curl -s -o /dev/null -w "%{http_code}" "http://localhost:$port" 2>/dev/null || echo "000")
else
http_code=$(wget -q --spider -S "http://localhost:$port" 2>&1 | grep "HTTP/" | tail -1 | awk '{print $2}' || echo "000")
fi
if echo "$http_code" | grep -qE "^(200|301|302|303|307|308)$"; then
return 0
fi
@@ -914,6 +1096,18 @@ print_info "If authentication is enabled, you can create your admin account here
echo " • Visit http://localhost:${HOST_PORT}/auth/signup to create your admin account"
echo " • The first user created will automatically have admin privileges"
echo ""
if [[ "$LITE_MODE" = true ]]; then
echo ""
print_info "Running in Lite mode — the following services are NOT started:"
echo " • Vespa (vector database)"
echo " • Redis (cache)"
echo " • Model servers (embedding/inference)"
echo " • Background workers (Celery)"
echo ""
print_info "Connectors and RAG search are disabled. LLM chat, tools, user file"
print_info "uploads, Projects, Agent knowledge, and code interpreter still work."
fi
echo ""
print_info "Refer to the README in the ${INSTALL_ROOT} directory for more information."
echo ""
print_info "For help or issues, contact: founders@onyx.app"

View File

@@ -0,0 +1,190 @@
"use client";
import { useState } from "react";
import Text from "@/refresh-components/texts/Text";
import TableQualifier from "@/refresh-components/table/TableQualifier";
import { TableSizeProvider } from "@/refresh-components/table/TableSizeContext";
import type { TableSize } from "@/refresh-components/table/TableSizeContext";
import type { QualifierContentType } from "@/refresh-components/table/types";
import { SvgCheckCircle } from "@opal/icons";
// ---------------------------------------------------------------------------
// Content type configurations
// ---------------------------------------------------------------------------
/** One demo entry: a qualifier content type plus the props needed to render it. */
interface ContentConfig {
  /** Heading rendered above the row of qualifier states. */
  label: string;
  /** Value passed as the `content` prop of `TableQualifier`. */
  content: QualifierContentType;
  /** Additional props spread onto every `TableQualifier` rendered for this entry. */
  extraProps: Record<string, unknown>;
}
/** Content types showcased by the demo; rows are rendered in this order. */
const CONTENT_TYPES: ContentConfig[] = [
  {
    label: "Simple",
    content: "simple",
    extraProps: {},
  },
  {
    label: "Icon",
    content: "icon",
    extraProps: { icon: SvgCheckCircle },
  },
  {
    label: "Image",
    content: "image",
    extraProps: {
      imageSrc: "https://picsum.photos/36",
      imageAlt: "Placeholder",
    },
  },
  {
    label: "Avatar Icon",
    content: "avatar-icon",
    extraProps: {},
  },
  {
    label: "Avatar User",
    content: "avatar-user",
    extraProps: { initials: "AJ" },
  },
];
// ---------------------------------------------------------------------------
// Row of qualifier states for a single content type
// ---------------------------------------------------------------------------
/** Props for `QualifierRow`. */
interface QualifierRowProps {
  /** The content type rendered in every state variant of the row. */
  config: ContentConfig;
}

/**
 * Renders one labelled row showing a single content type in each qualifier
 * state: default, selectable, selected, disabled and disabled+selected.
 *
 * The "Selectable" and "Selected" cells are interactive and keep their own
 * local selection state; the other cells are static.
 */
function QualifierRow({ config }: QualifierRowProps) {
  const [isSelectableOn, setIsSelectableOn] = useState(false);
  const [isSelectedOn, setIsSelectedOn] = useState(true);

  // One entry per cell, in left-to-right display order. `stateProps` are the
  // state-related props handed to TableQualifier for that cell.
  const variants: {
    caption: string;
    stateProps: {
      selectable: boolean;
      selected: boolean;
      disabled: boolean;
      onSelectChange?: (selected: boolean) => void;
    };
  }[] = [
    {
      caption: "Default",
      stateProps: { selectable: false, selected: false, disabled: false },
    },
    {
      // Hover to reveal the checkbox.
      caption: "Selectable",
      stateProps: {
        selectable: true,
        selected: isSelectableOn,
        disabled: false,
        onSelectChange: setIsSelectableOn,
      },
    },
    {
      caption: "Selected",
      stateProps: {
        selectable: true,
        selected: isSelectedOn,
        disabled: false,
        onSelectChange: setIsSelectedOn,
      },
    },
    {
      // Disabled, unselected.
      caption: "Disabled",
      stateProps: { selectable: true, selected: false, disabled: true },
    },
    {
      // Disabled, selected.
      caption: "Disabled+Sel",
      stateProps: { selectable: true, selected: true, disabled: true },
    },
  ];

  return (
    <div className="space-y-2">
      <Text mainUiAction text02>
        {config.label}
      </Text>
      <div className="flex items-start gap-8">
        {variants.map(({ caption, stateProps }) => (
          <div key={caption} className="flex w-20 flex-col items-center gap-2">
            {/* extraProps are spread last so they keep precedence, as before. */}
            <TableQualifier
              content={config.content}
              {...stateProps}
              {...config.extraProps}
            />
            <Text secondaryBody text04>
              {caption}
            </Text>
          </div>
        ))}
      </div>
    </div>
  );
}
// ---------------------------------------------------------------------------
// Size section — all content types at a given size
// ---------------------------------------------------------------------------
/** Props for `SizeSection`. */
interface SizeSectionProps {
  /** Size handed to `TableSizeProvider` for every row in the section. */
  size: TableSize;
  /** Heading rendered above the section. */
  title: string;
}

/**
 * Renders a titled section containing one `QualifierRow` per entry in
 * `CONTENT_TYPES`, all wrapped in a `TableSizeProvider` for the given size.
 */
function SizeSection({ size, title }: SizeSectionProps) {
  const rows = CONTENT_TYPES.map((contentConfig) => {
    const rowKey = `${size}-${contentConfig.content}`;
    return <QualifierRow key={rowKey} config={contentConfig} />;
  });

  return (
    <div className="space-y-6">
      <Text headingH3>{title}</Text>
      <TableSizeProvider size={size}>
        <div className="flex flex-col gap-8">{rows}</div>
      </TableSizeProvider>
    </div>
  );
}
// ---------------------------------------------------------------------------
// Page
// ---------------------------------------------------------------------------
/**
 * Demo page: an introduction followed by one `SizeSection` for the regular
 * table size and one for the small table size.
 */
export default function TableQualifierDemoPage() {
  const intro = (
    <div className="space-y-4">
      <Text headingH2>TableQualifier Demo</Text>
      <Text mainContentMuted text03>
        All content types, sizes, and interactive states. Hover selectable
        variants to reveal the checkbox; click to toggle.
      </Text>
    </div>
  );

  return (
    <div className="p-6 space-y-10">
      {intro}
      <SizeSection size="regular" title="Regular (36px)" />
      <SizeSection size="small" title="Small (28px)" />
    </div>
  );
}

View File

@@ -0,0 +1,106 @@
"use client";
import { useState } from "react";
import {
type Table,
type ColumnDef,
type RowData,
type VisibilityState,
} from "@tanstack/react-table";
import { Button } from "@opal/components";
import { SvgColumn, SvgCheck } from "@opal/icons";
import Popover from "@/refresh-components/Popover";
import LineItem from "@/refresh-components/buttons/LineItem";
import Divider from "@/refresh-components/Divider";
// ---------------------------------------------------------------------------
// Popover UI
// ---------------------------------------------------------------------------
/** Props for `ColumnVisibilityPopover`. */
interface ColumnVisibilityPopoverProps<TData extends RowData = RowData> {
  /** TanStack table instance whose hideable leaf columns are listed. */
  table: Table<TData>;
  /** Current visibility map; a column counts as hidden only when its entry is `false`. */
  columnVisibility: VisibilityState;
  /** Trigger sizing: `"small"` renders an `"sm"` button, otherwise `"md"`. Defaults to `"regular"`. */
  size?: "regular" | "small";
}

/**
 * Icon button that opens a popover listing every hideable leaf column.
 * Clicking an entry toggles that column's visibility; visible columns are
 * marked as selected and show a check icon.
 */
function ColumnVisibilityPopover<TData extends RowData>({
  table,
  columnVisibility,
  size = "regular",
}: ColumnVisibilityPopoverProps<TData>) {
  const [isOpen, setIsOpen] = useState(false);
  const triggerSize = size === "small" ? "sm" : "md";

  const menuItems = table
    .getAllLeafColumns()
    .filter((leaf) => leaf.getCanHide())
    .map((leaf) => {
      // Use the header as the label only when it is a plain string;
      // otherwise fall back to the column id.
      const header = leaf.columnDef.header;
      const text = typeof header === "string" ? header : leaf.id;
      const shown = columnVisibility[leaf.id] !== false;

      return (
        <LineItem
          key={leaf.id}
          selected={shown}
          emphasized
          rightChildren={shown ? <SvgCheck size={16} /> : undefined}
          onClick={() => {
            leaf.toggleVisibility();
          }}
        >
          {text}
        </LineItem>
      );
    });

  return (
    <Popover open={isOpen} onOpenChange={setIsOpen}>
      <Popover.Trigger asChild>
        <Button
          icon={SvgColumn}
          transient={isOpen}
          size={triggerSize}
          prominence="internal"
          tooltip="Columns"
        />
      </Popover.Trigger>
      <Popover.Content width="lg" align="end" side="bottom">
        <Divider showTitle text="Shown Columns" />
        <Popover.Menu>{menuItems}</Popover.Menu>
      </Popover.Content>
    </Popover>
  );
}
// ---------------------------------------------------------------------------
// Column definition factory
// ---------------------------------------------------------------------------
/** Options for `createColumnVisibilityColumn`. */
interface CreateColumnVisibilityColumnOptions {
  /** Forwarded to the popover's `size` prop. */
  size?: "regular" | "small";
}

/**
 * Builds a column definition whose header renders a `ColumnVisibilityPopover`
 * and whose cells render nothing. Hiding, sorting and resizing are disabled
 * for the column itself.
 */
function createColumnVisibilityColumn<TData>(
  options?: CreateColumnVisibilityColumnOptions
): ColumnDef<TData, unknown> {
  const definition: ColumnDef<TData, unknown> = {
    id: "__columnVisibility",
    size: 44,
    enableHiding: false,
    enableSorting: false,
    enableResizing: false,
    header: (headerContext) => {
      // Read visibility from table state on each render so the popover
      // receives the current map.
      const { columnVisibility } = headerContext.table.getState();
      return (
        <ColumnVisibilityPopover
          table={headerContext.table}
          columnVisibility={columnVisibility}
          size={options?.size}
        />
      );
    },
    cell: () => null,
  };
  return definition;
}
export { ColumnVisibilityPopover, createColumnVisibilityColumn };

View File

@@ -0,0 +1,260 @@
"use client";
import { cn } from "@/lib/utils";
import { Button } from "@opal/components";
import Text from "@/refresh-components/texts/Text";
import Pagination from "@/refresh-components/table/Pagination";
import { useTableSize } from "@/refresh-components/table/TableSizeContext";
import type { TableSize } from "@/refresh-components/table/TableSizeContext";
import { SvgEye, SvgXCircle } from "@opal/icons";
/**
 * Selection state reported by the table. Within this file only `"none"` is
 * treated specially; `"partial"` and `"all"` both count as "has a selection".
 */
type SelectionState = "none" | "partial" | "all";
/**
 * Footer mode for tables with selectable rows.
 * Displays a selection message on the left (with optional view/clear actions)
 * and a `count`-type pagination on the right.
 */
interface FooterSelectionModeProps {
  mode: "selection";
  /** Whether the table supports selecting multiple rows. */
  multiSelect: boolean;
  /** Current selection state: `"none"`, `"partial"`, or `"all"`. */
  selectionState: SelectionState;
  /** Number of currently selected items. */
  selectedCount: number;
  /** If provided, renders a "View" icon button when items are selected. */
  onView?: () => void;
  /** If provided, renders a "Clear" icon button when items are selected. */
  onClear?: () => void;
  /** Number of items displayed per page. */
  pageSize: number;
  /** Total number of items across all pages. */
  totalItems: number;
  /** The 1-based current page number. */
  currentPage: number;
  /** Total number of pages. */
  totalPages: number;
  /** Called when the user navigates to a different page. */
  onPageChange: (page: number) => void;
  /** Controls overall footer sizing. `"regular"` (default) or `"small"`. */
  size?: TableSize;
  /** Extra class names merged onto the footer's root element. */
  className?: string;
}
/**
 * Footer mode for read-only tables (no row selection).
 * Displays "Showing X~Y of Z" on the left and a `list`-type pagination
 * on the right.
 */
interface FooterSummaryModeProps {
  mode: "summary";
  /** First item number in the current page (e.g. `1`). */
  rangeStart: number;
  /** Last item number in the current page (e.g. `25`). */
  rangeEnd: number;
  /** Total number of items across all pages. */
  totalItems: number;
  /** The 1-based current page number. */
  currentPage: number;
  /** Total number of pages. */
  totalPages: number;
  /** Called when the user navigates to a different page. */
  onPageChange: (page: number) => void;
  /** Controls overall footer sizing. `"regular"` (default) or `"small"`. */
  size?: TableSize;
  /** Extra class names merged onto the footer's root element. */
  className?: string;
}
/**
 * Discriminated union of footer modes.
 * Use `mode: "selection"` for tables with selectable rows, or
 * `mode: "summary"` for read-only tables.
 */
export type FooterProps = FooterSelectionModeProps | FooterSummaryModeProps;
/**
 * Builds the status text shown on the left of a selection-mode footer.
 *
 * @param state - Current selection state; only `"none"` is treated specially.
 * @param multi - Whether multiple rows can be selected.
 * @param count - Number of selected items (used only when `multi` is true).
 * @returns A prompt when nothing is selected, otherwise a selection summary.
 */
function getSelectionMessage(
  state: SelectionState,
  multi: boolean,
  count: number
): string {
  const nothingSelected = state === "none";

  if (nothingSelected && multi) return "Select items to continue";
  if (nothingSelected) return "Select an item to continue";

  if (multi) {
    const suffix = count === 1 ? "" : "s";
    return `${count} item${suffix} selected`;
  }
  return "Item selected";
}
/**
 * Table footer: status information on the left, pagination on the right.
 *
 * With `mode: "selection"` the left side shows the selection message (plus
 * optional view/clear actions) and the right side a `count` pagination. With
 * `mode: "summary"` the left side shows "Showing X~Y of Z" and the right side
 * a `list` pagination. An explicit `size` prop takes precedence over the size
 * obtained from `useTableSize()`.
 */
export default function Footer(props: FooterProps) {
  const sizeFromContext = useTableSize();
  const effectiveSize = props.size ?? sizeFromContext;
  const compact = effectiveSize === "small";

  const leftSide =
    props.mode === "selection" ? (
      <SelectionLeft
        selectionState={props.selectionState}
        multiSelect={props.multiSelect}
        selectedCount={props.selectedCount}
        onView={props.onView}
        onClear={props.onClear}
        isSmall={compact}
      />
    ) : (
      <SummaryLeft
        rangeStart={props.rangeStart}
        rangeEnd={props.rangeEnd}
        totalItems={props.totalItems}
        isSmall={compact}
      />
    );

  // The two pagination variants use different size mappings: count maps
  // small/regular to sm/md, list maps them to md/lg.
  const rightSide =
    props.mode === "selection" ? (
      <Pagination
        type="count"
        pageSize={props.pageSize}
        totalItems={props.totalItems}
        currentPage={props.currentPage}
        totalPages={props.totalPages}
        onPageChange={props.onPageChange}
        showUnits
        size={compact ? "sm" : "md"}
      />
    ) : (
      <Pagination
        type="list"
        currentPage={props.currentPage}
        totalPages={props.totalPages}
        onPageChange={props.onPageChange}
        size={compact ? "md" : "lg"}
      />
    );

  return (
    <div
      className={cn(
        "table-footer",
        "flex w-full items-center justify-between border-t border-border-01",
        props.className
      )}
      data-size={effectiveSize}
    >
      <div className="flex items-center gap-1 px-1">{leftSide}</div>
      <div className="flex items-center gap-2 px-1 py-2">{rightSide}</div>
    </div>
  );
}
/** Props for `SelectionLeft`. */
interface SelectionLeftProps {
  /** Current selection state; `"none"` hides the action buttons. */
  selectionState: SelectionState;
  /** Whether multiple rows can be selected (affects the message wording). */
  multiSelect: boolean;
  /** Number of selected items shown in the message. */
  selectedCount: number;
  /** When provided, a "View" button is rendered while something is selected. */
  onView?: () => void;
  /** When provided, a "Clear selection" button is rendered while something is selected. */
  onClear?: () => void;
  /** Selects the compact text and button variants. */
  isSmall: boolean;
}

/**
 * Left side of a selection-mode footer: the selection message followed by the
 * optional view/clear icon buttons (only while something is selected).
 */
function SelectionLeft({
  selectionState,
  multiSelect,
  selectedCount,
  onView,
  onClear,
  isSmall,
}: SelectionLeftProps) {
  const hasSelection = selectionState !== "none";
  const actionSize = isSmall ? "sm" : "md";
  const message = getSelectionMessage(
    selectionState,
    multiSelect,
    selectedCount
  );

  const messageText = isSmall ? (
    <Text
      secondaryAction={hasSelection}
      secondaryBody={!hasSelection}
      text03
    >
      {message}
    </Text>
  ) : (
    <Text mainUiBody={hasSelection} mainUiMuted={!hasSelection} text03>
      {message}
    </Text>
  );

  const viewButton = onView ? (
    <Button
      icon={SvgEye}
      onClick={onView}
      tooltip="View"
      size={actionSize}
      prominence="tertiary"
    />
  ) : null;

  const clearButton = onClear ? (
    <Button
      icon={SvgXCircle}
      onClick={onClear}
      tooltip="Clear selection"
      size={actionSize}
      prominence="tertiary"
    />
  ) : null;

  return (
    <div className="flex flex-row gap-1 items-center justify-center w-fit flex-shrink-0 h-fit px-1">
      {messageText}
      {hasSelection ? (
        <div className="flex flex-row items-center w-fit flex-shrink-0 h-fit">
          {viewButton}
          {clearButton}
        </div>
      ) : null}
    </div>
  );
}
/** Props for `SummaryLeft`. */
interface SummaryLeftProps {
  /** First item number shown in the range. */
  rangeStart: number;
  /** Last item number shown in the range. */
  rangeEnd: number;
  /** Total item count shown after "of". */
  totalItems: number;
  /** Selects the compact text variants. */
  isSmall: boolean;
}

/**
 * Left side of a summary-mode footer: "Showing X~Y of Z", with the numbers
 * rendered in the monospace text variant for the current size.
 */
function SummaryLeft({
  rangeStart,
  rangeEnd,
  totalItems,
  isSmall,
}: SummaryLeftProps) {
  // Pick the body/mono text variants once instead of duplicating the markup
  // per size.
  const bodyVariant = isSmall ? { secondaryBody: true } : { mainUiMuted: true };
  const monoVariant = isSmall ? { secondaryMono: true } : { mainUiMono: true };

  return (
    <div className="flex flex-row gap-1 items-center w-fit h-fit px-1">
      <Text {...bodyVariant} text03>
        Showing{" "}
        <Text as="span" {...monoVariant} text03>
          {rangeStart}~{rangeEnd}
        </Text>{" "}
        of{" "}
        <Text as="span" {...monoVariant} text03>
          {totalItems}
        </Text>
      </Text>
    </div>
  );
}

View File

@@ -0,0 +1,393 @@
"use client";
import { Button } from "@opal/components";
import Text from "@/refresh-components/texts/Text";
import { cn } from "@/lib/utils";
import { SvgChevronLeft, SvgChevronRight } from "@opal/icons";
/** Size tokens accepted by the pagination variants; forwarded to the nav `Button`s. */
type PaginationSize = "lg" | "md" | "sm";
/**
 * Minimal page navigation showing `currentPage / totalPages` with prev/next arrows.
 * Use when you only need simple forward/backward navigation.
 */
interface SimplePaginationProps {
  type: "simple";
  /** The 1-based current page number. */
  currentPage: number;
  /** Total number of pages. */
  totalPages: number;
  /** Called when the user navigates to a different page. */
  onPageChange: (page: number) => void;
  /** When `true`, displays the word "pages" after the page indicator. */
  showUnits?: boolean;
  /** When `false`, hides the page indicator between the prev/next arrows. Defaults to `true`. */
  showPageIndicator?: boolean;
  /** Controls button and text sizing. Defaults to `"lg"`. */
  size?: PaginationSize;
  /** Extra class names merged onto the root element. */
  className?: string;
}
/**
 * Item-count pagination showing `currentItems of totalItems` with optional page
 * controls and a "Go to" button. Use inside table footers that need to communicate
 * how many items the user is viewing.
 */
interface CountPaginationProps {
  type: "count";
  /** Number of items displayed per page. Used to compute the visible range. */
  pageSize: number;
  /** Total number of items across all pages. */
  totalItems: number;
  /** The 1-based current page number. */
  currentPage: number;
  /** Total number of pages. */
  totalPages: number;
  /** Called when the user navigates to a different page. */
  onPageChange: (page: number) => void;
  /** When `false`, hides the page number between the prev/next arrows (arrows still visible). Defaults to `true`. */
  showPageIndicator?: boolean;
  /** When `true`, renders a "Go to" button. Requires `onGoTo`. */
  showGoTo?: boolean;
  /** Callback invoked when the "Go to" button is clicked. */
  onGoTo?: () => void;
  /** When `true`, displays the word "items" after the total count. */
  showUnits?: boolean;
  /** Controls button and text sizing. Defaults to `"lg"`. */
  size?: PaginationSize;
  /** Extra class names merged onto the root element. */
  className?: string;
}
/**
 * Numbered page-list pagination with clickable page buttons and ellipsis
 * truncation for large page counts. Does not support `"sm"` size.
 */
interface ListPaginationProps {
  type: "list";
  /** The 1-based current page number. */
  currentPage: number;
  /** Total number of pages. */
  totalPages: number;
  /** Called when the user navigates to a different page. */
  onPageChange: (page: number) => void;
  /** When `false`, hides the page buttons between the prev/next arrows. Defaults to `true`. */
  showPageIndicator?: boolean;
  /** Controls button and text sizing. Defaults to `"lg"`. Only `"lg"` and `"md"` are supported. */
  size?: Exclude<PaginationSize, "sm">;
  /** Extra class names merged onto the root element. */
  className?: string;
}
/**
 * Discriminated union of all pagination variants.
 * Use the `type` prop to select between `"simple"`, `"count"`, and `"list"`.
 */
export type PaginationProps =
  | SimplePaginationProps
  | CountPaginationProps
  | ListPaginationProps;
/**
 * Computes the entries of a numbered page list.
 *
 * Up to 7 pages are listed in full. Beyond that the list always contains the
 * first and last page plus a window around `currentPage`; each gap is filled
 * with the single missing page number when exactly one page is skipped, or
 * with the marker `"start-ellipsis"` / `"end-ellipsis"` otherwise.
 *
 * @param currentPage - The 1-based current page.
 * @param totalPages - Total number of pages.
 * @returns Page numbers and ellipsis markers in display order.
 */
function getPageNumbers(currentPage: number, totalPages: number) {
  const maxPagesToShow = 7;
  const entries: (number | string)[] = [];

  // Short lists are shown in full.
  if (totalPages <= maxPagesToShow) {
    let page = 1;
    while (page <= totalPages) {
      entries.push(page);
      page += 1;
    }
    return entries;
  }

  // Near either edge the window is widened so the list keeps a fixed length.
  const nearStart = currentPage <= 3;
  const nearEnd = !nearStart && currentPage >= totalPages - 2;
  const windowStart = nearEnd ? totalPages - 4 : Math.max(2, currentPage - 1);
  const windowEnd = nearStart
    ? 5
    : Math.min(totalPages - 1, currentPage + 1);

  entries.push(1);

  if (windowStart > 2) {
    // A gap of exactly one page is shown as that page, not as an ellipsis.
    entries.push(windowStart === 3 ? 2 : "start-ellipsis");
  }

  for (let page = windowStart; page <= windowEnd; page += 1) {
    entries.push(page);
  }

  if (windowEnd < totalPages - 1) {
    entries.push(
      windowEnd === totalPages - 2 ? totalPages - 1 : "end-ellipsis"
    );
  }

  entries.push(totalPages);
  return entries;
}
/**
 * Returns the `Text` variant flag for the given size and role.
 *
 * @param isSmall - Whether the compact ("secondary") variants should be used.
 * @param variant - `"mono"` for numeric text, `"muted"` for surrounding labels.
 * @returns An object with exactly one variant flag set to `true`.
 */
function sizedTextProps(isSmall: boolean, variant: "mono" | "muted") {
  const wantsMono = variant === "mono";
  if (isSmall) {
    return wantsMono ? { secondaryMono: true } : { secondaryBody: true };
  }
  return wantsMono ? { mainUiMono: true } : { mainUiMuted: true };
}
/** Props for `NavButtons`. */
interface NavButtonsProps {
  /** The 1-based current page number. */
  currentPage: number;
  /** Total number of pages; the "next" button is disabled at or beyond it. */
  totalPages: number;
  /** Called with the target page when an arrow is clicked. */
  onPageChange: (page: number) => void;
  /** Size forwarded to both arrow buttons. */
  size: PaginationSize;
  /** Content rendered between the previous and next arrows. */
  children?: React.ReactNode;
}

/**
 * Previous/next arrow buttons with arbitrary content in between.
 * "Previous" is disabled on page 1 or below; "next" is disabled on the last
 * page or beyond.
 */
function NavButtons({
  currentPage,
  totalPages,
  onPageChange,
  size,
  children,
}: NavButtonsProps) {
  const atFirstPage = currentPage <= 1;
  const atLastPage = currentPage >= totalPages;
  const goToPrevious = () => onPageChange(currentPage - 1);
  const goToNext = () => onPageChange(currentPage + 1);

  return (
    <>
      <Button
        icon={SvgChevronLeft}
        onClick={goToPrevious}
        disabled={atFirstPage}
        size={size}
        prominence="tertiary"
        tooltip="Previous page"
      />
      {children}
      <Button
        icon={SvgChevronRight}
        onClick={goToNext}
        disabled={atLastPage}
        size={size}
        prominence="tertiary"
        tooltip="Next page"
      />
    </>
  );
}
/**
 * Table pagination entry point. Dispatches on the `type` prop to the
 * `simple`, `count`, or `list` variant, after raising `totalPages` to at
 * least 1.
 */
export default function Pagination(props: PaginationProps) {
  // An empty data set may report zero pages; the variants assume one or more.
  const safeTotalPages = Math.max(1, props.totalPages);
  const normalized = { ...props, totalPages: safeTotalPages };

  switch (normalized.type) {
    case "simple":
      return <SimplePaginationInner {...normalized} />;
    case "count":
      return <CountPaginationInner {...normalized} />;
    case "list":
      return <ListPaginationInner {...normalized} />;
  }
}
/**
 * `simple` variant: prev/next arrows around an optional
 * `currentPage/totalPages` indicator, optionally followed by the word "pages".
 */
function SimplePaginationInner({
  currentPage,
  totalPages,
  onPageChange,
  showUnits,
  showPageIndicator = true,
  size = "lg",
  className,
}: SimplePaginationProps) {
  const isSmall = size === "sm";
  const monoProps = sizedTextProps(isSmall, "mono");
  const mutedProps = sizedTextProps(isSmall, "muted");

  // The units label is part of the indicator, so hiding the indicator hides
  // it too.
  const indicator = showPageIndicator ? (
    <>
      <Text {...monoProps} text03>
        {currentPage}
        <Text as="span" {...mutedProps} text03>
          /
        </Text>
        {totalPages}
      </Text>
      {showUnits ? (
        <Text {...mutedProps} text03>
          pages
        </Text>
      ) : null}
    </>
  ) : null;

  return (
    <div className={cn("flex items-center gap-1", className)}>
      <NavButtons
        currentPage={currentPage}
        totalPages={totalPages}
        onPageChange={onPageChange}
        size={size}
      >
        {indicator}
      </NavButtons>
    </div>
  );
}
/**
 * `count` variant: "start~end of total [items]", then the prev/next arrows
 * around an optional current-page number, then an optional "Go to" button.
 */
function CountPaginationInner({
  pageSize,
  totalItems,
  currentPage,
  totalPages,
  onPageChange,
  showPageIndicator = true,
  showGoTo,
  onGoTo,
  showUnits,
  size = "lg",
  className,
}: CountPaginationProps) {
  const isSmall = size === "sm";
  const monoProps = sizedTextProps(isSmall, "mono");
  const mutedProps = sizedTextProps(isSmall, "muted");

  // An empty list shows "0~0"; otherwise the range is 1-based and its end is
  // capped at the total.
  const firstItem = totalItems === 0 ? 0 : (currentPage - 1) * pageSize + 1;
  const lastItem = Math.min(currentPage * pageSize, totalItems);
  const visibleRange = `${firstItem}~${lastItem}`;

  // The button needs both the flag and a handler.
  const goToButton =
    showGoTo && onGoTo ? (
      <Button onClick={onGoTo} size={size} prominence="tertiary">
        Go to
      </Button>
    ) : null;

  return (
    <div className={cn("flex items-center gap-1", className)}>
      <Text {...monoProps} text03>
        {visibleRange}
      </Text>
      <Text {...mutedProps} text03>
        of
      </Text>
      <Text {...monoProps} text03>
        {totalItems}
      </Text>
      {showUnits ? (
        <Text {...mutedProps} text03>
          items
        </Text>
      ) : null}
      <NavButtons
        currentPage={currentPage}
        totalPages={totalPages}
        onPageChange={onPageChange}
        size={size}
      >
        {showPageIndicator ? (
          <Text {...monoProps} text03>
            {currentPage}
          </Text>
        ) : null}
      </NavButtons>
      {goToButton}
    </div>
  );
}
/** Props for `PageNumberIcon`. */
interface PageNumberIconProps {
  /** Class name supplied by the hosting `Button`'s icon slot. */
  className?: string;
  /** Page number to display. */
  pageNum: number;
  /** Whether this is the current page (uses the emphasised text variant). */
  isActive: boolean;
  /** Selects the main-UI text variants instead of the secondary ones. */
  isLarge: boolean;
}

/**
 * Renders a page number as text so it can be used in a `Button`'s `icon`
 * slot. The text variant depends on size and on whether the page is active.
 */
function PageNumberIcon({
  className: iconClassName,
  pageNum,
  isActive,
  isLarge,
}: PageNumberIconProps) {
  // Only the font variant differs between sizes; the colour flags are shared.
  const fontVariant = isLarge
    ? { mainUiBody: isActive, mainUiMuted: !isActive }
    : { secondaryAction: isActive, secondaryBody: !isActive };

  return (
    <div className={cn(iconClassName, "flex flex-col justify-center")}>
      <Text {...fontVariant} text04={isActive} text02={!isActive}>
        {pageNum}
      </Text>
    </div>
  );
}
/**
 * `list` variant: prev/next arrows around a row of clickable page-number
 * buttons, with "..." standing in for skipped page ranges.
 */
function ListPaginationInner({
  currentPage,
  totalPages,
  onPageChange,
  showPageIndicator = true,
  size = "lg",
  className,
}: ListPaginationProps) {
  const isLarge = size === "lg";
  const entries = getPageNumbers(currentPage, totalPages);

  const renderEntry = (entry: number | string) => {
    // String entries are the ellipsis markers; they also serve as stable keys.
    if (typeof entry === "string") {
      return (
        <Text
          key={entry}
          mainUiMuted={isLarge}
          secondaryBody={!isLarge}
          text03
        >
          ...
        </Text>
      );
    }

    // Bind to a const so the narrowed `number` type holds inside the closures.
    const pageNum: number = entry;
    const isActive = pageNum === currentPage;
    return (
      <Button
        key={pageNum}
        onClick={() => onPageChange(pageNum)}
        size={size}
        prominence="tertiary"
        transient={isActive}
        icon={({ className: iconClassName }) => (
          <PageNumberIcon
            className={iconClassName}
            pageNum={pageNum}
            isActive={isActive}
            isLarge={isLarge}
          />
        )}
      />
    );
  };

  return (
    <div className={cn("flex items-center gap-1", className)}>
      <NavButtons
        currentPage={currentPage}
        totalPages={totalPages}
        onPageChange={onPageChange}
        size={size}
      >
        {showPageIndicator ? (
          <div className="flex items-center">{entries.map(renderEntry)}</div>
        ) : null}
      </NavButtons>
    </div>
  );
}

View File

@@ -0,0 +1,181 @@
"use client";
import { useState } from "react";
import {
type Table,
type ColumnDef,
type RowData,
type SortingState,
} from "@tanstack/react-table";
import { Button } from "@opal/components";
import { SvgArrowUpDown, SvgSortOrder, SvgCheck } from "@opal/icons";
import Popover from "@/refresh-components/Popover";
import Divider from "@/refresh-components/Divider";
import LineItem from "@/refresh-components/buttons/LineItem";
import Text from "@/refresh-components/texts/Text";
// ---------------------------------------------------------------------------
// Popover UI
// ---------------------------------------------------------------------------
/** Props for `SortingPopover`. */
interface SortingPopoverProps<TData extends RowData = RowData> {
  /** TanStack table instance whose sortable leaf columns are listed. */
  table: Table<TData>;
  /** Current sorting state; only the first entry is considered. */
  sorting: SortingState;
  /** Trigger sizing: `"small"` renders an `"sm"` button, otherwise `"md"`. Defaults to `"regular"`. */
  size?: "regular" | "small";
  /** Optional text rendered in the menu footer. */
  footerText?: string;
  /** Label for the ascending option. Defaults to `"Ascending"`. */
  ascendingLabel?: string;
  /** Label for the descending option. Defaults to `"Descending"`. */
  descendingLabel?: string;
}

/**
 * Icon button that opens a sorting menu.
 *
 * The menu offers "Manual Ordering" (which resets sorting) plus one entry per
 * sortable leaf column. Clicking the active column resets sorting; clicking
 * another column calls `column.toggleSorting(false)`. While a sort is active,
 * a second section lets the user switch between ascending and descending.
 */
function SortingPopover<TData extends RowData>({
  table,
  sorting,
  size = "regular",
  footerText,
  ascendingLabel = "Ascending",
  descendingLabel = "Descending",
}: SortingPopoverProps<TData>) {
  const [isOpen, setIsOpen] = useState(false);
  const activeSort = sorting[0] ?? null;
  const isManual = activeSort === null;

  // Check icon for the selected entry, nothing otherwise.
  const checkIf = (on: boolean) => (on ? <SvgCheck size={16} /> : undefined);

  const menuFooter = footerText ? (
    <div className="px-2 py-1">
      <Text secondaryBody text03>
        {footerText}
      </Text>
    </div>
  ) : undefined;

  const columnItems = table
    .getAllLeafColumns()
    .filter((leaf) => leaf.getCanSort())
    .map((leaf) => {
      // Use the header as the label only when it is a plain string;
      // otherwise fall back to the column id.
      const header = leaf.columnDef.header;
      const text = typeof header === "string" ? header : leaf.id;
      const isActive = activeSort?.id === leaf.id;

      return (
        <LineItem
          key={leaf.id}
          selected={isActive}
          emphasized
          rightChildren={checkIf(isActive)}
          onClick={() => {
            // Clicking the active column returns to manual ordering.
            if (isActive) {
              table.resetSorting();
              return;
            }
            leaf.toggleSorting(false);
          }}
        >
          {text}
        </LineItem>
      );
    });

  // Only offered while a sort is active.
  const orderSection = activeSort !== null && (
    <>
      <Divider showTitle text="Sorting Order" />
      <LineItem
        selected={!activeSort.desc}
        emphasized
        rightChildren={checkIf(!activeSort.desc)}
        onClick={() => {
          table.setSorting([{ id: activeSort.id, desc: false }]);
        }}
      >
        {ascendingLabel}
      </LineItem>
      <LineItem
        selected={activeSort.desc}
        emphasized
        rightChildren={checkIf(activeSort.desc)}
        onClick={() => {
          table.setSorting([{ id: activeSort.id, desc: true }]);
        }}
      >
        {descendingLabel}
      </LineItem>
    </>
  );

  return (
    <Popover open={isOpen} onOpenChange={setIsOpen}>
      <Popover.Trigger asChild>
        <Button
          icon={isManual ? SvgArrowUpDown : SvgSortOrder}
          transient={isOpen}
          size={size === "small" ? "sm" : "md"}
          prominence="internal"
          tooltip="Sort"
        />
      </Popover.Trigger>
      <Popover.Content width="lg" align="end" side="bottom">
        <Popover.Menu footer={menuFooter}>
          <Divider showTitle text="Sort by" />
          <LineItem
            selected={isManual}
            emphasized
            rightChildren={checkIf(isManual)}
            onClick={() => {
              table.resetSorting();
            }}
          >
            Manual Ordering
          </LineItem>
          {columnItems}
          {orderSection}
        </Popover.Menu>
      </Popover.Content>
    </Popover>
  );
}
// ---------------------------------------------------------------------------
// Column definition factory
// ---------------------------------------------------------------------------
/** Options for `createSortingColumn`; each is forwarded to `SortingPopover`. */
interface CreateSortingColumnOptions {
  /** Forwarded to the popover's `size` prop. */
  size?: "regular" | "small";
  /** Forwarded to the popover's `footerText` prop. */
  footerText?: string;
  /** Forwarded to the popover's `ascendingLabel` prop. */
  ascendingLabel?: string;
  /** Forwarded to the popover's `descendingLabel` prop. */
  descendingLabel?: string;
}

/**
 * Builds a column definition whose header renders a `SortingPopover` and
 * whose cells render nothing. Hiding, sorting and resizing are disabled for
 * the column itself.
 */
function createSortingColumn<TData>(
  options?: CreateSortingColumnOptions
): ColumnDef<TData, unknown> {
  const definition: ColumnDef<TData, unknown> = {
    id: "__sorting",
    size: 44,
    enableHiding: false,
    enableSorting: false,
    enableResizing: false,
    header: (headerContext) => {
      // Read sorting from table state on each render so the popover receives
      // the current value.
      const { sorting } = headerContext.table.getState();
      return (
        <SortingPopover
          table={headerContext.table}
          sorting={sorting}
          size={options?.size}
          footerText={options?.footerText}
          ascendingLabel={options?.ascendingLabel}
          descendingLabel={options?.descendingLabel}
        />
      );
    },
    cell: () => null,
  };
  return definition;
}
export { SortingPopover, createSortingColumn };