Compare commits

..

3 Commits

Author SHA1 Message Date
Raunak Bhagat
4417fec252 Merge branch 'main' into refactor/modal-header-content-layout 2026-02-27 17:43:30 -08:00
Raunak Bhagat
d59ff384c4 fix: use hidden instead of sr-only for Radix dialog primitives
sr-only elements are still matched by Playwright's getByText, causing
strict mode violations when the same text appears in both the Radix
primitive and the Content component. The hidden attribute (display:none)
prevents text matching while aria-labelledby/aria-describedby still
compute accessible names from hidden referenced elements per ARIA spec.
2026-02-27 17:41:58 -08:00
Raunak Bhagat
942c1796b3 refactor(opal): migrate ModalHeader to Content layout
Replace manual Text/Icon rendering in ModalHeader with the opal
Content component (sizePreset="section", variant="heading").
Radix DialogPrimitive.Title and Description are kept as sr-only
elements for accessibility.
2026-02-27 17:13:56 -08:00
7 changed files with 128 additions and 963 deletions

View File

@@ -241,7 +241,8 @@ _VECTOR_DB_BEAT_TASK_NAMES: set[str] = {
"check-for-index-attempt-cleanup",
"check-for-doc-permissions-sync",
"check-for-external-group-sync",
"migrate-chunks-from-vespa-to-opensearch",
"check-for-documents-for-opensearch-migration",
"migrate-documents-from-vespa-to-opensearch",
}
if DISABLE_VECTOR_DB:

View File

@@ -414,31 +414,34 @@ def _process_user_file_with_indexing(
raise RuntimeError(f"Indexing pipeline failed for user file {user_file_id}")
def _process_user_file_impl(
*, user_file_id: str, tenant_id: str, redis_locking: bool
@shared_task(
name=OnyxCeleryTask.PROCESS_SINGLE_USER_FILE,
bind=True,
ignore_result=True,
)
def process_single_user_file(
self: Task, *, user_file_id: str, tenant_id: str # noqa: ARG001
) -> None:
"""Core implementation for processing a single user file.
When redis_locking=True, acquires a per-file Redis lock and clears the
queued-key guard (Celery path). When redis_locking=False, skips all Redis
operations (BackgroundTask path).
"""
task_logger.info(f"_process_user_file_impl - Starting id={user_file_id}")
task_logger.info(f"process_single_user_file - Starting id={user_file_id}")
start = time.monotonic()
file_lock: RedisLock | None = None
if redis_locking:
redis_client = get_redis_client(tenant_id=tenant_id)
redis_client.delete(_user_file_queued_key(user_file_id))
file_lock = redis_client.lock(
_user_file_lock_key(user_file_id),
timeout=CELERY_USER_FILE_PROCESSING_LOCK_TIMEOUT,
redis_client = get_redis_client(tenant_id=tenant_id)
# Clear the "queued" guard set by the beat generator so that the next beat
# cycle can re-enqueue this file if it is still in PROCESSING state after
# this task completes or fails.
redis_client.delete(_user_file_queued_key(user_file_id))
file_lock: RedisLock = redis_client.lock(
_user_file_lock_key(user_file_id),
timeout=CELERY_USER_FILE_PROCESSING_LOCK_TIMEOUT,
)
if not file_lock.acquire(blocking=False):
task_logger.info(
f"process_single_user_file - Lock held, skipping user_file_id={user_file_id}"
)
if file_lock is not None and not file_lock.acquire(blocking=False):
task_logger.info(
f"_process_user_file_impl - Lock held, skipping user_file_id={user_file_id}"
)
return
return None
documents: list[Document] = []
try:
@@ -446,15 +449,15 @@ def _process_user_file_impl(
uf = db_session.get(UserFile, _as_uuid(user_file_id))
if not uf:
task_logger.warning(
f"_process_user_file_impl - UserFile not found id={user_file_id}"
f"process_single_user_file - UserFile not found id={user_file_id}"
)
return
return None
if uf.status != UserFileStatus.PROCESSING:
task_logger.info(
f"_process_user_file_impl - Skipping id={user_file_id} status={uf.status}"
f"process_single_user_file - Skipping id={user_file_id} status={uf.status}"
)
return
return None
connector = LocalFileConnector(
file_locations=[uf.file_id],
@@ -468,6 +471,7 @@ def _process_user_file_impl(
[doc for doc in batch if not isinstance(doc, HierarchyNode)]
)
# update the document id to userfile id in the documents
for document in documents:
document.id = str(user_file_id)
document.source = DocumentSource.USER_FILE
@@ -489,8 +493,9 @@ def _process_user_file_impl(
except Exception as e:
task_logger.exception(
f"_process_user_file_impl - Error processing file id={user_file_id} - {e.__class__.__name__}"
f"process_single_user_file - Error processing file id={user_file_id} - {e.__class__.__name__}"
)
# don't update the status if the user file is being deleted
current_user_file = db_session.get(UserFile, _as_uuid(user_file_id))
if (
current_user_file
@@ -499,42 +504,33 @@ def _process_user_file_impl(
uf.status = UserFileStatus.FAILED
db_session.add(uf)
db_session.commit()
return
return None
elapsed = time.monotonic() - start
task_logger.info(
f"_process_user_file_impl - Finished id={user_file_id} docs={len(documents)} elapsed={elapsed:.2f}s"
f"process_single_user_file - Finished id={user_file_id} docs={len(documents)} elapsed={elapsed:.2f}s"
)
return None
except Exception as e:
# Attempt to mark the file as failed
with get_session_with_current_tenant() as db_session:
uf = db_session.get(UserFile, _as_uuid(user_file_id))
if uf:
# don't update the status if the user file is being deleted
if uf.status != UserFileStatus.DELETING:
uf.status = UserFileStatus.FAILED
db_session.add(uf)
db_session.commit()
task_logger.exception(
f"_process_user_file_impl - Error processing file id={user_file_id} - {e.__class__.__name__}"
f"process_single_user_file - Error processing file id={user_file_id} - {e.__class__.__name__}"
)
return None
finally:
if file_lock is not None and file_lock.owned():
if file_lock.owned():
file_lock.release()
@shared_task(
name=OnyxCeleryTask.PROCESS_SINGLE_USER_FILE,
bind=True,
ignore_result=True,
)
def process_single_user_file(
self: Task, *, user_file_id: str, tenant_id: str # noqa: ARG001
) -> None:
_process_user_file_impl(
user_file_id=user_file_id, tenant_id=tenant_id, redis_locking=True
)
@shared_task(
name=OnyxCeleryTask.CHECK_FOR_USER_FILE_DELETE,
soft_time_limit=300,
@@ -585,38 +581,36 @@ def check_for_user_file_delete(self: Task, *, tenant_id: str) -> None:
return None
def _delete_user_file_impl(
*, user_file_id: str, tenant_id: str, redis_locking: bool
@shared_task(
name=OnyxCeleryTask.DELETE_SINGLE_USER_FILE,
bind=True,
ignore_result=True,
)
def process_single_user_file_delete(
self: Task, *, user_file_id: str, tenant_id: str # noqa: ARG001
) -> None:
"""Core implementation for deleting a single user file.
When redis_locking=True, acquires a per-file Redis lock (Celery path).
When redis_locking=False, skips Redis operations (BackgroundTask path).
"""
task_logger.info(f"_delete_user_file_impl - Starting id={user_file_id}")
file_lock: RedisLock | None = None
if redis_locking:
redis_client = get_redis_client(tenant_id=tenant_id)
file_lock = redis_client.lock(
_user_file_delete_lock_key(user_file_id),
timeout=CELERY_GENERIC_BEAT_LOCK_TIMEOUT,
"""Process a single user file delete."""
task_logger.info(f"process_single_user_file_delete - Starting id={user_file_id}")
redis_client = get_redis_client(tenant_id=tenant_id)
file_lock: RedisLock = redis_client.lock(
_user_file_delete_lock_key(user_file_id),
timeout=CELERY_GENERIC_BEAT_LOCK_TIMEOUT,
)
if not file_lock.acquire(blocking=False):
task_logger.info(
f"process_single_user_file_delete - Lock held, skipping user_file_id={user_file_id}"
)
if file_lock is not None and not file_lock.acquire(blocking=False):
task_logger.info(
f"_delete_user_file_impl - Lock held, skipping user_file_id={user_file_id}"
)
return
return None
try:
with get_session_with_current_tenant() as db_session:
user_file = db_session.get(UserFile, _as_uuid(user_file_id))
if not user_file:
task_logger.info(
f"_delete_user_file_impl - User file not found id={user_file_id}"
f"process_single_user_file_delete - User file not found id={user_file_id}"
)
return
return None
# 1) Delete vector DB chunks (skip when disabled)
if not DISABLE_VECTOR_DB:
if MANAGED_VESPA:
httpx_init_vespa_pool(
@@ -654,6 +648,7 @@ def _delete_user_file_impl(
chunk_count=chunk_count,
)
# 2) Delete the user-uploaded file content from filestore (blob + metadata)
file_store = get_default_file_store()
try:
file_store.delete_file(user_file.file_id)
@@ -661,33 +656,26 @@ def _delete_user_file_impl(
user_file_id_to_plaintext_file_name(user_file.id)
)
except Exception as e:
# This block executed only if the file is not found in the filestore
task_logger.exception(
f"_delete_user_file_impl - Error deleting file id={user_file.id} - {e.__class__.__name__}"
f"process_single_user_file_delete - Error deleting file id={user_file.id} - {e.__class__.__name__}"
)
# 3) Finally, delete the UserFile row
db_session.delete(user_file)
db_session.commit()
task_logger.info(f"_delete_user_file_impl - Completed id={user_file_id}")
task_logger.info(
f"process_single_user_file_delete - Completed id={user_file_id}"
)
except Exception as e:
task_logger.exception(
f"_delete_user_file_impl - Error processing file id={user_file_id} - {e.__class__.__name__}"
f"process_single_user_file_delete - Error processing file id={user_file_id} - {e.__class__.__name__}"
)
return None
finally:
if file_lock is not None and file_lock.owned():
if file_lock.owned():
file_lock.release()
@shared_task(
name=OnyxCeleryTask.DELETE_SINGLE_USER_FILE,
bind=True,
ignore_result=True,
)
def process_single_user_file_delete(
self: Task, *, user_file_id: str, tenant_id: str # noqa: ARG001
) -> None:
_delete_user_file_impl(
user_file_id=user_file_id, tenant_id=tenant_id, redis_locking=True
)
return None
@shared_task(
@@ -759,30 +747,32 @@ def check_for_user_file_project_sync(self: Task, *, tenant_id: str) -> None:
return None
def _project_sync_user_file_impl(
*, user_file_id: str, tenant_id: str, redis_locking: bool
@shared_task(
name=OnyxCeleryTask.PROCESS_SINGLE_USER_FILE_PROJECT_SYNC,
bind=True,
ignore_result=True,
)
def process_single_user_file_project_sync(
self: Task, *, user_file_id: str, tenant_id: str # noqa: ARG001
) -> None:
"""Core implementation for syncing a user file's project/persona metadata.
"""Process a single user file project sync."""
task_logger.info(
f"process_single_user_file_project_sync - Starting id={user_file_id}"
)
When redis_locking=True, acquires a per-file Redis lock and clears the
queued-key guard (Celery path). When redis_locking=False, skips Redis
operations (BackgroundTask path).
"""
task_logger.info(f"_project_sync_user_file_impl - Starting id={user_file_id}")
redis_client = get_redis_client(tenant_id=tenant_id)
redis_client.delete(_user_file_project_sync_queued_key(user_file_id))
file_lock: RedisLock | None = None
if redis_locking:
redis_client = get_redis_client(tenant_id=tenant_id)
redis_client.delete(_user_file_project_sync_queued_key(user_file_id))
file_lock = redis_client.lock(
user_file_project_sync_lock_key(user_file_id),
timeout=CELERY_USER_FILE_PROJECT_SYNC_LOCK_TIMEOUT,
file_lock: RedisLock = redis_client.lock(
user_file_project_sync_lock_key(user_file_id),
timeout=CELERY_USER_FILE_PROJECT_SYNC_LOCK_TIMEOUT,
)
if not file_lock.acquire(blocking=False):
task_logger.info(
f"process_single_user_file_project_sync - Lock held, skipping user_file_id={user_file_id}"
)
if file_lock is not None and not file_lock.acquire(blocking=False):
task_logger.info(
f"_project_sync_user_file_impl - Lock held, skipping user_file_id={user_file_id}"
)
return
return None
try:
with get_session_with_current_tenant() as db_session:
@@ -793,10 +783,11 @@ def _project_sync_user_file_impl(
).scalar_one_or_none()
if not user_file:
task_logger.info(
f"_project_sync_user_file_impl - User file not found id={user_file_id}"
f"process_single_user_file_project_sync - User file not found id={user_file_id}"
)
return
return None
# Sync project metadata to vector DB (skip when disabled)
if not DISABLE_VECTOR_DB:
if MANAGED_VESPA:
httpx_init_vespa_pool(
@@ -831,7 +822,7 @@ def _project_sync_user_file_impl(
)
task_logger.info(
f"_project_sync_user_file_impl - User file id={user_file_id}"
f"process_single_user_file_project_sync - User file id={user_file_id}"
)
user_file.needs_project_sync = False
@@ -844,21 +835,11 @@ def _project_sync_user_file_impl(
except Exception as e:
task_logger.exception(
f"_project_sync_user_file_impl - Error syncing project for file id={user_file_id} - {e.__class__.__name__}"
f"process_single_user_file_project_sync - Error syncing project for file id={user_file_id} - {e.__class__.__name__}"
)
return None
finally:
if file_lock is not None and file_lock.owned():
if file_lock.owned():
file_lock.release()
@shared_task(
name=OnyxCeleryTask.PROCESS_SINGLE_USER_FILE_PROJECT_SYNC,
bind=True,
ignore_result=True,
)
def process_single_user_file_project_sync(
self: Task, *, user_file_id: str, tenant_id: str # noqa: ARG001
) -> None:
_project_sync_user_file_impl(
user_file_id=user_file_id, tenant_id=tenant_id, redis_locking=True
)
return None

View File

@@ -37,7 +37,6 @@ from onyx.configs.app_configs import APP_HOST
from onyx.configs.app_configs import APP_PORT
from onyx.configs.app_configs import AUTH_RATE_LIMITING_ENABLED
from onyx.configs.app_configs import AUTH_TYPE
from onyx.configs.app_configs import DISABLE_VECTOR_DB
from onyx.configs.app_configs import LOG_ENDPOINT_LATENCY
from onyx.configs.app_configs import OAUTH_CLIENT_ID
from onyx.configs.app_configs import OAUTH_CLIENT_SECRET
@@ -255,38 +254,8 @@ def include_auth_router_with_prefix(
)
def validate_no_vector_db_settings() -> None:
"""Validate that DISABLE_VECTOR_DB is not combined with incompatible settings.
Raises RuntimeError if DISABLE_VECTOR_DB is set alongside MULTI_TENANT or ENABLE_CRAFT,
since these modes require infrastructure that is removed in no-vector-DB deployments.
"""
if not DISABLE_VECTOR_DB:
return
if MULTI_TENANT:
raise RuntimeError(
"DISABLE_VECTOR_DB cannot be used with MULTI_TENANT. "
"Multi-tenant deployments require the vector database for "
"per-tenant document indexing and search. Run in single-tenant "
"mode when disabling the vector database."
)
from onyx.server.features.build.configs import ENABLE_CRAFT
if ENABLE_CRAFT:
raise RuntimeError(
"DISABLE_VECTOR_DB cannot be used with ENABLE_CRAFT. "
"Onyx Craft requires background workers for sandbox lifecycle "
"management, which are removed in no-vector-DB deployments. "
"Disable Craft (ENABLE_CRAFT=false) when disabling the vector database."
)
@asynccontextmanager
async def lifespan(app: FastAPI) -> AsyncGenerator[None, None]: # noqa: ARG001
validate_no_vector_db_settings()
# Set recursion limit
if SYSTEM_RECURSION_LIMIT is not None:
sys.setrecursionlimit(SYSTEM_RECURSION_LIMIT)

View File

@@ -1,291 +0,0 @@
"""Tests for the _impl functions' redis_locking parameter.
Verifies that:
- redis_locking=True acquires/releases Redis locks and clears queued keys
- redis_locking=False skips all Redis operations entirely
- Both paths execute the same business logic (DB lookup, status check)
"""
from unittest.mock import MagicMock
from unittest.mock import patch
from uuid import uuid4
from onyx.background.celery.tasks.user_file_processing.tasks import (
_delete_user_file_impl,
)
from onyx.background.celery.tasks.user_file_processing.tasks import (
_process_user_file_impl,
)
from onyx.background.celery.tasks.user_file_processing.tasks import (
_project_sync_user_file_impl,
)
TASKS_MODULE = "onyx.background.celery.tasks.user_file_processing.tasks"
def _mock_session_returning_none() -> MagicMock:
"""Return a mock session whose .get() returns None (file not found)."""
session = MagicMock()
session.get.return_value = None
session.execute.return_value.scalar_one_or_none.return_value = None
return session
# ------------------------------------------------------------------
# _process_user_file_impl
# ------------------------------------------------------------------
class TestProcessUserFileImpl:
@patch(f"{TASKS_MODULE}.get_session_with_current_tenant")
@patch(f"{TASKS_MODULE}.get_redis_client")
def test_redis_locking_true_acquires_and_releases_lock(
self,
mock_get_redis: MagicMock,
mock_get_session: MagicMock,
) -> None:
redis_client = MagicMock()
lock = MagicMock()
lock.acquire.return_value = True
lock.owned.return_value = True
redis_client.lock.return_value = lock
mock_get_redis.return_value = redis_client
session = _mock_session_returning_none()
mock_get_session.return_value.__enter__.return_value = session
user_file_id = str(uuid4())
_process_user_file_impl(
user_file_id=user_file_id,
tenant_id="test-tenant",
redis_locking=True,
)
mock_get_redis.assert_called_once_with(tenant_id="test-tenant")
redis_client.delete.assert_called_once()
lock.acquire.assert_called_once_with(blocking=False)
lock.release.assert_called_once()
@patch(f"{TASKS_MODULE}.get_session_with_current_tenant")
@patch(f"{TASKS_MODULE}.get_redis_client")
def test_redis_locking_true_skips_when_lock_held(
self,
mock_get_redis: MagicMock,
mock_get_session: MagicMock,
) -> None:
redis_client = MagicMock()
lock = MagicMock()
lock.acquire.return_value = False
redis_client.lock.return_value = lock
mock_get_redis.return_value = redis_client
_process_user_file_impl(
user_file_id=str(uuid4()),
tenant_id="test-tenant",
redis_locking=True,
)
lock.acquire.assert_called_once()
mock_get_session.assert_not_called()
@patch(f"{TASKS_MODULE}.get_session_with_current_tenant")
@patch(f"{TASKS_MODULE}.get_redis_client")
def test_redis_locking_false_skips_redis_entirely(
self,
mock_get_redis: MagicMock,
mock_get_session: MagicMock,
) -> None:
session = _mock_session_returning_none()
mock_get_session.return_value.__enter__.return_value = session
_process_user_file_impl(
user_file_id=str(uuid4()),
tenant_id="test-tenant",
redis_locking=False,
)
mock_get_redis.assert_not_called()
mock_get_session.assert_called_once()
@patch(f"{TASKS_MODULE}.get_session_with_current_tenant")
@patch(f"{TASKS_MODULE}.get_redis_client")
def test_both_paths_call_db_get(
self,
mock_get_redis: MagicMock,
mock_get_session: MagicMock,
) -> None:
"""Both redis_locking=True and False should call db_session.get(UserFile, ...)."""
redis_client = MagicMock()
lock = MagicMock()
lock.acquire.return_value = True
lock.owned.return_value = True
redis_client.lock.return_value = lock
mock_get_redis.return_value = redis_client
session = _mock_session_returning_none()
mock_get_session.return_value.__enter__.return_value = session
uid = str(uuid4())
_process_user_file_impl(user_file_id=uid, tenant_id="t", redis_locking=True)
call_count_true = session.get.call_count
session.reset_mock()
mock_get_session.reset_mock()
mock_get_session.return_value.__enter__.return_value = session
_process_user_file_impl(user_file_id=uid, tenant_id="t", redis_locking=False)
call_count_false = session.get.call_count
assert call_count_true == call_count_false == 1
# ------------------------------------------------------------------
# _delete_user_file_impl
# ------------------------------------------------------------------
class TestDeleteUserFileImpl:
@patch(f"{TASKS_MODULE}.get_session_with_current_tenant")
@patch(f"{TASKS_MODULE}.get_redis_client")
def test_redis_locking_true_acquires_and_releases_lock(
self,
mock_get_redis: MagicMock,
mock_get_session: MagicMock,
) -> None:
redis_client = MagicMock()
lock = MagicMock()
lock.acquire.return_value = True
lock.owned.return_value = True
redis_client.lock.return_value = lock
mock_get_redis.return_value = redis_client
session = _mock_session_returning_none()
mock_get_session.return_value.__enter__.return_value = session
_delete_user_file_impl(
user_file_id=str(uuid4()),
tenant_id="test-tenant",
redis_locking=True,
)
mock_get_redis.assert_called_once()
lock.acquire.assert_called_once_with(blocking=False)
lock.release.assert_called_once()
@patch(f"{TASKS_MODULE}.get_session_with_current_tenant")
@patch(f"{TASKS_MODULE}.get_redis_client")
def test_redis_locking_true_skips_when_lock_held(
self,
mock_get_redis: MagicMock,
mock_get_session: MagicMock,
) -> None:
redis_client = MagicMock()
lock = MagicMock()
lock.acquire.return_value = False
redis_client.lock.return_value = lock
mock_get_redis.return_value = redis_client
_delete_user_file_impl(
user_file_id=str(uuid4()),
tenant_id="test-tenant",
redis_locking=True,
)
lock.acquire.assert_called_once()
mock_get_session.assert_not_called()
@patch(f"{TASKS_MODULE}.get_session_with_current_tenant")
@patch(f"{TASKS_MODULE}.get_redis_client")
def test_redis_locking_false_skips_redis_entirely(
self,
mock_get_redis: MagicMock,
mock_get_session: MagicMock,
) -> None:
session = _mock_session_returning_none()
mock_get_session.return_value.__enter__.return_value = session
_delete_user_file_impl(
user_file_id=str(uuid4()),
tenant_id="test-tenant",
redis_locking=False,
)
mock_get_redis.assert_not_called()
mock_get_session.assert_called_once()
# ------------------------------------------------------------------
# _project_sync_user_file_impl
# ------------------------------------------------------------------
class TestProjectSyncUserFileImpl:
@patch(f"{TASKS_MODULE}.get_session_with_current_tenant")
@patch(f"{TASKS_MODULE}.get_redis_client")
def test_redis_locking_true_acquires_and_releases_lock(
self,
mock_get_redis: MagicMock,
mock_get_session: MagicMock,
) -> None:
redis_client = MagicMock()
lock = MagicMock()
lock.acquire.return_value = True
lock.owned.return_value = True
redis_client.lock.return_value = lock
mock_get_redis.return_value = redis_client
session = _mock_session_returning_none()
mock_get_session.return_value.__enter__.return_value = session
_project_sync_user_file_impl(
user_file_id=str(uuid4()),
tenant_id="test-tenant",
redis_locking=True,
)
mock_get_redis.assert_called_once()
redis_client.delete.assert_called_once()
lock.acquire.assert_called_once_with(blocking=False)
lock.release.assert_called_once()
@patch(f"{TASKS_MODULE}.get_session_with_current_tenant")
@patch(f"{TASKS_MODULE}.get_redis_client")
def test_redis_locking_true_skips_when_lock_held(
self,
mock_get_redis: MagicMock,
mock_get_session: MagicMock,
) -> None:
redis_client = MagicMock()
lock = MagicMock()
lock.acquire.return_value = False
redis_client.lock.return_value = lock
mock_get_redis.return_value = redis_client
_project_sync_user_file_impl(
user_file_id=str(uuid4()),
tenant_id="test-tenant",
redis_locking=True,
)
lock.acquire.assert_called_once()
mock_get_session.assert_not_called()
@patch(f"{TASKS_MODULE}.get_session_with_current_tenant")
@patch(f"{TASKS_MODULE}.get_redis_client")
def test_redis_locking_false_skips_redis_entirely(
self,
mock_get_redis: MagicMock,
mock_get_session: MagicMock,
) -> None:
session = _mock_session_returning_none()
mock_get_session.return_value.__enter__.return_value = session
_project_sync_user_file_impl(
user_file_id=str(uuid4()),
tenant_id="test-tenant",
redis_locking=False,
)
mock_get_redis.assert_not_called()
mock_get_session.assert_called_once()

View File

@@ -1,421 +0,0 @@
"""Tests for no-vector-DB user file processing paths.
Verifies that when DISABLE_VECTOR_DB is True:
- _process_user_file_impl calls _process_user_file_without_vector_db (not indexing)
- _process_user_file_without_vector_db extracts text, counts tokens, stores plaintext,
sets status=COMPLETED and chunk_count=0
- _delete_user_file_impl skips vector DB chunk deletion
- _project_sync_user_file_impl skips vector DB metadata update
"""
from unittest.mock import MagicMock
from unittest.mock import patch
from uuid import uuid4
from onyx.background.celery.tasks.user_file_processing.tasks import (
_delete_user_file_impl,
)
from onyx.background.celery.tasks.user_file_processing.tasks import (
_process_user_file_impl,
)
from onyx.background.celery.tasks.user_file_processing.tasks import (
_process_user_file_without_vector_db,
)
from onyx.background.celery.tasks.user_file_processing.tasks import (
_project_sync_user_file_impl,
)
from onyx.configs.constants import DocumentSource
from onyx.connectors.models import Document
from onyx.connectors.models import TextSection
from onyx.db.enums import UserFileStatus
TASKS_MODULE = "onyx.background.celery.tasks.user_file_processing.tasks"
LLM_FACTORY_MODULE = "onyx.llm.factory"
def _make_documents(texts: list[str]) -> list[Document]:
"""Build a list of Document objects with the given section texts."""
return [
Document(
id=str(uuid4()),
source=DocumentSource.USER_FILE,
sections=[TextSection(text=t)],
semantic_identifier=f"test-doc-{i}",
metadata={},
)
for i, t in enumerate(texts)
]
def _make_user_file(
*,
status: UserFileStatus = UserFileStatus.PROCESSING,
file_id: str = "test-file-id",
name: str = "test.txt",
) -> MagicMock:
"""Return a MagicMock mimicking a UserFile ORM instance."""
uf = MagicMock()
uf.id = uuid4()
uf.file_id = file_id
uf.name = name
uf.status = status
uf.token_count = None
uf.chunk_count = None
uf.last_project_sync_at = None
uf.projects = []
uf.assistants = []
uf.needs_project_sync = True
uf.needs_persona_sync = True
return uf
# ------------------------------------------------------------------
# _process_user_file_without_vector_db — direct tests
# ------------------------------------------------------------------
class TestProcessUserFileWithoutVectorDb:
@patch(f"{TASKS_MODULE}.store_user_file_plaintext")
@patch(f"{LLM_FACTORY_MODULE}.get_llm_tokenizer_encode_func")
@patch(f"{LLM_FACTORY_MODULE}.get_default_llm")
def test_extracts_and_combines_text(
self,
mock_get_llm: MagicMock, # noqa: ARG002
mock_get_encode: MagicMock,
mock_store_plaintext: MagicMock,
) -> None:
mock_encode = MagicMock(return_value=[1, 2, 3, 4, 5])
mock_get_encode.return_value = mock_encode
uf = _make_user_file()
docs = _make_documents(["hello world", "foo bar"])
db_session = MagicMock()
_process_user_file_without_vector_db(uf, docs, db_session)
stored_text = mock_store_plaintext.call_args.kwargs["plaintext_content"]
assert "hello world" in stored_text
assert "foo bar" in stored_text
@patch(f"{TASKS_MODULE}.store_user_file_plaintext")
@patch(f"{LLM_FACTORY_MODULE}.get_llm_tokenizer_encode_func")
@patch(f"{LLM_FACTORY_MODULE}.get_default_llm")
def test_computes_token_count(
self,
mock_get_llm: MagicMock, # noqa: ARG002
mock_get_encode: MagicMock,
mock_store_plaintext: MagicMock, # noqa: ARG002
) -> None:
mock_encode = MagicMock(return_value=list(range(42)))
mock_get_encode.return_value = mock_encode
uf = _make_user_file()
docs = _make_documents(["some text content"])
db_session = MagicMock()
_process_user_file_without_vector_db(uf, docs, db_session)
assert uf.token_count == 42
@patch(f"{TASKS_MODULE}.store_user_file_plaintext")
@patch(f"{LLM_FACTORY_MODULE}.get_llm_tokenizer_encode_func")
@patch(f"{LLM_FACTORY_MODULE}.get_default_llm")
def test_token_count_falls_back_to_none_on_error(
self,
mock_get_llm: MagicMock,
mock_get_encode: MagicMock, # noqa: ARG002
mock_store_plaintext: MagicMock, # noqa: ARG002
) -> None:
mock_get_llm.side_effect = RuntimeError("No LLM configured")
uf = _make_user_file()
docs = _make_documents(["text"])
db_session = MagicMock()
_process_user_file_without_vector_db(uf, docs, db_session)
assert uf.token_count is None
@patch(f"{TASKS_MODULE}.store_user_file_plaintext")
@patch(f"{LLM_FACTORY_MODULE}.get_llm_tokenizer_encode_func")
@patch(f"{LLM_FACTORY_MODULE}.get_default_llm")
def test_stores_plaintext(
self,
mock_get_llm: MagicMock, # noqa: ARG002
mock_get_encode: MagicMock,
mock_store_plaintext: MagicMock,
) -> None:
mock_get_encode.return_value = MagicMock(return_value=[1])
uf = _make_user_file()
docs = _make_documents(["content to store"])
db_session = MagicMock()
_process_user_file_without_vector_db(uf, docs, db_session)
mock_store_plaintext.assert_called_once_with(
user_file_id=uf.id,
plaintext_content="content to store",
)
@patch(f"{TASKS_MODULE}.store_user_file_plaintext")
@patch(f"{LLM_FACTORY_MODULE}.get_llm_tokenizer_encode_func")
@patch(f"{LLM_FACTORY_MODULE}.get_default_llm")
def test_sets_completed_status_and_zero_chunk_count(
self,
mock_get_llm: MagicMock, # noqa: ARG002
mock_get_encode: MagicMock,
mock_store_plaintext: MagicMock, # noqa: ARG002
) -> None:
mock_get_encode.return_value = MagicMock(return_value=[1])
uf = _make_user_file()
docs = _make_documents(["text"])
db_session = MagicMock()
_process_user_file_without_vector_db(uf, docs, db_session)
assert uf.status == UserFileStatus.COMPLETED
assert uf.chunk_count == 0
assert uf.last_project_sync_at is not None
db_session.add.assert_called_once_with(uf)
db_session.commit.assert_called_once()
@patch(f"{TASKS_MODULE}.store_user_file_plaintext")
@patch(f"{LLM_FACTORY_MODULE}.get_llm_tokenizer_encode_func")
@patch(f"{LLM_FACTORY_MODULE}.get_default_llm")
def test_preserves_deleting_status(
self,
mock_get_llm: MagicMock, # noqa: ARG002
mock_get_encode: MagicMock,
mock_store_plaintext: MagicMock, # noqa: ARG002
) -> None:
mock_get_encode.return_value = MagicMock(return_value=[1])
uf = _make_user_file(status=UserFileStatus.DELETING)
docs = _make_documents(["text"])
db_session = MagicMock()
_process_user_file_without_vector_db(uf, docs, db_session)
assert uf.status == UserFileStatus.DELETING
assert uf.chunk_count == 0
# ------------------------------------------------------------------
# _process_user_file_impl — branching on DISABLE_VECTOR_DB
# ------------------------------------------------------------------
class TestProcessImplBranching:
@patch(f"{TASKS_MODULE}._process_user_file_without_vector_db")
@patch(f"{TASKS_MODULE}._process_user_file_with_indexing")
@patch(f"{TASKS_MODULE}.DISABLE_VECTOR_DB", True)
@patch(f"{TASKS_MODULE}.get_session_with_current_tenant")
def test_calls_without_vector_db_when_disabled(
self,
mock_get_session: MagicMock,
mock_with_indexing: MagicMock,
mock_without_vdb: MagicMock,
) -> None:
uf = _make_user_file()
session = MagicMock()
session.get.return_value = uf
mock_get_session.return_value.__enter__.return_value = session
connector_mock = MagicMock()
connector_mock.load_from_state.return_value = [_make_documents(["hello"])]
with patch(f"{TASKS_MODULE}.LocalFileConnector", return_value=connector_mock):
_process_user_file_impl(
user_file_id=str(uf.id),
tenant_id="test-tenant",
redis_locking=False,
)
mock_without_vdb.assert_called_once()
mock_with_indexing.assert_not_called()
@patch(f"{TASKS_MODULE}._process_user_file_without_vector_db")
@patch(f"{TASKS_MODULE}._process_user_file_with_indexing")
@patch(f"{TASKS_MODULE}.DISABLE_VECTOR_DB", False)
@patch(f"{TASKS_MODULE}.get_session_with_current_tenant")
def test_calls_with_indexing_when_vector_db_enabled(
self,
mock_get_session: MagicMock,
mock_with_indexing: MagicMock,
mock_without_vdb: MagicMock,
) -> None:
uf = _make_user_file()
session = MagicMock()
session.get.return_value = uf
mock_get_session.return_value.__enter__.return_value = session
connector_mock = MagicMock()
connector_mock.load_from_state.return_value = [_make_documents(["hello"])]
with patch(f"{TASKS_MODULE}.LocalFileConnector", return_value=connector_mock):
_process_user_file_impl(
user_file_id=str(uf.id),
tenant_id="test-tenant",
redis_locking=False,
)
mock_with_indexing.assert_called_once()
mock_without_vdb.assert_not_called()
@patch(f"{TASKS_MODULE}.run_indexing_pipeline")
@patch(f"{TASKS_MODULE}.store_user_file_plaintext")
@patch(f"{TASKS_MODULE}.DISABLE_VECTOR_DB", True)
@patch(f"{TASKS_MODULE}.get_session_with_current_tenant")
def test_indexing_pipeline_not_called_when_disabled(
self,
mock_get_session: MagicMock,
mock_store_plaintext: MagicMock, # noqa: ARG002
mock_run_pipeline: MagicMock,
) -> None:
"""End-to-end: verify run_indexing_pipeline is never invoked."""
uf = _make_user_file()
session = MagicMock()
session.get.return_value = uf
mock_get_session.return_value.__enter__.return_value = session
connector_mock = MagicMock()
connector_mock.load_from_state.return_value = [_make_documents(["content"])]
with (
patch(f"{TASKS_MODULE}.LocalFileConnector", return_value=connector_mock),
patch(f"{LLM_FACTORY_MODULE}.get_default_llm"),
patch(
f"{LLM_FACTORY_MODULE}.get_llm_tokenizer_encode_func",
return_value=MagicMock(return_value=[1, 2, 3]),
),
):
_process_user_file_impl(
user_file_id=str(uf.id),
tenant_id="test-tenant",
redis_locking=False,
)
mock_run_pipeline.assert_not_called()
# ------------------------------------------------------------------
# _delete_user_file_impl — vector DB skip
# ------------------------------------------------------------------
class TestDeleteImplNoVectorDb:
@patch(f"{TASKS_MODULE}.DISABLE_VECTOR_DB", True)
@patch(f"{TASKS_MODULE}.get_default_file_store")
@patch(f"{TASKS_MODULE}.get_session_with_current_tenant")
def test_skips_vector_db_deletion(
self,
mock_get_session: MagicMock,
mock_get_file_store: MagicMock,
) -> None:
uf = _make_user_file(status=UserFileStatus.DELETING)
session = MagicMock()
session.get.return_value = uf
mock_get_session.return_value.__enter__.return_value = session
mock_get_file_store.return_value = MagicMock()
with (
patch(f"{TASKS_MODULE}.get_all_document_indices") as mock_get_indices,
patch(f"{TASKS_MODULE}.get_active_search_settings") as mock_get_ss,
patch(f"{TASKS_MODULE}.httpx_init_vespa_pool") as mock_vespa_pool,
):
_delete_user_file_impl(
user_file_id=str(uf.id),
tenant_id="test-tenant",
redis_locking=False,
)
mock_get_indices.assert_not_called()
mock_get_ss.assert_not_called()
mock_vespa_pool.assert_not_called()
session.delete.assert_called_once_with(uf)
session.commit.assert_called_once()
@patch(f"{TASKS_MODULE}.DISABLE_VECTOR_DB", True)
@patch(f"{TASKS_MODULE}.get_default_file_store")
@patch(f"{TASKS_MODULE}.get_session_with_current_tenant")
def test_still_deletes_file_store_and_db_record(
self,
mock_get_session: MagicMock,
mock_get_file_store: MagicMock,
) -> None:
uf = _make_user_file(status=UserFileStatus.DELETING)
session = MagicMock()
session.get.return_value = uf
mock_get_session.return_value.__enter__.return_value = session
file_store = MagicMock()
mock_get_file_store.return_value = file_store
_delete_user_file_impl(
user_file_id=str(uf.id),
tenant_id="test-tenant",
redis_locking=False,
)
assert file_store.delete_file.call_count == 2
session.delete.assert_called_once_with(uf)
session.commit.assert_called_once()
# ------------------------------------------------------------------
# _project_sync_user_file_impl — vector DB skip
# ------------------------------------------------------------------
class TestProjectSyncImplNoVectorDb:
@patch(f"{TASKS_MODULE}.DISABLE_VECTOR_DB", True)
@patch(f"{TASKS_MODULE}.get_session_with_current_tenant")
def test_skips_vector_db_update(
self,
mock_get_session: MagicMock,
) -> None:
uf = _make_user_file(status=UserFileStatus.COMPLETED)
session = MagicMock()
session.execute.return_value.scalar_one_or_none.return_value = uf
mock_get_session.return_value.__enter__.return_value = session
with (
patch(f"{TASKS_MODULE}.get_all_document_indices") as mock_get_indices,
patch(f"{TASKS_MODULE}.get_active_search_settings") as mock_get_ss,
patch(f"{TASKS_MODULE}.httpx_init_vespa_pool") as mock_vespa_pool,
):
_project_sync_user_file_impl(
user_file_id=str(uf.id),
tenant_id="test-tenant",
redis_locking=False,
)
mock_get_indices.assert_not_called()
mock_get_ss.assert_not_called()
mock_vespa_pool.assert_not_called()
@patch(f"{TASKS_MODULE}.DISABLE_VECTOR_DB", True)
@patch(f"{TASKS_MODULE}.get_session_with_current_tenant")
def test_still_clears_sync_flags(
self,
mock_get_session: MagicMock,
) -> None:
uf = _make_user_file(status=UserFileStatus.COMPLETED)
session = MagicMock()
session.execute.return_value.scalar_one_or_none.return_value = uf
mock_get_session.return_value.__enter__.return_value = session
_project_sync_user_file_impl(
user_file_id=str(uf.id),
tenant_id="test-tenant",
redis_locking=False,
)
assert uf.needs_project_sync is False
assert uf.needs_persona_sync is False
assert uf.last_project_sync_at is not None
session.add.assert_called_once_with(uf)
session.commit.assert_called_once()

View File

@@ -1,52 +0,0 @@
"""Tests for startup validation in no-vector-DB mode.
Verifies that DISABLE_VECTOR_DB raises RuntimeError when combined with
incompatible settings (MULTI_TENANT, ENABLE_CRAFT).
"""
from unittest.mock import patch
import pytest
class TestValidateNoVectorDbSettings:
@patch("onyx.main.DISABLE_VECTOR_DB", False)
def test_no_error_when_vector_db_enabled(self) -> None:
from onyx.main import validate_no_vector_db_settings
validate_no_vector_db_settings()
@patch("onyx.main.DISABLE_VECTOR_DB", True)
@patch("onyx.main.MULTI_TENANT", False)
@patch("onyx.server.features.build.configs.ENABLE_CRAFT", False)
def test_no_error_when_no_conflicts(self) -> None:
from onyx.main import validate_no_vector_db_settings
validate_no_vector_db_settings()
@patch("onyx.main.DISABLE_VECTOR_DB", True)
@patch("onyx.main.MULTI_TENANT", True)
def test_raises_on_multi_tenant(self) -> None:
from onyx.main import validate_no_vector_db_settings
with pytest.raises(RuntimeError, match="MULTI_TENANT"):
validate_no_vector_db_settings()
@patch("onyx.main.DISABLE_VECTOR_DB", True)
@patch("onyx.main.MULTI_TENANT", False)
@patch("onyx.server.features.build.configs.ENABLE_CRAFT", True)
def test_raises_on_enable_craft(self) -> None:
from onyx.main import validate_no_vector_db_settings
with pytest.raises(RuntimeError, match="ENABLE_CRAFT"):
validate_no_vector_db_settings()
@patch("onyx.main.DISABLE_VECTOR_DB", True)
@patch("onyx.main.MULTI_TENANT", True)
@patch("onyx.server.features.build.configs.ENABLE_CRAFT", True)
def test_multi_tenant_checked_before_craft(self) -> None:
"""MULTI_TENANT is checked first, so it should be the error raised."""
from onyx.main import validate_no_vector_db_settings
with pytest.raises(RuntimeError, match="MULTI_TENANT"):
validate_no_vector_db_settings()

View File

@@ -3,9 +3,9 @@
import React from "react";
import * as DialogPrimitive from "@radix-ui/react-dialog";
import { cn } from "@/lib/utils";
import type { IconProps } from "@opal/types";
import Text from "@/refresh-components/texts/Text";
import type { IconFunctionComponent } from "@opal/types";
import { Button } from "@opal/components";
import { Content } from "@opal/layouts";
import { SvgX } from "@opal/icons";
import { WithoutStyles } from "@/types";
import { Section, SectionProps } from "@/layouts/general-layouts";
@@ -407,7 +407,7 @@ ModalContent.displayName = DialogPrimitive.Content.displayName;
* ```
*/
interface ModalHeaderProps extends WithoutStyles<SectionProps> {
icon?: React.FunctionComponent<IconProps>;
icon?: IconFunctionComponent;
title: string;
description?: string;
onClose?: () => void;
@@ -440,52 +440,30 @@ const ModalHeader = React.forwardRef<HTMLDivElement, ModalHeaderProps>(
return (
<Section ref={ref} padding={1} alignItems="start" height="fit" {...props}>
<Section gap={0.5}>
{Icon && (
<Section
gap={0}
padding={0}
flexDirection="row"
justifyContent="between"
>
{/*
The `h-[1.5rem]` and `w-[1.5rem]` were added as backups here.
However, prop-resolution technically resolves to choosing classNames over size props, so technically the `size={24}` is the backup.
We specify both to be safe.
<DialogPrimitive.Title hidden>{title}</DialogPrimitive.Title>
{description && (
<DialogPrimitive.Description hidden>
{description}
</DialogPrimitive.Description>
)}
# Note
1.5rem === 24px
*/}
<Icon
className="stroke-text-04 h-[1.5rem] w-[1.5rem]"
size={24}
/>
{closeButton}
</Section>
)}
<Section
alignItems="start"
gap={0}
padding={0}
flexDirection="row"
justifyContent="between"
>
<Section alignItems="start" padding={0} gap={0}>
<DialogPrimitive.Title asChild>
<Text headingH3>{title}</Text>
</DialogPrimitive.Title>
{description && (
<DialogPrimitive.Description asChild>
<Text secondaryBody text03>
{description}
</Text>
</DialogPrimitive.Description>
)}
</Section>
{!Icon && closeButton}
</Section>
<Section
flexDirection="row"
justifyContent="between"
alignItems="start"
gap={0}
padding={0}
>
<Content
icon={Icon}
title={title}
description={description}
sizePreset="section"
variant="heading"
/>
{closeButton}
</Section>
{children}
</Section>
);