Compare commits

..

4 Commits

22 changed files with 421 additions and 756 deletions

View File

@@ -1,20 +1,14 @@
from datetime import datetime
from datetime import timezone
from uuid import UUID
from celery import shared_task
from celery import Task
from ee.onyx.background.celery_utils import should_perform_chat_ttl_check
from ee.onyx.background.task_name_builders import name_chat_ttl_task
from onyx.configs.app_configs import JOB_TIMEOUT
from onyx.configs.constants import OnyxCeleryTask
from onyx.db.chat import delete_chat_session
from onyx.db.chat import get_chat_sessions_older_than
from onyx.db.engine.sql_engine import get_session_with_current_tenant
from onyx.db.enums import TaskStatus
from onyx.db.tasks import mark_task_as_finished_with_id
from onyx.db.tasks import register_task
from onyx.server.settings.store import load_settings
from onyx.utils.logger import setup_logger
@@ -29,26 +23,16 @@ logger = setup_logger()
trail=False,
)
def perform_ttl_management_task(
self: Task, retention_limit_days: int, *, tenant_id: str
self: Task, retention_limit_days: int, *, tenant_id: str # noqa: ARG001
) -> None:
task_id = self.request.id
if not task_id:
raise RuntimeError("No task id defined for this task; cannot identify it")
start_time = datetime.now(tz=timezone.utc)
user_id: UUID | None = None
session_id: UUID | None = None
try:
with get_session_with_current_tenant() as db_session:
# we generally want to move off this, but keeping for now
register_task(
db_session=db_session,
task_name=name_chat_ttl_task(retention_limit_days, tenant_id),
task_id=task_id,
status=TaskStatus.STARTED,
start_time=start_time,
)
old_chat_sessions = get_chat_sessions_older_than(
retention_limit_days, db_session
@@ -65,23 +49,10 @@ def perform_ttl_management_task(
hard_delete=True,
)
with get_session_with_current_tenant() as db_session:
mark_task_as_finished_with_id(
db_session=db_session,
task_id=task_id,
success=True,
)
except Exception:
logger.exception(
f"delete_chat_session exceptioned. user_id={user_id} session_id={session_id}"
)
with get_session_with_current_tenant() as db_session:
mark_task_as_finished_with_id(
db_session=db_session,
task_id=task_id,
success=False,
)
raise

View File

@@ -36,6 +36,7 @@ from onyx.configs.constants import OnyxRedisLocks
from onyx.db.engine.sql_engine import get_session_with_current_tenant
from onyx.db.opensearch_migration import build_sanitized_to_original_doc_id_mapping
from onyx.db.opensearch_migration import get_vespa_visit_state
from onyx.db.opensearch_migration import is_migration_completed
from onyx.db.opensearch_migration import (
mark_migration_completed_time_if_not_set_with_commit,
)
@@ -106,14 +107,19 @@ def migrate_chunks_from_vespa_to_opensearch_task(
acquired; effectively a no-op. True if the task completed
successfully. False if the task errored.
"""
# 1. Check if we should run the task.
# 1.a. If OpenSearch indexing is disabled, we don't run the task.
if not ENABLE_OPENSEARCH_INDEXING_FOR_ONYX:
task_logger.warning(
"OpenSearch migration is not enabled, skipping chunk migration task."
)
return None
task_logger.info("Starting chunk-level migration from Vespa to OpenSearch.")
task_start_time = time.monotonic()
# 1.b. Only one instance of this task may run per tenant at a time. If we
# fail to acquire the lock, we assume another task already holds it and
# exit.
r = get_redis_client()
lock: RedisLock = r.lock(
name=OnyxRedisLocks.OPENSEARCH_MIGRATION_BEAT_LOCK,
@@ -136,10 +142,11 @@ def migrate_chunks_from_vespa_to_opensearch_task(
f"Token: {lock.local.token}"
)
# 2. Prepare to migrate.
total_chunks_migrated_this_task = 0
total_chunks_errored_this_task = 0
try:
# Double check that tenant info is correct.
# 2.a. Double-check that tenant info is correct.
if tenant_id != get_current_tenant_id():
err_str = (
f"Tenant ID mismatch in the OpenSearch migration task: "
@@ -148,16 +155,62 @@ def migrate_chunks_from_vespa_to_opensearch_task(
task_logger.error(err_str)
return False
with (
get_session_with_current_tenant() as db_session,
get_vespa_http_client(
timeout=VESPA_MIGRATION_REQUEST_TIMEOUT_S
) as vespa_client,
):
# Do as much DB work as we can in one place so we do not hold a
# session open during a migration batch.
with get_session_with_current_tenant() as db_session:
# 2.b. Immediately check to see if this tenant is done, to save
# having to do any other work. This function does not require a
# migration record to necessarily exist.
if is_migration_completed(db_session):
return True
# 2.c. Try to insert the OpenSearchTenantMigrationRecord table if it
# does not exist.
try_insert_opensearch_tenant_migration_record_with_commit(db_session)
# 2.d. Get search settings.
search_settings = get_current_search_settings(db_session)
tenant_state = TenantState(tenant_id=tenant_id, multitenant=MULTI_TENANT)
indexing_setting = IndexingSetting.from_db_model(search_settings)
# 2.e. Build sanitized to original doc ID mapping to check for
# conflicts in the event we sanitize a doc ID to an
# already-existing doc ID.
# We reconstruct this mapping for every task invocation because
# a document may have been added in the time between two tasks.
sanitized_doc_start_time = time.monotonic()
sanitized_to_original_doc_id_mapping = (
build_sanitized_to_original_doc_id_mapping(db_session)
)
task_logger.debug(
f"Built sanitized_to_original_doc_id_mapping with {len(sanitized_to_original_doc_id_mapping)} entries "
f"in {time.monotonic() - sanitized_doc_start_time:.3f} seconds."
)
# 2.f. Get the current migration state.
continuation_token_map, total_chunks_migrated = get_vespa_visit_state(
db_session
)
# 2.f.1. Double-check that the migration state does not imply
# completion. We should never enter this block, since we would expect
# is_migration_completed to have returned True, but in the unlikely
# event that the migration is complete yet the completion time was
# never stamped, we stamp it here.
if is_continuation_token_done_for_all_slices(continuation_token_map):
task_logger.info(
f"OpenSearch migration COMPLETED for tenant {tenant_id}. Total chunks migrated: {total_chunks_migrated}."
)
mark_migration_completed_time_if_not_set_with_commit(db_session)
return True
task_logger.debug(
f"Read the tenant migration record. Total chunks migrated: {total_chunks_migrated}. "
f"Continuation token map: {continuation_token_map}"
)
with get_vespa_http_client(
timeout=VESPA_MIGRATION_REQUEST_TIMEOUT_S
) as vespa_client:
# 2.g. Create the OpenSearch and Vespa document indexes.
tenant_state = TenantState(tenant_id=tenant_id, multitenant=MULTI_TENANT)
opensearch_document_index = OpenSearchDocumentIndex(
tenant_state=tenant_state,
index_name=search_settings.index_name,
@@ -171,22 +224,14 @@ def migrate_chunks_from_vespa_to_opensearch_task(
httpx_client=vespa_client,
)
sanitized_doc_start_time = time.monotonic()
# We reconstruct this mapping for every task invocation because a
# document may have been added in the time between two tasks.
sanitized_to_original_doc_id_mapping = (
build_sanitized_to_original_doc_id_mapping(db_session)
)
task_logger.debug(
f"Built sanitized_to_original_doc_id_mapping with {len(sanitized_to_original_doc_id_mapping)} entries "
f"in {time.monotonic() - sanitized_doc_start_time:.3f} seconds."
)
# 2.h. Get the approximate chunk count in Vespa as of this time to
# update the migration record.
approx_chunk_count_in_vespa: int | None = None
get_chunk_count_start_time = time.monotonic()
try:
approx_chunk_count_in_vespa = vespa_document_index.get_chunk_count()
except Exception:
# This failure should not be blocking.
task_logger.exception(
"Error getting approximate chunk count in Vespa. Moving on..."
)
@@ -195,25 +240,12 @@ def migrate_chunks_from_vespa_to_opensearch_task(
f"approximate chunk count in Vespa. Got {approx_chunk_count_in_vespa}."
)
# 3. Do the actual migration in batches until we run out of time.
while (
time.monotonic() - task_start_time < MIGRATION_TASK_SOFT_TIME_LIMIT_S
and lock.owned()
):
(
continuation_token_map,
total_chunks_migrated,
) = get_vespa_visit_state(db_session)
if is_continuation_token_done_for_all_slices(continuation_token_map):
task_logger.info(
f"OpenSearch migration COMPLETED for tenant {tenant_id}. Total chunks migrated: {total_chunks_migrated}."
)
mark_migration_completed_time_if_not_set_with_commit(db_session)
break
task_logger.debug(
f"Read the tenant migration record. Total chunks migrated: {total_chunks_migrated}. "
f"Continuation token map: {continuation_token_map}"
)
# 3.a. Get the next batch of raw chunks from Vespa.
get_vespa_chunks_start_time = time.monotonic()
raw_vespa_chunks, next_continuation_token_map = (
vespa_document_index.get_all_raw_document_chunks_paginated(
@@ -226,6 +258,7 @@ def migrate_chunks_from_vespa_to_opensearch_task(
f"seconds. Next continuation token map: {next_continuation_token_map}"
)
# 3.b. Transform the raw chunks to OpenSearch chunks in memory.
opensearch_document_chunks, errored_chunks = (
transform_vespa_chunks_to_opensearch_chunks(
raw_vespa_chunks,
@@ -240,6 +273,7 @@ def migrate_chunks_from_vespa_to_opensearch_task(
"errored."
)
# 3.c. Index the OpenSearch chunks into OpenSearch.
index_opensearch_chunks_start_time = time.monotonic()
opensearch_document_index.index_raw_chunks(
chunks=opensearch_document_chunks
@@ -251,12 +285,38 @@ def migrate_chunks_from_vespa_to_opensearch_task(
total_chunks_migrated_this_task += len(opensearch_document_chunks)
total_chunks_errored_this_task += len(errored_chunks)
update_vespa_visit_progress_with_commit(
db_session,
continuation_token_map=next_continuation_token_map,
chunks_processed=len(opensearch_document_chunks),
chunks_errored=len(errored_chunks),
approx_chunk_count_in_vespa=approx_chunk_count_in_vespa,
# Do as much DB work as we can in one place so we do not hold a
# session open during a migration batch.
with get_session_with_current_tenant() as db_session:
# 3.d. Update the migration state.
update_vespa_visit_progress_with_commit(
db_session,
continuation_token_map=next_continuation_token_map,
chunks_processed=len(opensearch_document_chunks),
chunks_errored=len(errored_chunks),
approx_chunk_count_in_vespa=approx_chunk_count_in_vespa,
)
# 3.e. Get the current migration state. Even though we
# technically have it in memory since we just wrote it, we
# want to treat the DB as the source of truth at all
# times.
continuation_token_map, total_chunks_migrated = (
get_vespa_visit_state(db_session)
)
# 3.e.1. Check if the migration is done.
if is_continuation_token_done_for_all_slices(
continuation_token_map
):
task_logger.info(
f"OpenSearch migration COMPLETED for tenant {tenant_id}. Total chunks migrated: {total_chunks_migrated}."
)
mark_migration_completed_time_if_not_set_with_commit(db_session)
return True
task_logger.debug(
f"Read the tenant migration record. Total chunks migrated: {total_chunks_migrated}. "
f"Continuation token map: {continuation_token_map}"
)
except Exception:
traceback.print_exc()
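The numbered comments above describe a time-boxed, resumable loop: each pass re-reads the continuation-token state, migrates one batch, persists progress, and only then checks the clock and the lock again, keeping DB sessions short. A minimal sketch of that shape, with hypothetical stand-ins for the onyx helpers (fetch_state, fetch_batch, index_batch, save_progress, and is_done are illustrative names, not the real API):

import time

SOFT_TIME_LIMIT_S = 60.0  # stand-in for MIGRATION_TASK_SOFT_TIME_LIMIT_S

def run_migration_batches(lock, fetch_state, fetch_batch, index_batch, save_progress, is_done):
    # Each iteration touches the DB only to read/write migration state,
    # never across the batch transfer itself.
    start = time.monotonic()
    token, migrated = fetch_state()  # continuation token map + progress so far
    while time.monotonic() - start < SOFT_TIME_LIMIT_S and lock.owned():
        if is_done(token):
            return True  # all slices exhausted; migration complete
        chunks, next_token = fetch_batch(token)  # pull raw chunks from the source index
        index_batch(chunks)  # write them to the destination index
        save_progress(next_token, len(chunks))  # persist progress with a fresh session
        token, migrated = fetch_state()  # re-read the DB as the source of truth
    return False  # out of time or lost the lock; a later task run resumes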

View File

@@ -324,6 +324,15 @@ def mark_migration_completed_time_if_not_set_with_commit(
db_session.commit()
def is_migration_completed(db_session: Session) -> bool:
"""Returns True if the migration is completed.
Can be run even if the migration record does not exist.
"""
record = db_session.query(OpenSearchTenantMigrationRecord).first()
return record is not None and record.migration_completed_at is not None
def build_sanitized_to_original_doc_id_mapping(
db_session: Session,
) -> dict[str, str]:

View File

@@ -1,3 +1,4 @@
import hashlib
from datetime import datetime
from datetime import timezone
from typing import Any
@@ -20,9 +21,13 @@ from onyx.document_index.opensearch.constants import DEFAULT_MAX_CHUNK_SIZE
from onyx.document_index.opensearch.constants import EF_CONSTRUCTION
from onyx.document_index.opensearch.constants import EF_SEARCH
from onyx.document_index.opensearch.constants import M
from onyx.document_index.opensearch.string_filtering import DocumentIDTooLongError
from onyx.document_index.opensearch.string_filtering import (
filter_and_validate_document_id,
)
from onyx.document_index.opensearch.string_filtering import (
MAX_DOCUMENT_ID_ENCODED_LENGTH,
)
from onyx.utils.tenant import get_tenant_id_short_string
from shared_configs.configs import MULTI_TENANT
from shared_configs.contextvars import get_current_tenant_id
@@ -75,17 +80,50 @@ def get_opensearch_doc_chunk_id(
This will be the string used to identify the chunk in OpenSearch. Any direct
chunk queries should use this function.
If the document ID is too long, a hash of the ID is used instead.
"""
sanitized_document_id = filter_and_validate_document_id(document_id)
opensearch_doc_chunk_id = (
f"{sanitized_document_id}__{max_chunk_size}__{chunk_index}"
opensearch_doc_chunk_id_suffix: str = f"__{max_chunk_size}__{chunk_index}"
encoded_suffix_length: int = len(opensearch_doc_chunk_id_suffix.encode("utf-8"))
max_encoded_permissible_doc_id_length: int = (
MAX_DOCUMENT_ID_ENCODED_LENGTH - encoded_suffix_length
)
opensearch_doc_chunk_id_tenant_prefix: str = ""
if tenant_state.multitenant:
short_tenant_id: str = get_tenant_id_short_string(tenant_state.tenant_id)
# Use the tenant ID because in multitenant mode each tenant has its own
# Documents table, so doc IDs are not guaranteed to be unique across
# tenants.
short_tenant_id = get_tenant_id_short_string(tenant_state.tenant_id)
opensearch_doc_chunk_id = f"{short_tenant_id}__{opensearch_doc_chunk_id}"
opensearch_doc_chunk_id_tenant_prefix = f"{short_tenant_id}__"
encoded_prefix_length: int = len(
opensearch_doc_chunk_id_tenant_prefix.encode("utf-8")
)
max_encoded_permissible_doc_id_length -= encoded_prefix_length
try:
sanitized_document_id: str = filter_and_validate_document_id(
document_id, max_encoded_length=max_encoded_permissible_doc_id_length
)
except DocumentIDTooLongError:
# If the document ID is too long, use a hash instead.
# We use blake2b because it is faster than and as secure as SHA-256, and
# it accepts digest_size, which controls the number of bytes returned in
# the hash.
# digest_size is the size of the returned hash in bytes. Since we render
# the hash bytes as a hex string, digest_size should be half the max
# target size of the hash string.
# Subtract 1 because filter_and_validate_document_id compares with >=
# against max_encoded_length.
# 64 is the maximum digest_size blake2b supports.
digest_size: int = min((max_encoded_permissible_doc_id_length - 1) // 2, 64)
sanitized_document_id = hashlib.blake2b(
document_id.encode("utf-8"), digest_size=digest_size
).hexdigest()
opensearch_doc_chunk_id: str = (
f"{opensearch_doc_chunk_id_tenant_prefix}{sanitized_document_id}{opensearch_doc_chunk_id_suffix}"
)
# Do one more validation to ensure we haven't exceeded the max length.
opensearch_doc_chunk_id = filter_and_validate_document_id(opensearch_doc_chunk_id)
return opensearch_doc_chunk_id
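To make the digest_size arithmetic concrete, a small worked example with assumed values for the chunk suffix and tenant prefix (the real values come from DEFAULT_MAX_CHUNK_SIZE and the short tenant ID):

import hashlib

MAX_DOCUMENT_ID_ENCODED_LENGTH = 512  # from string_filtering
suffix = "__512__0"    # assumes max_chunk_size=512, chunk_index=0 (8 bytes)
prefix = "abcdef12__"  # short tenant prefix in multitenant mode (10 bytes)
budget = MAX_DOCUMENT_ID_ENCODED_LENGTH - len(suffix) - len(prefix)  # 494 bytes left for the doc ID
digest_size = min((budget - 1) // 2, 64)  # hexdigest doubles the byte count; the >= check costs 1; blake2b caps at 64
hashed = hashlib.blake2b(("x" * 2000).encode("utf-8"), digest_size=digest_size).hexdigest()
assert len(hashed) == 2 * digest_size == 128  # a 128-character hex string
assert len((prefix + hashed + suffix).encode("utf-8")) < MAX_DOCUMENT_ID_ENCODED_LENGTH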

View File

@@ -1,7 +1,15 @@
import re
MAX_DOCUMENT_ID_ENCODED_LENGTH: int = 512
def filter_and_validate_document_id(document_id: str) -> str:
class DocumentIDTooLongError(ValueError):
"""Raised when a document ID is too long for OpenSearch after filtering."""
def filter_and_validate_document_id(
document_id: str, max_encoded_length: int = MAX_DOCUMENT_ID_ENCODED_LENGTH
) -> str:
"""
Filters and validates a document ID such that it can be used as an ID in
OpenSearch.
@@ -19,9 +27,13 @@ def filter_and_validate_document_id(document_id: str) -> str:
Args:
document_id: The document ID to filter and validate.
max_encoded_length: The maximum length, in bytes, of the document ID
after filtering. Compared with >= for extra resilience, so encoded
values of exactly this length also fail.
Raises:
ValueError: If the document ID is empty or too long after filtering.
DocumentIDTooLongError: If the document ID is too long after filtering.
ValueError: If the document ID is empty after filtering.
Returns:
str: The filtered document ID.
@@ -29,6 +41,8 @@ def filter_and_validate_document_id(document_id: str) -> str:
filtered_document_id = re.sub(r"[^A-Za-z0-9_.\-~]", "", document_id)
if not filtered_document_id:
raise ValueError(f"Document ID {document_id} is empty after filtering.")
if len(filtered_document_id.encode("utf-8")) >= 512:
raise ValueError(f"Document ID {document_id} is too long after filtering.")
if len(filtered_document_id.encode("utf-8")) >= max_encoded_length:
raise DocumentIDTooLongError(
f"Document ID {document_id} is too long after filtering."
)
return filtered_document_id
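A short usage sketch of the updated contract, assuming the onyx package is importable in the test environment (behavior follows the function above):

import pytest
from onyx.document_index.opensearch.string_filtering import (
    MAX_DOCUMENT_ID_ENCODED_LENGTH,
    DocumentIDTooLongError,
    filter_and_validate_document_id,
)

# Disallowed characters are stripped; the remainder is returned unchanged.
assert filter_and_validate_document_id("doc/with?chars") == "docwithchars"

# The length check uses >=, so an ID exactly at the limit is rejected too.
with pytest.raises(DocumentIDTooLongError):
    filter_and_validate_document_id("a" * MAX_DOCUMENT_ID_ENCODED_LENGTH)

# Callers that add their own prefix/suffix can pass a tighter budget.
with pytest.raises(DocumentIDTooLongError):
    filter_and_validate_document_id("a" * 100, max_encoded_length=100)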

View File

@@ -0,0 +1,203 @@
import pytest
from onyx.document_index.interfaces_new import TenantState
from onyx.document_index.opensearch.constants import DEFAULT_MAX_CHUNK_SIZE
from onyx.document_index.opensearch.schema import get_opensearch_doc_chunk_id
from onyx.document_index.opensearch.string_filtering import (
MAX_DOCUMENT_ID_ENCODED_LENGTH,
)
from shared_configs.configs import POSTGRES_DEFAULT_SCHEMA_STANDARD_VALUE
SINGLE_TENANT_STATE = TenantState(
tenant_id=POSTGRES_DEFAULT_SCHEMA_STANDARD_VALUE, multitenant=False
)
MULTI_TENANT_STATE = TenantState(
tenant_id="tenant_abcdef12-3456-7890-abcd-ef1234567890", multitenant=True
)
EXPECTED_SHORT_TENANT = "abcdef12"
class TestGetOpensearchDocChunkIdSingleTenant:
def test_basic(self) -> None:
result = get_opensearch_doc_chunk_id(
SINGLE_TENANT_STATE, "my-doc-id", chunk_index=0
)
assert result == f"my-doc-id__{DEFAULT_MAX_CHUNK_SIZE}__0"
def test_custom_chunk_size(self) -> None:
result = get_opensearch_doc_chunk_id(
SINGLE_TENANT_STATE, "doc1", chunk_index=3, max_chunk_size=1024
)
assert result == "doc1__1024__3"
def test_special_chars_are_stripped(self) -> None:
"""Tests characters not matching [A-Za-z0-9_.-~] are removed."""
result = get_opensearch_doc_chunk_id(
SINGLE_TENANT_STATE, "doc/with?special#chars&more%stuff", chunk_index=0
)
assert "/" not in result
assert "?" not in result
assert "#" not in result
assert result == f"docwithspecialcharsmorestuff__{DEFAULT_MAX_CHUNK_SIZE}__0"
def test_short_doc_id_not_hashed(self) -> None:
"""
Tests that a short doc ID should appear directly in the result, not as a
hash.
"""
doc_id = "short-id"
result = get_opensearch_doc_chunk_id(SINGLE_TENANT_STATE, doc_id, chunk_index=0)
assert "short-id" in result
def test_long_doc_id_is_hashed(self) -> None:
"""
Tests that a doc ID exceeding the max length should be replaced with a
blake2b hash.
"""
# Create a doc ID that will exceed max length after the suffix is
# appended.
doc_id = "a" * MAX_DOCUMENT_ID_ENCODED_LENGTH
result = get_opensearch_doc_chunk_id(SINGLE_TENANT_STATE, doc_id, chunk_index=0)
# The original doc ID should NOT appear in the result.
assert doc_id not in result
# The suffix should still be present.
assert f"__{DEFAULT_MAX_CHUNK_SIZE}__0" in result
def test_long_doc_id_hash_is_deterministic(self) -> None:
doc_id = "x" * MAX_DOCUMENT_ID_ENCODED_LENGTH
result1 = get_opensearch_doc_chunk_id(
SINGLE_TENANT_STATE, doc_id, chunk_index=5
)
result2 = get_opensearch_doc_chunk_id(
SINGLE_TENANT_STATE, doc_id, chunk_index=5
)
assert result1 == result2
def test_long_doc_id_different_inputs_produce_different_hashes(self) -> None:
doc_id_a = "a" * MAX_DOCUMENT_ID_ENCODED_LENGTH
doc_id_b = "b" * MAX_DOCUMENT_ID_ENCODED_LENGTH
result_a = get_opensearch_doc_chunk_id(
SINGLE_TENANT_STATE, doc_id_a, chunk_index=0
)
result_b = get_opensearch_doc_chunk_id(
SINGLE_TENANT_STATE, doc_id_b, chunk_index=0
)
assert result_a != result_b
def test_result_never_exceeds_max_length(self) -> None:
"""
Tests that the final result should always be under
MAX_DOCUMENT_ID_ENCODED_LENGTH bytes.
"""
doc_id = "z" * (MAX_DOCUMENT_ID_ENCODED_LENGTH * 2)
result = get_opensearch_doc_chunk_id(
SINGLE_TENANT_STATE, doc_id, chunk_index=999, max_chunk_size=99999
)
assert len(result.encode("utf-8")) < MAX_DOCUMENT_ID_ENCODED_LENGTH
def test_no_tenant_prefix_in_single_tenant(self) -> None:
result = get_opensearch_doc_chunk_id(
SINGLE_TENANT_STATE, "mydoc", chunk_index=0
)
assert not result.startswith(SINGLE_TENANT_STATE.tenant_id)
class TestGetOpensearchDocChunkIdMultiTenant:
def test_includes_tenant_prefix(self) -> None:
result = get_opensearch_doc_chunk_id(MULTI_TENANT_STATE, "mydoc", chunk_index=0)
assert result.startswith(f"{EXPECTED_SHORT_TENANT}__")
def test_format(self) -> None:
result = get_opensearch_doc_chunk_id(
MULTI_TENANT_STATE, "mydoc", chunk_index=2, max_chunk_size=256
)
assert result == f"{EXPECTED_SHORT_TENANT}__mydoc__256__2"
def test_long_doc_id_is_hashed_multitenant(self) -> None:
doc_id = "d" * MAX_DOCUMENT_ID_ENCODED_LENGTH
result = get_opensearch_doc_chunk_id(MULTI_TENANT_STATE, doc_id, chunk_index=0)
# Should still have tenant prefix.
assert result.startswith(f"{EXPECTED_SHORT_TENANT}__")
# The original doc ID should NOT appear in the result.
assert doc_id not in result
# The suffix should still be present.
assert f"__{DEFAULT_MAX_CHUNK_SIZE}__0" in result
def test_result_never_exceeds_max_length_multitenant(self) -> None:
doc_id = "q" * (MAX_DOCUMENT_ID_ENCODED_LENGTH * 2)
result = get_opensearch_doc_chunk_id(
MULTI_TENANT_STATE, doc_id, chunk_index=999, max_chunk_size=99999
)
assert len(result.encode("utf-8")) < MAX_DOCUMENT_ID_ENCODED_LENGTH
def test_different_tenants_produce_different_ids(self) -> None:
tenant_a = TenantState(
tenant_id="tenant_aaaaaaaa-0000-0000-0000-000000000000", multitenant=True
)
tenant_b = TenantState(
tenant_id="tenant_bbbbbbbb-0000-0000-0000-000000000000", multitenant=True
)
result_a = get_opensearch_doc_chunk_id(tenant_a, "same-doc", chunk_index=0)
result_b = get_opensearch_doc_chunk_id(tenant_b, "same-doc", chunk_index=0)
assert result_a != result_b
class TestGetOpensearchDocChunkIdEdgeCases:
def test_chunk_index_zero(self) -> None:
result = get_opensearch_doc_chunk_id(SINGLE_TENANT_STATE, "doc", chunk_index=0)
assert result.endswith("__0")
def test_large_chunk_index(self) -> None:
result = get_opensearch_doc_chunk_id(
SINGLE_TENANT_STATE, "doc", chunk_index=99999
)
assert result.endswith("__99999")
def test_doc_id_with_only_special_chars_raises(self) -> None:
"""
Tests that a doc ID that becomes empty after filtering should raise
ValueError.
"""
with pytest.raises(ValueError, match="empty after filtering"):
get_opensearch_doc_chunk_id(SINGLE_TENANT_STATE, "###???///", chunk_index=0)
def test_doc_id_at_boundary_length(self) -> None:
"""
Tests that a doc ID right at the boundary should not be hashed.
"""
suffix = f"__{DEFAULT_MAX_CHUNK_SIZE}__0"
suffix_len = len(suffix.encode("utf-8"))
# Max doc ID length that won't trigger hashing (must be <
# max_encoded_length).
max_doc_len = MAX_DOCUMENT_ID_ENCODED_LENGTH - suffix_len - 1
doc_id = "a" * max_doc_len
result = get_opensearch_doc_chunk_id(SINGLE_TENANT_STATE, doc_id, chunk_index=0)
assert doc_id in result
def test_doc_id_at_boundary_length_multitenant(self) -> None:
"""
Tests that a doc ID right at the boundary should not be hashed in
multitenant mode.
"""
suffix = f"__{DEFAULT_MAX_CHUNK_SIZE}__0"
suffix_len = len(suffix.encode("utf-8"))
prefix = f"{EXPECTED_SHORT_TENANT}__"
prefix_len = len(prefix.encode("utf-8"))
# Max doc ID length that won't trigger hashing (must be <
# max_encoded_length).
max_doc_len = MAX_DOCUMENT_ID_ENCODED_LENGTH - suffix_len - prefix_len - 1
doc_id = "a" * max_doc_len
result = get_opensearch_doc_chunk_id(MULTI_TENANT_STATE, doc_id, chunk_index=0)
assert doc_id in result
def test_doc_id_one_over_boundary_is_hashed(self) -> None:
"""
Tests that a doc ID one byte over the boundary should be hashed.
"""
suffix = f"__{DEFAULT_MAX_CHUNK_SIZE}__0"
suffix_len = len(suffix.encode("utf-8"))
# This length will trigger the >= check in filter_and_validate_document_id
doc_id = "a" * (MAX_DOCUMENT_ID_ENCODED_LENGTH - suffix_len)
result = get_opensearch_doc_chunk_id(SINGLE_TENANT_STATE, doc_id, chunk_index=0)
assert doc_id not in result

View File

@@ -7,7 +7,6 @@ import (
"github.com/onyx-dot-app/onyx/cli/internal/api"
"github.com/onyx-dot-app/onyx/cli/internal/config"
"github.com/onyx-dot-app/onyx/cli/internal/exitcodes"
"github.com/spf13/cobra"
)
@@ -17,23 +16,16 @@ func newAgentsCmd() *cobra.Command {
cmd := &cobra.Command{
Use: "agents",
Short: "List available agents",
Long: `List all visible agents configured on the Onyx server.
By default, output is a human-readable table with ID, name, and description.
Use --json for machine-readable output.`,
Example: ` onyx-cli agents
onyx-cli agents --json
onyx-cli agents --json | jq '.[].name'`,
RunE: func(cmd *cobra.Command, args []string) error {
cfg := config.Load()
if !cfg.IsConfigured() {
return exitcodes.New(exitcodes.NotConfigured, "onyx CLI is not configured\n Run: onyx-cli configure")
return fmt.Errorf("onyx CLI is not configured — run 'onyx-cli configure' first")
}
client := api.NewClient(cfg)
agents, err := client.ListAgents(cmd.Context())
if err != nil {
return fmt.Errorf("failed to list agents: %w\n Check your connection with: onyx-cli validate-config", err)
return fmt.Errorf("failed to list agents: %w", err)
}
if agentsJSON {

View File

@@ -4,65 +4,33 @@ import (
"context"
"encoding/json"
"fmt"
"io"
"os"
"os/signal"
"strings"
"syscall"
"github.com/onyx-dot-app/onyx/cli/internal/api"
"github.com/onyx-dot-app/onyx/cli/internal/config"
"github.com/onyx-dot-app/onyx/cli/internal/exitcodes"
"github.com/onyx-dot-app/onyx/cli/internal/models"
"github.com/onyx-dot-app/onyx/cli/internal/overflow"
"github.com/spf13/cobra"
"golang.org/x/term"
)
const defaultMaxOutputBytes = 4096
func newAskCmd() *cobra.Command {
var (
askAgentID int
askJSON bool
askQuiet bool
askPrompt string
maxOutput int
)
cmd := &cobra.Command{
Use: "ask [question]",
Short: "Ask a one-shot question (non-interactive)",
Long: `Send a one-shot question to an Onyx agent and print the response.
The question can be provided as a positional argument, via --prompt, or piped
through stdin. When stdin contains piped data, it is sent as context along
with the question from --prompt (or used as the question itself).
When stdout is not a TTY (e.g., called by a script or AI agent), output is
automatically truncated to --max-output bytes and the full response is saved
to a temp file. Set --max-output 0 to disable truncation.`,
Args: cobra.MaximumNArgs(1),
Example: ` onyx-cli ask "What connectors are available?"
onyx-cli ask --agent-id 3 "Summarize our Q4 revenue"
onyx-cli ask --json "List all users" | jq '.event.content'
cat error.log | onyx-cli ask --prompt "Find the root cause"
echo "what is onyx?" | onyx-cli ask`,
Args: cobra.ExactArgs(1),
RunE: func(cmd *cobra.Command, args []string) error {
cfg := config.Load()
if !cfg.IsConfigured() {
return exitcodes.New(exitcodes.NotConfigured, "onyx CLI is not configured\n Run: onyx-cli configure")
}
if askJSON && askQuiet {
return exitcodes.New(exitcodes.BadRequest, "--json and --quiet cannot be used together")
}
question, err := resolveQuestion(args, askPrompt)
if err != nil {
return err
return fmt.Errorf("onyx CLI is not configured — run 'onyx-cli configure' first")
}
question := args[0]
agentID := cfg.DefaultAgentID
if cmd.Flags().Changed("agent-id") {
agentID = askAgentID
@@ -82,23 +50,9 @@ to a temp file. Set --max-output 0 to disable truncation.`,
nil,
)
// Determine truncation threshold.
isTTY := term.IsTerminal(int(os.Stdout.Fd()))
truncateAt := 0 // 0 means no truncation
if cmd.Flags().Changed("max-output") {
truncateAt = maxOutput
} else if !isTTY {
truncateAt = defaultMaxOutputBytes
}
var sessionID string
var lastErr error
gotStop := false
// Overflow writer: tees to stdout and optionally to a temp file.
// In quiet mode, buffer everything and print once at the end.
ow := &overflow.Writer{Limit: truncateAt, Quiet: askQuiet}
for event := range ch {
if e, ok := event.(models.SessionCreatedEvent); ok {
sessionID = e.ChatSessionID
@@ -128,50 +82,22 @@ to a temp file. Set --max-output 0 to disable truncation.`,
switch e := event.(type) {
case models.MessageDeltaEvent:
ow.Write(e.Content)
case models.SearchStartEvent:
if isTTY && !askQuiet {
if e.IsInternetSearch {
fmt.Fprintf(os.Stderr, "\033[2mSearching the web...\033[0m\n")
} else {
fmt.Fprintf(os.Stderr, "\033[2mSearching documents...\033[0m\n")
}
}
case models.SearchQueriesEvent:
if isTTY && !askQuiet {
for _, q := range e.Queries {
fmt.Fprintf(os.Stderr, "\033[2m → %s\033[0m\n", q)
}
}
case models.SearchDocumentsEvent:
if isTTY && !askQuiet && len(e.Documents) > 0 {
fmt.Fprintf(os.Stderr, "\033[2mFound %d documents\033[0m\n", len(e.Documents))
}
case models.ReasoningStartEvent:
if isTTY && !askQuiet {
fmt.Fprintf(os.Stderr, "\033[2mThinking...\033[0m\n")
}
case models.ToolStartEvent:
if isTTY && !askQuiet && e.ToolName != "" {
fmt.Fprintf(os.Stderr, "\033[2mUsing %s...\033[0m\n", e.ToolName)
}
fmt.Print(e.Content)
case models.ErrorEvent:
ow.Finish()
return fmt.Errorf("%s", e.Error)
case models.StopEvent:
ow.Finish()
fmt.Println()
return nil
}
}
if !askJSON {
ow.Finish()
}
if ctx.Err() != nil {
if sessionID != "" {
client.StopChatSession(context.Background(), sessionID)
}
if !askJSON {
fmt.Println()
}
return nil
}
@@ -179,56 +105,20 @@ to a temp file. Set --max-output 0 to disable truncation.`,
return lastErr
}
if !gotStop {
if !askJSON {
fmt.Println()
}
return fmt.Errorf("stream ended unexpectedly")
}
if !askJSON {
fmt.Println()
}
return nil
},
}
cmd.Flags().IntVar(&askAgentID, "agent-id", 0, "Agent ID to use")
cmd.Flags().BoolVar(&askJSON, "json", false, "Output raw JSON events")
cmd.Flags().BoolVarP(&askQuiet, "quiet", "q", false, "Buffer output and print once at end (no streaming)")
cmd.Flags().StringVar(&askPrompt, "prompt", "", "Question text (use with piped stdin context)")
cmd.Flags().IntVar(&maxOutput, "max-output", defaultMaxOutputBytes,
"Max bytes to print before truncating (0 to disable, auto-enabled for non-TTY)")
// Suppress cobra's default error/usage on RunE errors
return cmd
}
// resolveQuestion builds the final question string from args, --prompt, and stdin.
func resolveQuestion(args []string, prompt string) (string, error) {
hasArg := len(args) > 0
hasPrompt := prompt != ""
hasStdin := !term.IsTerminal(int(os.Stdin.Fd()))
if hasArg && hasPrompt {
return "", exitcodes.New(exitcodes.BadRequest, "specify the question as an argument or --prompt, not both")
}
var stdinContent string
if hasStdin {
const maxStdinBytes = 10 * 1024 * 1024 // 10MB
data, err := io.ReadAll(io.LimitReader(os.Stdin, maxStdinBytes))
if err != nil {
return "", fmt.Errorf("failed to read stdin: %w", err)
}
stdinContent = strings.TrimSpace(string(data))
}
switch {
case hasArg && stdinContent != "":
// arg is the question, stdin is context
return args[0] + "\n\n" + stdinContent, nil
case hasArg:
return args[0], nil
case hasPrompt && stdinContent != "":
// --prompt is the question, stdin is context
return prompt + "\n\n" + stdinContent, nil
case hasPrompt:
return prompt, nil
case stdinContent != "":
return stdinContent, nil
default:
return "", exitcodes.New(exitcodes.BadRequest, "no question provided\n Usage: onyx-cli ask \"your question\"\n Or: echo \"context\" | onyx-cli ask --prompt \"your question\"")
}
}

View File

@@ -13,11 +13,6 @@ func newChatCmd() *cobra.Command {
return &cobra.Command{
Use: "chat",
Short: "Launch the interactive chat TUI (default)",
Long: `Launch the interactive terminal UI for chatting with your Onyx agent.
This is the default command when no subcommand is specified. On first run,
an interactive setup wizard will guide you through configuration.`,
Example: ` onyx-cli chat
onyx-cli`,
RunE: func(cmd *cobra.Command, args []string) error {
cfg := config.Load()

View File

@@ -1,126 +1,19 @@
package cmd
import (
"context"
"errors"
"fmt"
"io"
"os"
"strings"
"time"
"github.com/onyx-dot-app/onyx/cli/internal/api"
"github.com/onyx-dot-app/onyx/cli/internal/config"
"github.com/onyx-dot-app/onyx/cli/internal/exitcodes"
"github.com/onyx-dot-app/onyx/cli/internal/onboarding"
"github.com/spf13/cobra"
"golang.org/x/term"
)
func newConfigureCmd() *cobra.Command {
var (
serverURL string
apiKey string
apiKeyStdin bool
dryRun bool
)
cmd := &cobra.Command{
return &cobra.Command{
Use: "configure",
Short: "Configure server URL and API key",
Long: `Set up the Onyx CLI with your server URL and API key.
When --server-url and --api-key are both provided, the configuration is saved
non-interactively (useful for scripts and AI agents). Otherwise, an interactive
setup wizard is launched.
If --api-key is omitted but stdin has piped data, the API key is read from
stdin automatically. You can also use --api-key-stdin to make this explicit.
This avoids leaking the key in shell history.
Use --dry-run to test the connection without saving the configuration.`,
Example: ` onyx-cli configure
onyx-cli configure --server-url https://my-onyx.com --api-key sk-...
echo "$ONYX_API_KEY" | onyx-cli configure --server-url https://my-onyx.com
echo "$ONYX_API_KEY" | onyx-cli configure --server-url https://my-onyx.com --api-key-stdin
onyx-cli configure --server-url https://my-onyx.com --api-key sk-... --dry-run`,
RunE: func(cmd *cobra.Command, args []string) error {
// Read API key from stdin if piped (implicit) or --api-key-stdin (explicit)
if apiKeyStdin && apiKey != "" {
return exitcodes.New(exitcodes.BadRequest, "--api-key and --api-key-stdin cannot be used together")
}
if (apiKey == "" && !term.IsTerminal(int(os.Stdin.Fd()))) || apiKeyStdin {
data, err := io.ReadAll(os.Stdin)
if err != nil {
return fmt.Errorf("failed to read API key from stdin: %w", err)
}
apiKey = strings.TrimSpace(string(data))
}
if serverURL != "" && apiKey != "" {
return configureNonInteractive(serverURL, apiKey, dryRun)
}
if dryRun {
return exitcodes.New(exitcodes.BadRequest, "--dry-run requires --server-url and --api-key")
}
if serverURL != "" || apiKey != "" {
return exitcodes.New(exitcodes.BadRequest, "both --server-url and --api-key are required for non-interactive setup\n Run 'onyx-cli configure' without flags for interactive setup")
}
cfg := config.Load()
onboarding.Run(&cfg)
return nil
},
}
cmd.Flags().StringVar(&serverURL, "server-url", "", "Onyx server URL (e.g., https://cloud.onyx.app)")
cmd.Flags().StringVar(&apiKey, "api-key", "", "API key for authentication (or pipe via stdin)")
cmd.Flags().BoolVar(&apiKeyStdin, "api-key-stdin", false, "Read API key from stdin (explicit; also happens automatically when stdin is piped)")
cmd.Flags().BoolVar(&dryRun, "dry-run", false, "Test connection without saving config (requires --server-url and --api-key)")
return cmd
}
func configureNonInteractive(serverURL, apiKey string, dryRun bool) error {
cfg := config.OnyxCliConfig{
ServerURL: serverURL,
APIKey: apiKey,
DefaultAgentID: 0,
}
// Preserve existing default agent ID from disk (not env overrides)
if existing := config.LoadFromDisk(); existing.DefaultAgentID != 0 {
cfg.DefaultAgentID = existing.DefaultAgentID
}
// Test connection
client := api.NewClient(cfg)
ctx, cancel := context.WithTimeout(context.Background(), 15*time.Second)
defer cancel()
if err := client.TestConnection(ctx); err != nil {
var authErr *api.AuthError
if errors.As(err, &authErr) {
return exitcodes.Newf(exitcodes.AuthFailure, "authentication failed: %v\n Check your API key", err)
}
return exitcodes.Newf(exitcodes.Unreachable, "connection failed: %v\n Check your server URL", err)
}
if dryRun {
fmt.Printf("Server: %s\n", serverURL)
fmt.Println("Status: connected and authenticated")
fmt.Println("Dry run: config was NOT saved")
return nil
}
if err := config.Save(cfg); err != nil {
return fmt.Errorf("could not save config: %w", err)
}
fmt.Printf("Config: %s\n", config.ConfigFilePath())
fmt.Printf("Server: %s\n", serverURL)
fmt.Println("Status: connected and authenticated")
return nil
}

View File

@@ -23,7 +23,6 @@ import (
"github.com/charmbracelet/wish/ratelimiter"
"github.com/onyx-dot-app/onyx/cli/internal/api"
"github.com/onyx-dot-app/onyx/cli/internal/config"
"github.com/onyx-dot-app/onyx/cli/internal/exitcodes"
"github.com/onyx-dot-app/onyx/cli/internal/tui"
"github.com/spf13/cobra"
"golang.org/x/time/rate"
@@ -296,15 +295,15 @@ provided via the ONYX_API_KEY environment variable to skip the prompt:
The server URL is taken from the server operator's config. The server
auto-generates an Ed25519 host key on first run if the key file does not
already exist. The host key path can also be set via the ONYX_SSH_HOST_KEY
environment variable (the --host-key flag takes precedence).`,
Example: ` onyx-cli serve --port 2222
ssh localhost -p 2222
onyx-cli serve --host 0.0.0.0 --port 2222
onyx-cli serve --idle-timeout 30m --max-session-timeout 2h`,
environment variable (the --host-key flag takes precedence).
Example:
onyx-cli serve --port 2222
ssh localhost -p 2222`,
RunE: func(cmd *cobra.Command, args []string) error {
serverCfg := config.Load()
if serverCfg.ServerURL == "" {
return exitcodes.New(exitcodes.NotConfigured, "server URL is not configured\n Run: onyx-cli configure")
return fmt.Errorf("server URL is not configured; run 'onyx-cli configure' first")
}
if !cmd.Flags().Changed("host-key") {
if v := os.Getenv(config.EnvSSHHostKey); v != "" {

View File

@@ -2,13 +2,11 @@ package cmd
import (
"context"
"errors"
"fmt"
"time"
"github.com/onyx-dot-app/onyx/cli/internal/api"
"github.com/onyx-dot-app/onyx/cli/internal/config"
"github.com/onyx-dot-app/onyx/cli/internal/exitcodes"
"github.com/onyx-dot-app/onyx/cli/internal/version"
log "github.com/sirupsen/logrus"
"github.com/spf13/cobra"
@@ -18,21 +16,17 @@ func newValidateConfigCmd() *cobra.Command {
return &cobra.Command{
Use: "validate-config",
Short: "Validate configuration and test server connection",
Long: `Check that the CLI is configured, the server is reachable, and the API key
is valid. Also reports the server version and warns if it is below the
minimum required.`,
Example: ` onyx-cli validate-config`,
RunE: func(cmd *cobra.Command, args []string) error {
// Check config file
if !config.ConfigExists() {
return exitcodes.Newf(exitcodes.NotConfigured, "config file not found at %s\n Run: onyx-cli configure", config.ConfigFilePath())
return fmt.Errorf("config file not found at %s\n Run 'onyx-cli configure' to set up", config.ConfigFilePath())
}
cfg := config.Load()
// Check API key
if !cfg.IsConfigured() {
return exitcodes.New(exitcodes.NotConfigured, "API key is missing\n Run: onyx-cli configure")
return fmt.Errorf("API key is missing\n Run 'onyx-cli configure' to set up")
}
_, _ = fmt.Fprintf(cmd.OutOrStdout(), "Config: %s\n", config.ConfigFilePath())
@@ -41,11 +35,7 @@ minimum required.`,
// Test connection
client := api.NewClient(cfg)
if err := client.TestConnection(cmd.Context()); err != nil {
var authErr *api.AuthError
if errors.As(err, &authErr) {
return exitcodes.Newf(exitcodes.AuthFailure, "authentication failed: %v\n Reconfigure with: onyx-cli configure", err)
}
return exitcodes.Newf(exitcodes.Unreachable, "connection failed: %v\n Reconfigure with: onyx-cli configure", err)
return fmt.Errorf("connection failed: %w", err)
}
_, _ = fmt.Fprintln(cmd.OutOrStdout(), "Status: connected and authenticated")

View File

@@ -149,12 +149,12 @@ func (c *Client) TestConnection(ctx context.Context) error {
if resp2.StatusCode == 401 || resp2.StatusCode == 403 {
if isHTML || strings.Contains(respServer, "awselb") {
return &AuthError{Message: fmt.Sprintf("HTTP %d from a reverse proxy (not the Onyx backend).\n Check your deployment's ingress / proxy configuration", resp2.StatusCode)}
return fmt.Errorf("HTTP %d from a reverse proxy (not the Onyx backend).\n Check your deployment's ingress / proxy configuration", resp2.StatusCode)
}
if resp2.StatusCode == 401 {
return &AuthError{Message: fmt.Sprintf("invalid API key or token.\n %s", body)}
return fmt.Errorf("invalid API key or token.\n %s", body)
}
return &AuthError{Message: fmt.Sprintf("access denied — check that the API key is valid.\n %s", body)}
return fmt.Errorf("access denied — check that the API key is valid.\n %s", body)
}
detail := fmt.Sprintf("HTTP %d", resp2.StatusCode)

View File

@@ -11,12 +11,3 @@ type OnyxAPIError struct {
func (e *OnyxAPIError) Error() string {
return fmt.Sprintf("HTTP %d: %s", e.StatusCode, e.Detail)
}
// AuthError is returned when authentication or authorization fails.
type AuthError struct {
Message string
}
func (e *AuthError) Error() string {
return e.Message
}

View File

@@ -59,10 +59,8 @@ func ConfigExists() bool {
return err == nil
}
// LoadFromDisk reads config from the file only, without applying environment
// variable overrides. Use this when you need the persisted config values
// (e.g., to preserve them during a save operation).
func LoadFromDisk() OnyxCliConfig {
// Load reads config from file and applies environment variable overrides.
func Load() OnyxCliConfig {
cfg := DefaultConfig()
data, err := os.ReadFile(ConfigFilePath())
@@ -72,13 +70,6 @@ func LoadFromDisk() OnyxCliConfig {
}
}
return cfg
}
// Load reads config from file and applies environment variable overrides.
func Load() OnyxCliConfig {
cfg := LoadFromDisk()
// Environment overrides
if v := os.Getenv(EnvServerURL); v != "" {
cfg.ServerURL = v

View File

@@ -1,33 +0,0 @@
// Package exitcodes defines semantic exit codes for the Onyx CLI.
package exitcodes
import "fmt"
const (
Success = 0
General = 1
BadRequest = 2 // invalid args / command-line errors (convention)
NotConfigured = 3
AuthFailure = 4
Unreachable = 5
)
// ExitError wraps an error with a specific exit code.
type ExitError struct {
Code int
Err error
}
func (e *ExitError) Error() string {
return e.Err.Error()
}
// New creates an ExitError with the given code and message.
func New(code int, msg string) *ExitError {
return &ExitError{Code: code, Err: fmt.Errorf("%s", msg)}
}
// Newf creates an ExitError with a formatted message.
func Newf(code int, format string, args ...any) *ExitError {
return &ExitError{Code: code, Err: fmt.Errorf(format, args...)}
}

View File

@@ -1,40 +0,0 @@
package exitcodes
import (
"errors"
"fmt"
"testing"
)
func TestExitError_Error(t *testing.T) {
e := New(NotConfigured, "not configured")
if e.Error() != "not configured" {
t.Fatalf("expected 'not configured', got %q", e.Error())
}
if e.Code != NotConfigured {
t.Fatalf("expected code %d, got %d", NotConfigured, e.Code)
}
}
func TestExitError_Newf(t *testing.T) {
e := Newf(Unreachable, "cannot reach %s", "server")
if e.Error() != "cannot reach server" {
t.Fatalf("expected 'cannot reach server', got %q", e.Error())
}
if e.Code != Unreachable {
t.Fatalf("expected code %d, got %d", Unreachable, e.Code)
}
}
func TestExitError_ErrorsAs(t *testing.T) {
e := New(BadRequest, "bad input")
wrapped := fmt.Errorf("wrapper: %w", e)
var exitErr *ExitError
if !errors.As(wrapped, &exitErr) {
t.Fatal("errors.As should find ExitError")
}
if exitErr.Code != BadRequest {
t.Fatalf("expected code %d, got %d", BadRequest, exitErr.Code)
}
}

View File

@@ -1,121 +0,0 @@
// Package overflow provides a streaming writer that auto-truncates output
// for non-TTY callers (e.g., AI agents, scripts). Full content is saved to
// a temp file on disk; only the first N bytes are printed to stdout.
package overflow
import (
"fmt"
"os"
"strings"
log "github.com/sirupsen/logrus"
)
// Writer handles streaming output with optional truncation.
// When Limit > 0, it streams to a temp file on disk (not memory) and stops
// writing to stdout after Limit bytes. When Limit == 0, it writes directly
// to stdout. In Quiet mode, it buffers in memory and prints once at the end.
type Writer struct {
Limit int
Quiet bool
written int
totalBytes int
truncated bool
buf strings.Builder // used only in quiet mode
tmpFile *os.File // used only in truncation mode (Limit > 0)
}
// Write sends a chunk of content through the writer.
func (w *Writer) Write(s string) {
w.totalBytes += len(s)
// Quiet mode: buffer in memory, print nothing
if w.Quiet {
w.buf.WriteString(s)
return
}
if w.Limit <= 0 {
fmt.Print(s)
return
}
// Truncation mode: stream all content to temp file on disk
if w.tmpFile == nil {
f, err := os.CreateTemp("", "onyx-ask-*.txt")
if err != nil {
// Fall back to no-truncation if we can't create the file
fmt.Fprintf(os.Stderr, "warning: could not create temp file: %v\n", err)
w.Limit = 0
fmt.Print(s)
return
}
w.tmpFile = f
}
if _, err := w.tmpFile.WriteString(s); err != nil {
// Disk write failed — abandon truncation, stream directly to stdout
fmt.Fprintf(os.Stderr, "warning: temp file write failed: %v\n", err)
w.closeTmpFile(true)
w.Limit = 0
w.truncated = false
fmt.Print(s)
return
}
if w.truncated {
return
}
remaining := w.Limit - w.written
if len(s) <= remaining {
fmt.Print(s)
w.written += len(s)
} else {
if remaining > 0 {
fmt.Print(s[:remaining])
w.written += remaining
}
w.truncated = true
}
}
// Finish flushes remaining output. Call once after all Write calls are done.
func (w *Writer) Finish() {
// Quiet mode: print buffered content at once
if w.Quiet {
fmt.Println(w.buf.String())
return
}
if !w.truncated {
w.closeTmpFile(true) // clean up unused temp file
fmt.Println()
return
}
// Close the temp file so it's readable
tmpPath := w.tmpFile.Name()
w.closeTmpFile(false) // close but keep the file
fmt.Printf("\n\n--- response truncated (%d bytes total) ---\n", w.totalBytes)
fmt.Printf("Full response: %s\n", tmpPath)
fmt.Printf("Explore:\n")
fmt.Printf(" cat %s | grep \"<pattern>\"\n", tmpPath)
fmt.Printf(" cat %s | tail -50\n", tmpPath)
}
// closeTmpFile closes and optionally removes the temp file.
func (w *Writer) closeTmpFile(remove bool) {
if w.tmpFile == nil {
return
}
if err := w.tmpFile.Close(); err != nil {
log.Debugf("warning: failed to close temp file: %v", err)
}
if remove {
if err := os.Remove(w.tmpFile.Name()); err != nil {
log.Debugf("warning: failed to remove temp file: %v", err)
}
}
w.tmpFile = nil
}

View File

@@ -1,95 +0,0 @@
package overflow
import (
"os"
"testing"
)
func TestWriter_NoLimit(t *testing.T) {
w := &Writer{Limit: 0}
w.Write("hello world")
if w.truncated {
t.Fatal("should not be truncated with limit 0")
}
if w.totalBytes != 11 {
t.Fatalf("expected 11 total bytes, got %d", w.totalBytes)
}
}
func TestWriter_UnderLimit(t *testing.T) {
w := &Writer{Limit: 100}
w.Write("hello")
w.Write(" world")
if w.truncated {
t.Fatal("should not be truncated when under limit")
}
if w.written != 11 {
t.Fatalf("expected 11 written bytes, got %d", w.written)
}
}
func TestWriter_OverLimit(t *testing.T) {
w := &Writer{Limit: 5}
w.Write("hello world") // 11 bytes, limit 5
if !w.truncated {
t.Fatal("should be truncated")
}
if w.written != 5 {
t.Fatalf("expected 5 written bytes, got %d", w.written)
}
if w.totalBytes != 11 {
t.Fatalf("expected 11 total bytes, got %d", w.totalBytes)
}
if w.tmpFile == nil {
t.Fatal("temp file should have been created")
}
_ = w.tmpFile.Close()
data, _ := os.ReadFile(w.tmpFile.Name())
_ = os.Remove(w.tmpFile.Name())
if string(data) != "hello world" {
t.Fatalf("temp file should contain full content, got %q", string(data))
}
}
func TestWriter_MultipleChunks(t *testing.T) {
w := &Writer{Limit: 10}
w.Write("hello") // 5 bytes
w.Write(" ") // 6 bytes
w.Write("world") // 11 bytes, crosses limit
w.Write("!") // 12 bytes, already truncated
if !w.truncated {
t.Fatal("should be truncated")
}
if w.written != 10 {
t.Fatalf("expected 10 written bytes, got %d", w.written)
}
if w.totalBytes != 12 {
t.Fatalf("expected 12 total bytes, got %d", w.totalBytes)
}
if w.tmpFile == nil {
t.Fatal("temp file should have been created")
}
_ = w.tmpFile.Close()
data, _ := os.ReadFile(w.tmpFile.Name())
_ = os.Remove(w.tmpFile.Name())
if string(data) != "hello world!" {
t.Fatalf("temp file should contain full content, got %q", string(data))
}
}
func TestWriter_QuietMode(t *testing.T) {
w := &Writer{Limit: 0, Quiet: true}
w.Write("hello")
w.Write(" world")
if w.written != 0 {
t.Fatalf("quiet mode should not write to stdout, got %d written", w.written)
}
if w.totalBytes != 11 {
t.Fatalf("expected 11 total bytes, got %d", w.totalBytes)
}
if w.buf.String() != "hello world" {
t.Fatalf("buffer should contain full content, got %q", w.buf.String())
}
}

View File

@@ -1,12 +1,10 @@
package main
import (
"errors"
"fmt"
"os"
"github.com/onyx-dot-app/onyx/cli/cmd"
"github.com/onyx-dot-app/onyx/cli/internal/exitcodes"
)
var (
@@ -20,10 +18,6 @@ func main() {
if err := cmd.Execute(); err != nil {
fmt.Fprintf(os.Stderr, "Error: %v\n", err)
var exitErr *exitcodes.ExitError
if errors.As(err, &exitErr) {
os.Exit(exitErr.Code)
}
os.Exit(1)
}
}

View File

@@ -467,6 +467,10 @@
/* Frost Overlay (for FrostedDiv component) - lighter in light mode */
--frost-overlay: var(--alpha-grey-00-10);
/* Scrollbar */
--scrollbar-track: transparent;
--scrollbar-thumb: var(--alpha-grey-100-20);
}
/* Dark Colors */
@@ -671,4 +675,8 @@
/* Frost Overlay (for FrostedDiv component) - darker in dark mode */
--frost-overlay: var(--alpha-grey-100-10);
/* Scrollbar */
--scrollbar-track: transparent;
--scrollbar-thumb: var(--alpha-grey-00-20);
}

View File

@@ -127,17 +127,8 @@
}
@layer utilities {
/* Hide scrollbar for Chrome, Safari and Opera */
.no-scrollbar::-webkit-scrollbar {
display: none;
}
/* Hide scrollbar for IE, Edge and Firefox */
.no-scrollbar {
-ms-overflow-style: none;
/* IE and Edge */
scrollbar-width: none;
/* Firefox */
}
/* SHADOWS */
@@ -362,27 +353,9 @@
/* SCROLL BAR */
.default-scrollbar::-webkit-scrollbar {
width: 6px;
}
.default-scrollbar::-webkit-scrollbar-track {
background: #f1f1f1;
}
.default-scrollbar::-webkit-scrollbar-thumb {
background: #888;
border-radius: 4px;
}
.default-scrollbar::-webkit-scrollbar-thumb:hover {
background: #555;
}
.default-scrollbar {
scrollbar-width: thin;
scrollbar-color: #888 transparent;
overflow: overlay;
overflow-y: scroll;
overflow-x: hidden;
}
@@ -392,78 +365,21 @@
height: 100%;
}
.inputscroll::-webkit-scrollbar-track {
background: #e5e7eb;
.inputscroll {
scrollbar-width: none;
}
::-webkit-scrollbar {
width: 0px;
/* Vertical scrollbar width */
height: 8px;
/* Horizontal scrollbar height */
}
::-webkit-scrollbar-track {
background: transparent;
/* background: theme("colors.scrollbar.track"); */
/* Track background color */
}
/* Style the scrollbar handle */
::-webkit-scrollbar-thumb {
background: transparent;
/* background: theme("colors.scrollbar.thumb"); */
/* Handle color */
border-radius: 10px;
}
/* Handle on hover */
::-webkit-scrollbar-thumb:hover {
background: transparent;
/* background: theme("colors.scrollbar.thumb-hover"); */
/* Handle color on hover */
}
.dark-scrollbar::-webkit-scrollbar-thumb {
background: transparent;
/* background: theme("colors.scrollbar.dark.thumb"); */
/* Handle color */
border-radius: 10px;
}
.dark-scrollbar::-webkit-scrollbar-thumb:hover {
background: transparent;
/* background: theme("colors.scrollbar.dark.thumb-hover"); */
/* Handle color on hover */
/* Ensure native scrollbars are visible */
@layer base {
* {
scrollbar-width: auto;
}
}
/* TEXTAREA */
textarea::-webkit-scrollbar {
width: 8px;
}
textarea::-webkit-scrollbar-track {
background: var(--scrollbar-track);
border-radius: 4px;
}
textarea::-webkit-scrollbar-thumb {
background: var(--scrollbar-thumb);
border-radius: 4px;
}
textarea::-webkit-scrollbar-thumb:hover {
background: var(--scrollbar-thumb-hover);
}
textarea {
resize: vertical;
}
/* For Firefox */
textarea {
scrollbar-width: thin;
scrollbar-color: var(--scrollbar-thumb) var(--scrollbar-track);
}