fix(desktop): prefer native scrollbar styling (#9879)

chore: remove unused db rows (#9869)
fix(opensearch): Doc IDs whose length would exceed OpenSearch's ID length are hashed (#9847)
2026-04-03 14:02:42 +00:00 · 2026-04-03 00:33:18 +00:00 · 2026-04-02 22:17:10 +00:00 · 2026-04-02 21:35:17 +00:00
7 changed files with 280 additions and 130 deletions
--- a/backend/ee/onyx/background/celery/tasks/ttl_management/tasks.py
+++ b/backend/ee/onyx/background/celery/tasks/ttl_management/tasks.py
@@ -1,20 +1,14 @@
-from datetime import datetime
-from datetime import timezone
 from uuid import UUID

 from celery import shared_task
 from celery import Task

 from ee.onyx.background.celery_utils import should_perform_chat_ttl_check
-from ee.onyx.background.task_name_builders import name_chat_ttl_task
 from onyx.configs.app_configs import JOB_TIMEOUT
 from onyx.configs.constants import OnyxCeleryTask
 from onyx.db.chat import delete_chat_session
 from onyx.db.chat import get_chat_sessions_older_than
 from onyx.db.engine.sql_engine import get_session_with_current_tenant
-from onyx.db.enums import TaskStatus
-from onyx.db.tasks import mark_task_as_finished_with_id
-from onyx.db.tasks import register_task
 from onyx.server.settings.store import load_settings
 from onyx.utils.logger import setup_logger

@@ -29,26 +23,16 @@ logger = setup_logger()
    trail=False,
 )
 def perform_ttl_management_task(
-    self: Task, retention_limit_days: int, *, tenant_id: str
+    self: Task, retention_limit_days: int, *, tenant_id: str  # noqa: ARG001
 ) -> None:
    task_id = self.request.id
    if not task_id:
        raise RuntimeError("No task id defined for this task; cannot identify it")

-    start_time = datetime.now(tz=timezone.utc)
-
    user_id: UUID | None = None
    session_id: UUID | None = None
    try:
        with get_session_with_current_tenant() as db_session:
-            # we generally want to move off this, but keeping for now
-            register_task(
-                db_session=db_session,
-                task_name=name_chat_ttl_task(retention_limit_days, tenant_id),
-                task_id=task_id,
-                status=TaskStatus.STARTED,
-                start_time=start_time,
-            )

            old_chat_sessions = get_chat_sessions_older_than(
                retention_limit_days, db_session
@@ -65,23 +49,10 @@ def perform_ttl_management_task(
                    hard_delete=True,
                )

-        with get_session_with_current_tenant() as db_session:
-            mark_task_as_finished_with_id(
-                db_session=db_session,
-                task_id=task_id,
-                success=True,
-            )
-
    except Exception:
        logger.exception(
            f"delete_chat_session exceptioned. user_id={user_id} session_id={session_id}"
        )
-        with get_session_with_current_tenant() as db_session:
-            mark_task_as_finished_with_id(
-                db_session=db_session,
-                task_id=task_id,
-                success=False,
-            )
        raise


--- a/backend/onyx/document_index/opensearch/schema.py
+++ b/backend/onyx/document_index/opensearch/schema.py
@@ -1,3 +1,4 @@
+import hashlib
 from datetime import datetime
 from datetime import timezone
 from typing import Any
@@ -20,9 +21,13 @@ from onyx.document_index.opensearch.constants import DEFAULT_MAX_CHUNK_SIZE
 from onyx.document_index.opensearch.constants import EF_CONSTRUCTION
 from onyx.document_index.opensearch.constants import EF_SEARCH
 from onyx.document_index.opensearch.constants import M
+from onyx.document_index.opensearch.string_filtering import DocumentIDTooLongError
 from onyx.document_index.opensearch.string_filtering import (
    filter_and_validate_document_id,
 )
+from onyx.document_index.opensearch.string_filtering import (
+    MAX_DOCUMENT_ID_ENCODED_LENGTH,
+)
 from onyx.utils.tenant import get_tenant_id_short_string
 from shared_configs.configs import MULTI_TENANT
 from shared_configs.contextvars import get_current_tenant_id
@@ -75,17 +80,50 @@ def get_opensearch_doc_chunk_id(

    This will be the string used to identify the chunk in OpenSearch. Any direct
    chunk queries should use this function.
+
+    If the document ID is too long, a hash of the ID is used instead.
    """
-    sanitized_document_id = filter_and_validate_document_id(document_id)
-    opensearch_doc_chunk_id = (
-        f"{sanitized_document_id}__{max_chunk_size}__{chunk_index}"
+    opensearch_doc_chunk_id_suffix: str = f"__{max_chunk_size}__{chunk_index}"
+    encoded_suffix_length: int = len(opensearch_doc_chunk_id_suffix.encode("utf-8"))
+    max_encoded_permissible_doc_id_length: int = (
+        MAX_DOCUMENT_ID_ENCODED_LENGTH - encoded_suffix_length
    )
+    opensearch_doc_chunk_id_tenant_prefix: str = ""
    if tenant_state.multitenant:
+        short_tenant_id: str = get_tenant_id_short_string(tenant_state.tenant_id)
        # Use tenant ID because in multitenant mode each tenant has its own
        # Documents table, so there is a very small chance that doc IDs are not
        # actually unique across all tenants.
-        short_tenant_id = get_tenant_id_short_string(tenant_state.tenant_id)
-        opensearch_doc_chunk_id = f"{short_tenant_id}__{opensearch_doc_chunk_id}"
+        opensearch_doc_chunk_id_tenant_prefix = f"{short_tenant_id}__"
+        encoded_prefix_length: int = len(
+            opensearch_doc_chunk_id_tenant_prefix.encode("utf-8")
+        )
+        max_encoded_permissible_doc_id_length -= encoded_prefix_length
+
+    try:
+        sanitized_document_id: str = filter_and_validate_document_id(
+            document_id, max_encoded_length=max_encoded_permissible_doc_id_length
+        )
+    except DocumentIDTooLongError:
+        # If the document ID is too long, use a hash instead.
+        # We use blake2b because it is faster than SHA-256 with comparable
+        # security, and it accepts digest_size, which controls the number of
+        # bytes in the returned hash.
+        # Since the hash bytes are rendered as a hex string (two characters
+        # per byte), digest_size must be at most half the target length of
+        # the hash string.
+        # Subtract 1 because filter_and_validate_document_id rejects encoded
+        # lengths >= max_encoded_length.
+        # 64 is the maximum digest_size blake2b supports.
+        digest_size: int = min((max_encoded_permissible_doc_id_length - 1) // 2, 64)
+        sanitized_document_id = hashlib.blake2b(
+            document_id.encode("utf-8"), digest_size=digest_size
+        ).hexdigest()
+
+    opensearch_doc_chunk_id: str = (
+        f"{opensearch_doc_chunk_id_tenant_prefix}{sanitized_document_id}{opensearch_doc_chunk_id_suffix}"
+    )
+
    # Do one more validation to ensure we haven't exceeded the max length.
    opensearch_doc_chunk_id = filter_and_validate_document_id(opensearch_doc_chunk_id)
    return opensearch_doc_chunk_id
--- a/backend/onyx/document_index/opensearch/string_filtering.py
+++ b/backend/onyx/document_index/opensearch/string_filtering.py
@@ -1,7 +1,15 @@
 import re

+MAX_DOCUMENT_ID_ENCODED_LENGTH: int = 512

-def filter_and_validate_document_id(document_id: str) -> str:
+
+class DocumentIDTooLongError(ValueError):
+    """Raised when a document ID is too long for OpenSearch after filtering."""
+
+
+def filter_and_validate_document_id(
+    document_id: str, max_encoded_length: int = MAX_DOCUMENT_ID_ENCODED_LENGTH
+) -> str:
    """
    Filters and validates a document ID such that it can be used as an ID in
    OpenSearch.
@@ -19,9 +27,13 @@ def filter_and_validate_document_id(document_id: str) -> str:

    Args:
        document_id: The document ID to filter and validate.
+        max_encoded_length: The maximum length of the document ID after
+            filtering in bytes. Compared with >= for extra resilience, so
+            encoded values of this length will fail.

    Raises:
-        ValueError: If the document ID is empty or too long after filtering.
+        DocumentIDTooLongError: If the document ID is too long after filtering.
+        ValueError: If the document ID is empty after filtering.

    Returns:
        str: The filtered document ID.
@@ -29,6 +41,8 @@ def filter_and_validate_document_id(document_id: str) -> str:
    filtered_document_id = re.sub(r"[^A-Za-z0-9_.\-~]", "", document_id)
    if not filtered_document_id:
        raise ValueError(f"Document ID {document_id} is empty after filtering.")
-    if len(filtered_document_id.encode("utf-8")) >= 512:
-        raise ValueError(f"Document ID {document_id} is too long after filtering.")
+    if len(filtered_document_id.encode("utf-8")) >= max_encoded_length:
+        raise DocumentIDTooLongError(
+            f"Document ID {document_id} is too long after filtering."
+        )
    return filtered_document_id
--- a/backend/tests/unit/onyx/document_index/opensearch/test_get_doc_chunk_id.py
+++ b/backend/tests/unit/onyx/document_index/opensearch/test_get_doc_chunk_id.py
@@ -0,0 +1,203 @@
+import pytest
+
+from onyx.document_index.interfaces_new import TenantState
+from onyx.document_index.opensearch.constants import DEFAULT_MAX_CHUNK_SIZE
+from onyx.document_index.opensearch.schema import get_opensearch_doc_chunk_id
+from onyx.document_index.opensearch.string_filtering import (
+    MAX_DOCUMENT_ID_ENCODED_LENGTH,
+)
+from shared_configs.configs import POSTGRES_DEFAULT_SCHEMA_STANDARD_VALUE
+
+
+SINGLE_TENANT_STATE = TenantState(
+    tenant_id=POSTGRES_DEFAULT_SCHEMA_STANDARD_VALUE, multitenant=False
+)
+MULTI_TENANT_STATE = TenantState(
+    tenant_id="tenant_abcdef12-3456-7890-abcd-ef1234567890", multitenant=True
+)
+EXPECTED_SHORT_TENANT = "abcdef12"
+
+
+class TestGetOpensearchDocChunkIdSingleTenant:
+    def test_basic(self) -> None:
+        result = get_opensearch_doc_chunk_id(
+            SINGLE_TENANT_STATE, "my-doc-id", chunk_index=0
+        )
+        assert result == f"my-doc-id__{DEFAULT_MAX_CHUNK_SIZE}__0"
+
+    def test_custom_chunk_size(self) -> None:
+        result = get_opensearch_doc_chunk_id(
+            SINGLE_TENANT_STATE, "doc1", chunk_index=3, max_chunk_size=1024
+        )
+        assert result == "doc1__1024__3"
+
+    def test_special_chars_are_stripped(self) -> None:
+        """Tests that characters other than A-Za-z0-9, _, ., -, and ~ are removed."""
+        result = get_opensearch_doc_chunk_id(
+            SINGLE_TENANT_STATE, "doc/with?special#chars&more%stuff", chunk_index=0
+        )
+        assert "/" not in result
+        assert "?" not in result
+        assert "#" not in result
+        assert result == f"docwithspecialcharsmorestuff__{DEFAULT_MAX_CHUNK_SIZE}__0"
+
+    def test_short_doc_id_not_hashed(self) -> None:
+        """
+        Tests that a short doc ID should appear directly in the result, not as a
+        hash.
+        """
+        doc_id = "short-id"
+        result = get_opensearch_doc_chunk_id(SINGLE_TENANT_STATE, doc_id, chunk_index=0)
+        assert "short-id" in result
+
+    def test_long_doc_id_is_hashed(self) -> None:
+        """
+        Tests that a doc ID exceeding the max length should be replaced with a
+        blake2b hash.
+        """
+        # Create a doc ID that will exceed max length after the suffix is
+        # appended.
+        doc_id = "a" * MAX_DOCUMENT_ID_ENCODED_LENGTH
+        result = get_opensearch_doc_chunk_id(SINGLE_TENANT_STATE, doc_id, chunk_index=0)
+        # The original doc ID should NOT appear in the result.
+        assert doc_id not in result
+        # The suffix should still be present.
+        assert f"__{DEFAULT_MAX_CHUNK_SIZE}__0" in result
+
+    def test_long_doc_id_hash_is_deterministic(self) -> None:
+        doc_id = "x" * MAX_DOCUMENT_ID_ENCODED_LENGTH
+        result1 = get_opensearch_doc_chunk_id(
+            SINGLE_TENANT_STATE, doc_id, chunk_index=5
+        )
+        result2 = get_opensearch_doc_chunk_id(
+            SINGLE_TENANT_STATE, doc_id, chunk_index=5
+        )
+        assert result1 == result2
+
+    def test_long_doc_id_different_inputs_produce_different_hashes(self) -> None:
+        doc_id_a = "a" * MAX_DOCUMENT_ID_ENCODED_LENGTH
+        doc_id_b = "b" * MAX_DOCUMENT_ID_ENCODED_LENGTH
+        result_a = get_opensearch_doc_chunk_id(
+            SINGLE_TENANT_STATE, doc_id_a, chunk_index=0
+        )
+        result_b = get_opensearch_doc_chunk_id(
+            SINGLE_TENANT_STATE, doc_id_b, chunk_index=0
+        )
+        assert result_a != result_b
+
+    def test_result_never_exceeds_max_length(self) -> None:
+        """
+        Tests that the final result should always be under
+        MAX_DOCUMENT_ID_ENCODED_LENGTH bytes.
+        """
+        doc_id = "z" * (MAX_DOCUMENT_ID_ENCODED_LENGTH * 2)
+        result = get_opensearch_doc_chunk_id(
+            SINGLE_TENANT_STATE, doc_id, chunk_index=999, max_chunk_size=99999
+        )
+        assert len(result.encode("utf-8")) < MAX_DOCUMENT_ID_ENCODED_LENGTH
+
+    def test_no_tenant_prefix_in_single_tenant(self) -> None:
+        result = get_opensearch_doc_chunk_id(
+            SINGLE_TENANT_STATE, "mydoc", chunk_index=0
+        )
+        assert not result.startswith(SINGLE_TENANT_STATE.tenant_id)
+
+
+class TestGetOpensearchDocChunkIdMultiTenant:
+    def test_includes_tenant_prefix(self) -> None:
+        result = get_opensearch_doc_chunk_id(MULTI_TENANT_STATE, "mydoc", chunk_index=0)
+        assert result.startswith(f"{EXPECTED_SHORT_TENANT}__")
+
+    def test_format(self) -> None:
+        result = get_opensearch_doc_chunk_id(
+            MULTI_TENANT_STATE, "mydoc", chunk_index=2, max_chunk_size=256
+        )
+        assert result == f"{EXPECTED_SHORT_TENANT}__mydoc__256__2"
+
+    def test_long_doc_id_is_hashed_multitenant(self) -> None:
+        doc_id = "d" * MAX_DOCUMENT_ID_ENCODED_LENGTH
+        result = get_opensearch_doc_chunk_id(MULTI_TENANT_STATE, doc_id, chunk_index=0)
+        # Should still have tenant prefix.
+        assert result.startswith(f"{EXPECTED_SHORT_TENANT}__")
+        # The original doc ID should NOT appear in the result.
+        assert doc_id not in result
+        # The suffix should still be present.
+        assert f"__{DEFAULT_MAX_CHUNK_SIZE}__0" in result
+
+    def test_result_never_exceeds_max_length_multitenant(self) -> None:
+        doc_id = "q" * (MAX_DOCUMENT_ID_ENCODED_LENGTH * 2)
+        result = get_opensearch_doc_chunk_id(
+            MULTI_TENANT_STATE, doc_id, chunk_index=999, max_chunk_size=99999
+        )
+        assert len(result.encode("utf-8")) < MAX_DOCUMENT_ID_ENCODED_LENGTH
+
+    def test_different_tenants_produce_different_ids(self) -> None:
+        tenant_a = TenantState(
+            tenant_id="tenant_aaaaaaaa-0000-0000-0000-000000000000", multitenant=True
+        )
+        tenant_b = TenantState(
+            tenant_id="tenant_bbbbbbbb-0000-0000-0000-000000000000", multitenant=True
+        )
+        result_a = get_opensearch_doc_chunk_id(tenant_a, "same-doc", chunk_index=0)
+        result_b = get_opensearch_doc_chunk_id(tenant_b, "same-doc", chunk_index=0)
+        assert result_a != result_b
+
+
+class TestGetOpensearchDocChunkIdEdgeCases:
+    def test_chunk_index_zero(self) -> None:
+        result = get_opensearch_doc_chunk_id(SINGLE_TENANT_STATE, "doc", chunk_index=0)
+        assert result.endswith("__0")
+
+    def test_large_chunk_index(self) -> None:
+        result = get_opensearch_doc_chunk_id(
+            SINGLE_TENANT_STATE, "doc", chunk_index=99999
+        )
+        assert result.endswith("__99999")
+
+    def test_doc_id_with_only_special_chars_raises(self) -> None:
+        """
+        Tests that a doc ID that becomes empty after filtering should raise
+        ValueError.
+        """
+        with pytest.raises(ValueError, match="empty after filtering"):
+            get_opensearch_doc_chunk_id(SINGLE_TENANT_STATE, "###???///", chunk_index=0)
+
+    def test_doc_id_at_boundary_length(self) -> None:
+        """
+        Tests that a doc ID right at the boundary should not be hashed.
+        """
+        suffix = f"__{DEFAULT_MAX_CHUNK_SIZE}__0"
+        suffix_len = len(suffix.encode("utf-8"))
+        # Max doc ID length that won't trigger hashing (must be <
+        # max_encoded_length).
+        max_doc_len = MAX_DOCUMENT_ID_ENCODED_LENGTH - suffix_len - 1
+        doc_id = "a" * max_doc_len
+        result = get_opensearch_doc_chunk_id(SINGLE_TENANT_STATE, doc_id, chunk_index=0)
+        assert doc_id in result
+
+    def test_doc_id_at_boundary_length_multitenant(self) -> None:
+        """
+        Tests that a doc ID right at the boundary should not be hashed in
+        multitenant mode.
+        """
+        suffix = f"__{DEFAULT_MAX_CHUNK_SIZE}__0"
+        suffix_len = len(suffix.encode("utf-8"))
+        prefix = f"{EXPECTED_SHORT_TENANT}__"
+        prefix_len = len(prefix.encode("utf-8"))
+        # Max doc ID length that won't trigger hashing (must be <
+        # max_encoded_length).
+        max_doc_len = MAX_DOCUMENT_ID_ENCODED_LENGTH - suffix_len - prefix_len - 1
+        doc_id = "a" * max_doc_len
+        result = get_opensearch_doc_chunk_id(MULTI_TENANT_STATE, doc_id, chunk_index=0)
+        assert doc_id in result
+
+    def test_doc_id_one_over_boundary_is_hashed(self) -> None:
+        """
+        Tests that a doc ID one byte over the boundary should be hashed.
+        """
+        suffix = f"__{DEFAULT_MAX_CHUNK_SIZE}__0"
+        suffix_len = len(suffix.encode("utf-8"))
+        # This length will trigger the >= check in filter_and_validate_document_id
+        doc_id = "a" * (MAX_DOCUMENT_ID_ENCODED_LENGTH - suffix_len)
+        result = get_opensearch_doc_chunk_id(SINGLE_TENANT_STATE, doc_id, chunk_index=0)
+        assert doc_id not in result
--- a/web/lib/opal/src/components/buttons/sidebar-tab/components.tsx
+++ b/web/lib/opal/src/components/buttons/sidebar-tab/components.tsx
@@ -127,7 +127,7 @@ function SidebarTab({
              rightChildren={truncationSpacer}
            />
          ) : (
-            <div className="flex flex-row items-center gap-2 w-full">
+            <div className="flex flex-row items-center gap-2 flex-1">
              {Icon && (
                <div className="flex items-center justify-center p-0.5">
                  <Icon className="h-[1rem] w-[1rem] text-text-03" />
--- a/web/src/app/css/colors.css
+++ b/web/src/app/css/colors.css
@@ -467,6 +467,10 @@

  /* Frost Overlay (for FrostedDiv component) - lighter in light mode */
  --frost-overlay: var(--alpha-grey-00-10);
+
+  /* Scrollbar */
+  --scrollbar-track: transparent;
+  --scrollbar-thumb: var(--alpha-grey-100-20);
 }

 /* Dark Colors */
@@ -671,4 +675,8 @@

  /* Frost Overlay (for FrostedDiv component) - darker in dark mode */
  --frost-overlay: var(--alpha-grey-100-10);
+
+  /* Scrollbar */
+  --scrollbar-track: transparent;
+  --scrollbar-thumb: var(--alpha-grey-00-20);
 }
--- a/web/src/app/globals.css
+++ b/web/src/app/globals.css
@@ -127,17 +127,8 @@
 }

@layer utilities {
-  /* Hide scrollbar for Chrome, Safari and Opera */
-  .no-scrollbar::-webkit-scrollbar {
-    display: none;
-  }
-
-  /* Hide scrollbar for IE, Edge and Firefox */
  .no-scrollbar {
-    -ms-overflow-style: none;
-    /* IE and Edge */
    scrollbar-width: none;
-    /* Firefox */
  }

  /* SHADOWS */
@@ -362,27 +353,9 @@

 /* SCROLL BAR */

-.default-scrollbar::-webkit-scrollbar {
-  width: 6px;
-}
-
-.default-scrollbar::-webkit-scrollbar-track {
-  background: #f1f1f1;
-}
-
-.default-scrollbar::-webkit-scrollbar-thumb {
-  background: #888;
-  border-radius: 4px;
-}
-
-.default-scrollbar::-webkit-scrollbar-thumb:hover {
-  background: #555;
-}
-
 .default-scrollbar {
  scrollbar-width: thin;
  scrollbar-color: #888 transparent;
-  overflow: overlay;
  overflow-y: scroll;
  overflow-x: hidden;
 }
@@ -392,78 +365,21 @@
  height: 100%;
 }

-.inputscroll::-webkit-scrollbar-track {
-  background: #e5e7eb;
+.inputscroll {
  scrollbar-width: none;
 }

-::-webkit-scrollbar {
-  width: 0px;
-  /* Vertical scrollbar width */
-  height: 8px;
-  /* Horizontal scrollbar height */
-}
-
-::-webkit-scrollbar-track {
-  background: transparent;
-  /* background: theme("colors.scrollbar.track"); */
-  /* Track background color */
-}
-
-/* Style the scrollbar handle */
-::-webkit-scrollbar-thumb {
-  background: transparent;
-  /* background: theme("colors.scrollbar.thumb"); */
-  /* Handle color */
-  border-radius: 10px;
-}
-
-/* Handle on hover */
-::-webkit-scrollbar-thumb:hover {
-  background: transparent;
-  /* background: theme("colors.scrollbar.thumb-hover"); */
-  /* Handle color on hover */
-}
-
-.dark-scrollbar::-webkit-scrollbar-thumb {
-  background: transparent;
-  /* background: theme("colors.scrollbar.dark.thumb"); */
-  /* Handle color */
-  border-radius: 10px;
-}
-
-.dark-scrollbar::-webkit-scrollbar-thumb:hover {
-  background: transparent;
-  /* background: theme("colors.scrollbar.dark.thumb-hover"); */
-  /* Handle color on hover */
+/* Ensure native scrollbars are visible */
+@layer base {
+  * {
+    scrollbar-width: auto;
+  }
 }

 /* TEXTAREA */

-textarea::-webkit-scrollbar {
-  width: 8px;
-}
-
-textarea::-webkit-scrollbar-track {
-  background: var(--scrollbar-track);
-  border-radius: 4px;
-}
-
-textarea::-webkit-scrollbar-thumb {
-  background: var(--scrollbar-thumb);
-  border-radius: 4px;
-}
-
-textarea::-webkit-scrollbar-thumb:hover {
-  background: var(--scrollbar-thumb-hover);
-}
-
 textarea {
  resize: vertical;
-}
-
-/* For Firefox */
-textarea {
  scrollbar-width: thin;
  scrollbar-color: var(--scrollbar-thumb) var(--scrollbar-track);
 }
Author	SHA1	Message	Date
Jamison Lahman	d4a96d70f3	fix(desktop): prefer native scrollbar styling (#9879)	2026-04-03 00:33:18 +00:00
Evan Lohn	5b000c2173	chore: remove unused db rows (#9869)	2026-04-02 22:17:10 +00:00
acaprau	d62af28e40	fix(opensearch): Doc IDs whose length would exceed OpenSearch's ID length are hashed (#9847)	2026-04-02 21:35:17 +00:00