feat(hook): improve disconnect error popover

2026-04-03 05:52:42 +00:00 · 2026-04-02 15:33:32 -07:00
12 changed files with 356 additions and 451 deletions
--- a/backend/ee/onyx/background/celery/tasks/ttl_management/tasks.py
+++ b/backend/ee/onyx/background/celery/tasks/ttl_management/tasks.py
@@ -1,14 +1,20 @@
+from datetime import datetime
+from datetime import timezone
 from uuid import UUID

 from celery import shared_task
 from celery import Task

 from ee.onyx.background.celery_utils import should_perform_chat_ttl_check
+from ee.onyx.background.task_name_builders import name_chat_ttl_task
 from onyx.configs.app_configs import JOB_TIMEOUT
 from onyx.configs.constants import OnyxCeleryTask
 from onyx.db.chat import delete_chat_session
 from onyx.db.chat import get_chat_sessions_older_than
 from onyx.db.engine.sql_engine import get_session_with_current_tenant
+from onyx.db.enums import TaskStatus
+from onyx.db.tasks import mark_task_as_finished_with_id
+from onyx.db.tasks import register_task
 from onyx.server.settings.store import load_settings
 from onyx.utils.logger import setup_logger

@@ -23,16 +29,26 @@ logger = setup_logger()
    trail=False,
 )
 def perform_ttl_management_task(
-    self: Task, retention_limit_days: int, *, tenant_id: str  # noqa: ARG001
+    self: Task, retention_limit_days: int, *, tenant_id: str
 ) -> None:
    task_id = self.request.id
    if not task_id:
        raise RuntimeError("No task id defined for this task; cannot identify it")

+    start_time = datetime.now(tz=timezone.utc)
+
    user_id: UUID | None = None
    session_id: UUID | None = None
    try:
        with get_session_with_current_tenant() as db_session:
+            # we generally want to move off this, but keeping for now
+            register_task(
+                db_session=db_session,
+                task_name=name_chat_ttl_task(retention_limit_days, tenant_id),
+                task_id=task_id,
+                status=TaskStatus.STARTED,
+                start_time=start_time,
+            )

            old_chat_sessions = get_chat_sessions_older_than(
                retention_limit_days, db_session
@@ -49,10 +65,23 @@ def perform_ttl_management_task(
                    hard_delete=True,
                )

+        with get_session_with_current_tenant() as db_session:
+            mark_task_as_finished_with_id(
+                db_session=db_session,
+                task_id=task_id,
+                success=True,
+            )
+
    except Exception:
        logger.exception(
            f"delete_chat_session exceptioned. user_id={user_id} session_id={session_id}"
        )
+        with get_session_with_current_tenant() as db_session:
+            mark_task_as_finished_with_id(
+                db_session=db_session,
+                task_id=task_id,
+                success=False,
+            )
        raise


--- a/backend/onyx/background/celery/tasks/opensearch_migration/tasks.py
+++ b/backend/onyx/background/celery/tasks/opensearch_migration/tasks.py
@@ -36,7 +36,6 @@ from onyx.configs.constants import OnyxRedisLocks
 from onyx.db.engine.sql_engine import get_session_with_current_tenant
 from onyx.db.opensearch_migration import build_sanitized_to_original_doc_id_mapping
 from onyx.db.opensearch_migration import get_vespa_visit_state
-from onyx.db.opensearch_migration import is_migration_completed
 from onyx.db.opensearch_migration import (
    mark_migration_completed_time_if_not_set_with_commit,
 )
@@ -107,19 +106,14 @@ def migrate_chunks_from_vespa_to_opensearch_task(
            acquired; effectively a no-op. True if the task completed
            successfully. False if the task errored.
    """
-    # 1. Check if we should run the task.
-    # 1.a. If OpenSearch indexing is disabled, we don't run the task.
    if not ENABLE_OPENSEARCH_INDEXING_FOR_ONYX:
        task_logger.warning(
            "OpenSearch migration is not enabled, skipping chunk migration task."
        )
        return None
+
    task_logger.info("Starting chunk-level migration from Vespa to OpenSearch.")
    task_start_time = time.monotonic()
-
-    # 1.b. Only one instance per tenant of this task may run concurrently at
-    # once. If we fail to acquire a lock, we assume it is because another task
-    # has one and we exit.
    r = get_redis_client()
    lock: RedisLock = r.lock(
        name=OnyxRedisLocks.OPENSEARCH_MIGRATION_BEAT_LOCK,
@@ -142,11 +136,10 @@ def migrate_chunks_from_vespa_to_opensearch_task(
            f"Token: {lock.local.token}"
        )

-    # 2. Prepare to migrate.
    total_chunks_migrated_this_task = 0
    total_chunks_errored_this_task = 0
    try:
-        # 2.a. Double-check that tenant info is correct.
+        # Double check that tenant info is correct.
        if tenant_id != get_current_tenant_id():
            err_str = (
                f"Tenant ID mismatch in the OpenSearch migration task: "
@@ -155,62 +148,16 @@ def migrate_chunks_from_vespa_to_opensearch_task(
            task_logger.error(err_str)
            return False

-        # Do as much as we can with a DB session in one spot to not hold a
-        # session during a migration batch.
-        with get_session_with_current_tenant() as db_session:
-            # 2.b. Immediately check to see if this tenant is done, to save
-            # having to do any other work. This function does not require a
-            # migration record to necessarily exist.
-            if is_migration_completed(db_session):
-                return True
-
-            # 2.c. Try to insert the OpenSearchTenantMigrationRecord table if it
-            # does not exist.
+        with (
+            get_session_with_current_tenant() as db_session,
+            get_vespa_http_client(
+                timeout=VESPA_MIGRATION_REQUEST_TIMEOUT_S
+            ) as vespa_client,
+        ):
            try_insert_opensearch_tenant_migration_record_with_commit(db_session)
-
-            # 2.d. Get search settings.
            search_settings = get_current_search_settings(db_session)
-            indexing_setting = IndexingSetting.from_db_model(search_settings)
-
-            # 2.e. Build sanitized to original doc ID mapping to check for
-            # conflicts in the event we sanitize a doc ID to an
-            # already-existing doc ID.
-            # We reconstruct this mapping for every task invocation because
-            # a document may have been added in the time between two tasks.
-            sanitized_doc_start_time = time.monotonic()
-            sanitized_to_original_doc_id_mapping = (
-                build_sanitized_to_original_doc_id_mapping(db_session)
-            )
-            task_logger.debug(
-                f"Built sanitized_to_original_doc_id_mapping with {len(sanitized_to_original_doc_id_mapping)} entries "
-                f"in {time.monotonic() - sanitized_doc_start_time:.3f} seconds."
-            )
-
-            # 2.f. Get the current migration state.
-            continuation_token_map, total_chunks_migrated = get_vespa_visit_state(
-                db_session
-            )
-            # 2.f.1. Double-check that the migration state does not imply
-            # completion. Really we should never have to enter this block as we
-            # would expect is_migration_completed to return True, but in the
-            # strange event that the migration is complete but the migration
-            # completed time was never stamped, we do so here.
-            if is_continuation_token_done_for_all_slices(continuation_token_map):
-                task_logger.info(
-                    f"OpenSearch migration COMPLETED for tenant {tenant_id}. Total chunks migrated: {total_chunks_migrated}."
-                )
-                mark_migration_completed_time_if_not_set_with_commit(db_session)
-                return True
-        task_logger.debug(
-            f"Read the tenant migration record. Total chunks migrated: {total_chunks_migrated}. "
-            f"Continuation token map: {continuation_token_map}"
-        )
-
-        with get_vespa_http_client(
-            timeout=VESPA_MIGRATION_REQUEST_TIMEOUT_S
-        ) as vespa_client:
-            # 2.g. Create the OpenSearch and Vespa document indexes.
            tenant_state = TenantState(tenant_id=tenant_id, multitenant=MULTI_TENANT)
+            indexing_setting = IndexingSetting.from_db_model(search_settings)
            opensearch_document_index = OpenSearchDocumentIndex(
                tenant_state=tenant_state,
                index_name=search_settings.index_name,
@@ -224,14 +171,22 @@ def migrate_chunks_from_vespa_to_opensearch_task(
                httpx_client=vespa_client,
            )

-            # 2.h. Get the approximate chunk count in Vespa as of this time to
-            # update the migration record.
+            sanitized_doc_start_time = time.monotonic()
+            # We reconstruct this mapping for every task invocation because a
+            # document may have been added in the time between two tasks.
+            sanitized_to_original_doc_id_mapping = (
+                build_sanitized_to_original_doc_id_mapping(db_session)
+            )
+            task_logger.debug(
+                f"Built sanitized_to_original_doc_id_mapping with {len(sanitized_to_original_doc_id_mapping)} entries "
+                f"in {time.monotonic() - sanitized_doc_start_time:.3f} seconds."
+            )
+
            approx_chunk_count_in_vespa: int | None = None
            get_chunk_count_start_time = time.monotonic()
            try:
                approx_chunk_count_in_vespa = vespa_document_index.get_chunk_count()
            except Exception:
-                # This failure should not be blocking.
                task_logger.exception(
                    "Error getting approximate chunk count in Vespa. Moving on..."
                )
@@ -240,12 +195,25 @@ def migrate_chunks_from_vespa_to_opensearch_task(
                f"approximate chunk count in Vespa. Got {approx_chunk_count_in_vespa}."
            )

-            # 3. Do the actual migration in batches until we run out of time.
            while (
                time.monotonic() - task_start_time < MIGRATION_TASK_SOFT_TIME_LIMIT_S
                and lock.owned()
            ):
-                # 3.a. Get the next batch of raw chunks from Vespa.
+                (
+                    continuation_token_map,
+                    total_chunks_migrated,
+                ) = get_vespa_visit_state(db_session)
+                if is_continuation_token_done_for_all_slices(continuation_token_map):
+                    task_logger.info(
+                        f"OpenSearch migration COMPLETED for tenant {tenant_id}. Total chunks migrated: {total_chunks_migrated}."
+                    )
+                    mark_migration_completed_time_if_not_set_with_commit(db_session)
+                    break
+                task_logger.debug(
+                    f"Read the tenant migration record. Total chunks migrated: {total_chunks_migrated}. "
+                    f"Continuation token map: {continuation_token_map}"
+                )
+
                get_vespa_chunks_start_time = time.monotonic()
                raw_vespa_chunks, next_continuation_token_map = (
                    vespa_document_index.get_all_raw_document_chunks_paginated(
@@ -258,7 +226,6 @@ def migrate_chunks_from_vespa_to_opensearch_task(
                    f"seconds. Next continuation token map: {next_continuation_token_map}"
                )

-                # 3.b. Transform the raw chunks to OpenSearch chunks in memory.
                opensearch_document_chunks, errored_chunks = (
                    transform_vespa_chunks_to_opensearch_chunks(
                        raw_vespa_chunks,
@@ -273,7 +240,6 @@ def migrate_chunks_from_vespa_to_opensearch_task(
                        "errored."
                    )

-                # 3.c. Index the OpenSearch chunks into OpenSearch.
                index_opensearch_chunks_start_time = time.monotonic()
                opensearch_document_index.index_raw_chunks(
                    chunks=opensearch_document_chunks
@@ -285,38 +251,12 @@ def migrate_chunks_from_vespa_to_opensearch_task(

                total_chunks_migrated_this_task += len(opensearch_document_chunks)
                total_chunks_errored_this_task += len(errored_chunks)
-
-                # Do as much as we can with a DB session in one spot to not hold a
-                # session during a migration batch.
-                with get_session_with_current_tenant() as db_session:
-                    # 3.d. Update the migration state.
-                    update_vespa_visit_progress_with_commit(
-                        db_session,
-                        continuation_token_map=next_continuation_token_map,
-                        chunks_processed=len(opensearch_document_chunks),
-                        chunks_errored=len(errored_chunks),
-                        approx_chunk_count_in_vespa=approx_chunk_count_in_vespa,
-                    )
-
-                    # 3.e. Get the current migration state. Even thought we
-                    # technically have it in-memory since we just wrote it, we
-                    # want to reference the DB as the source of truth at all
-                    # times.
-                    continuation_token_map, total_chunks_migrated = (
-                        get_vespa_visit_state(db_session)
-                    )
-                    # 3.e.1. Check if the migration is done.
-                    if is_continuation_token_done_for_all_slices(
-                        continuation_token_map
-                    ):
-                        task_logger.info(
-                            f"OpenSearch migration COMPLETED for tenant {tenant_id}. Total chunks migrated: {total_chunks_migrated}."
-                        )
-                        mark_migration_completed_time_if_not_set_with_commit(db_session)
-                        return True
-                task_logger.debug(
-                    f"Read the tenant migration record. Total chunks migrated: {total_chunks_migrated}. "
-                    f"Continuation token map: {continuation_token_map}"
+                update_vespa_visit_progress_with_commit(
+                    db_session,
+                    continuation_token_map=next_continuation_token_map,
+                    chunks_processed=len(opensearch_document_chunks),
+                    chunks_errored=len(errored_chunks),
+                    approx_chunk_count_in_vespa=approx_chunk_count_in_vespa,
                )
    except Exception:
        traceback.print_exc()
--- a/backend/onyx/db/opensearch_migration.py
+++ b/backend/onyx/db/opensearch_migration.py
@@ -324,15 +324,6 @@ def mark_migration_completed_time_if_not_set_with_commit(
    db_session.commit()


-def is_migration_completed(db_session: Session) -> bool:
-    """Returns True if the migration is completed.
-
-    Can be run even if the migration record does not exist.
-    """
-    record = db_session.query(OpenSearchTenantMigrationRecord).first()
-    return record is not None and record.migration_completed_at is not None
-
-
 def build_sanitized_to_original_doc_id_mapping(
    db_session: Session,
 ) -> dict[str, str]:
--- a/backend/onyx/document_index/opensearch/schema.py
+++ b/backend/onyx/document_index/opensearch/schema.py
@@ -1,4 +1,3 @@
-import hashlib
 from datetime import datetime
 from datetime import timezone
 from typing import Any
@@ -21,13 +20,9 @@ from onyx.document_index.opensearch.constants import DEFAULT_MAX_CHUNK_SIZE
 from onyx.document_index.opensearch.constants import EF_CONSTRUCTION
 from onyx.document_index.opensearch.constants import EF_SEARCH
 from onyx.document_index.opensearch.constants import M
-from onyx.document_index.opensearch.string_filtering import DocumentIDTooLongError
 from onyx.document_index.opensearch.string_filtering import (
    filter_and_validate_document_id,
 )
-from onyx.document_index.opensearch.string_filtering import (
-    MAX_DOCUMENT_ID_ENCODED_LENGTH,
-)
 from onyx.utils.tenant import get_tenant_id_short_string
 from shared_configs.configs import MULTI_TENANT
 from shared_configs.contextvars import get_current_tenant_id
@@ -80,50 +75,17 @@ def get_opensearch_doc_chunk_id(

    This will be the string used to identify the chunk in OpenSearch. Any direct
    chunk queries should use this function.
-
-    If the document ID is too long, a hash of the ID is used instead.
    """
-    opensearch_doc_chunk_id_suffix: str = f"__{max_chunk_size}__{chunk_index}"
-    encoded_suffix_length: int = len(opensearch_doc_chunk_id_suffix.encode("utf-8"))
-    max_encoded_permissible_doc_id_length: int = (
-        MAX_DOCUMENT_ID_ENCODED_LENGTH - encoded_suffix_length
+    sanitized_document_id = filter_and_validate_document_id(document_id)
+    opensearch_doc_chunk_id = (
+        f"{sanitized_document_id}__{max_chunk_size}__{chunk_index}"
    )
-    opensearch_doc_chunk_id_tenant_prefix: str = ""
    if tenant_state.multitenant:
-        short_tenant_id: str = get_tenant_id_short_string(tenant_state.tenant_id)
        # Use tenant ID because in multitenant mode each tenant has its own
        # Documents table, so there is a very small chance that doc IDs are not
        # actually unique across all tenants.
-        opensearch_doc_chunk_id_tenant_prefix = f"{short_tenant_id}__"
-        encoded_prefix_length: int = len(
-            opensearch_doc_chunk_id_tenant_prefix.encode("utf-8")
-        )
-        max_encoded_permissible_doc_id_length -= encoded_prefix_length
-
-    try:
-        sanitized_document_id: str = filter_and_validate_document_id(
-            document_id, max_encoded_length=max_encoded_permissible_doc_id_length
-        )
-    except DocumentIDTooLongError:
-        # If the document ID is too long, use a hash instead.
-        # We use blake2b because it is faster and equally secure as SHA256, and
-        # accepts digest_size which controls the number of bytes returned in the
-        # hash.
-        # digest_size is the size of the returned hash in bytes. Since we're
-        # decoding the hash bytes as a hex string, the digest_size should be
-        # half the max target size of the hash string.
-        # Subtract 1 because filter_and_validate_document_id compares on >= on
-        # max_encoded_length.
-        # 64 is the max digest_size blake2b returns.
-        digest_size: int = min((max_encoded_permissible_doc_id_length - 1) // 2, 64)
-        sanitized_document_id = hashlib.blake2b(
-            document_id.encode("utf-8"), digest_size=digest_size
-        ).hexdigest()
-
-    opensearch_doc_chunk_id: str = (
-        f"{opensearch_doc_chunk_id_tenant_prefix}{sanitized_document_id}{opensearch_doc_chunk_id_suffix}"
-    )
-
+        short_tenant_id = get_tenant_id_short_string(tenant_state.tenant_id)
+        opensearch_doc_chunk_id = f"{short_tenant_id}__{opensearch_doc_chunk_id}"
    # Do one more validation to ensure we haven't exceeded the max length.
    opensearch_doc_chunk_id = filter_and_validate_document_id(opensearch_doc_chunk_id)
    return opensearch_doc_chunk_id
--- a/backend/onyx/document_index/opensearch/string_filtering.py
+++ b/backend/onyx/document_index/opensearch/string_filtering.py
@@ -1,15 +1,7 @@
 import re

-MAX_DOCUMENT_ID_ENCODED_LENGTH: int = 512

-
-class DocumentIDTooLongError(ValueError):
-    """Raised when a document ID is too long for OpenSearch after filtering."""
-
-
-def filter_and_validate_document_id(
-    document_id: str, max_encoded_length: int = MAX_DOCUMENT_ID_ENCODED_LENGTH
-) -> str:
+def filter_and_validate_document_id(document_id: str) -> str:
    """
    Filters and validates a document ID such that it can be used as an ID in
    OpenSearch.
@@ -27,13 +19,9 @@ def filter_and_validate_document_id(

    Args:
        document_id: The document ID to filter and validate.
-        max_encoded_length: The maximum length of the document ID after
-            filtering in bytes. Compared with >= for extra resilience, so
-            encoded values of this length will fail.

    Raises:
-        DocumentIDTooLongError: If the document ID is too long after filtering.
-        ValueError: If the document ID is empty after filtering.
+        ValueError: If the document ID is empty or too long after filtering.

    Returns:
        str: The filtered document ID.
@@ -41,8 +29,6 @@ def filter_and_validate_document_id(
    filtered_document_id = re.sub(r"[^A-Za-z0-9_.\-~]", "", document_id)
    if not filtered_document_id:
        raise ValueError(f"Document ID {document_id} is empty after filtering.")
-    if len(filtered_document_id.encode("utf-8")) >= max_encoded_length:
-        raise DocumentIDTooLongError(
-            f"Document ID {document_id} is too long after filtering."
-        )
+    if len(filtered_document_id.encode("utf-8")) >= 512:
+        raise ValueError(f"Document ID {document_id} is too long after filtering.")
    return filtered_document_id
--- a/backend/tests/unit/onyx/document_index/opensearch/test_get_doc_chunk_id.py
+++ b/backend/tests/unit/onyx/document_index/opensearch/test_get_doc_chunk_id.py
@@ -1,203 +0,0 @@
-import pytest
-
-from onyx.document_index.interfaces_new import TenantState
-from onyx.document_index.opensearch.constants import DEFAULT_MAX_CHUNK_SIZE
-from onyx.document_index.opensearch.schema import get_opensearch_doc_chunk_id
-from onyx.document_index.opensearch.string_filtering import (
-    MAX_DOCUMENT_ID_ENCODED_LENGTH,
-)
-from shared_configs.configs import POSTGRES_DEFAULT_SCHEMA_STANDARD_VALUE
-
-
-SINGLE_TENANT_STATE = TenantState(
-    tenant_id=POSTGRES_DEFAULT_SCHEMA_STANDARD_VALUE, multitenant=False
-)
-MULTI_TENANT_STATE = TenantState(
-    tenant_id="tenant_abcdef12-3456-7890-abcd-ef1234567890", multitenant=True
-)
-EXPECTED_SHORT_TENANT = "abcdef12"
-
-
-class TestGetOpensearchDocChunkIdSingleTenant:
-    def test_basic(self) -> None:
-        result = get_opensearch_doc_chunk_id(
-            SINGLE_TENANT_STATE, "my-doc-id", chunk_index=0
-        )
-        assert result == f"my-doc-id__{DEFAULT_MAX_CHUNK_SIZE}__0"
-
-    def test_custom_chunk_size(self) -> None:
-        result = get_opensearch_doc_chunk_id(
-            SINGLE_TENANT_STATE, "doc1", chunk_index=3, max_chunk_size=1024
-        )
-        assert result == "doc1__1024__3"
-
-    def test_special_chars_are_stripped(self) -> None:
-        """Tests characters not matching [A-Za-z0-9_.-~] are removed."""
-        result = get_opensearch_doc_chunk_id(
-            SINGLE_TENANT_STATE, "doc/with?special#chars&more%stuff", chunk_index=0
-        )
-        assert "/" not in result
-        assert "?" not in result
-        assert "#" not in result
-        assert result == f"docwithspecialcharsmorestuff__{DEFAULT_MAX_CHUNK_SIZE}__0"
-
-    def test_short_doc_id_not_hashed(self) -> None:
-        """
-        Tests that a short doc ID should appear directly in the result, not as a
-        hash.
-        """
-        doc_id = "short-id"
-        result = get_opensearch_doc_chunk_id(SINGLE_TENANT_STATE, doc_id, chunk_index=0)
-        assert "short-id" in result
-
-    def test_long_doc_id_is_hashed(self) -> None:
-        """
-        Tests that a doc ID exceeding the max length should be replaced with a
-        blake2b hash.
-        """
-        # Create a doc ID that will exceed max length after the suffix is
-        # appended.
-        doc_id = "a" * MAX_DOCUMENT_ID_ENCODED_LENGTH
-        result = get_opensearch_doc_chunk_id(SINGLE_TENANT_STATE, doc_id, chunk_index=0)
-        # The original doc ID should NOT appear in the result.
-        assert doc_id not in result
-        # The suffix should still be present.
-        assert f"__{DEFAULT_MAX_CHUNK_SIZE}__0" in result
-
-    def test_long_doc_id_hash_is_deterministic(self) -> None:
-        doc_id = "x" * MAX_DOCUMENT_ID_ENCODED_LENGTH
-        result1 = get_opensearch_doc_chunk_id(
-            SINGLE_TENANT_STATE, doc_id, chunk_index=5
-        )
-        result2 = get_opensearch_doc_chunk_id(
-            SINGLE_TENANT_STATE, doc_id, chunk_index=5
-        )
-        assert result1 == result2
-
-    def test_long_doc_id_different_inputs_produce_different_hashes(self) -> None:
-        doc_id_a = "a" * MAX_DOCUMENT_ID_ENCODED_LENGTH
-        doc_id_b = "b" * MAX_DOCUMENT_ID_ENCODED_LENGTH
-        result_a = get_opensearch_doc_chunk_id(
-            SINGLE_TENANT_STATE, doc_id_a, chunk_index=0
-        )
-        result_b = get_opensearch_doc_chunk_id(
-            SINGLE_TENANT_STATE, doc_id_b, chunk_index=0
-        )
-        assert result_a != result_b
-
-    def test_result_never_exceeds_max_length(self) -> None:
-        """
-        Tests that the final result should always be under
-        MAX_DOCUMENT_ID_ENCODED_LENGTH bytes.
-        """
-        doc_id = "z" * (MAX_DOCUMENT_ID_ENCODED_LENGTH * 2)
-        result = get_opensearch_doc_chunk_id(
-            SINGLE_TENANT_STATE, doc_id, chunk_index=999, max_chunk_size=99999
-        )
-        assert len(result.encode("utf-8")) < MAX_DOCUMENT_ID_ENCODED_LENGTH
-
-    def test_no_tenant_prefix_in_single_tenant(self) -> None:
-        result = get_opensearch_doc_chunk_id(
-            SINGLE_TENANT_STATE, "mydoc", chunk_index=0
-        )
-        assert not result.startswith(SINGLE_TENANT_STATE.tenant_id)
-
-
-class TestGetOpensearchDocChunkIdMultiTenant:
-    def test_includes_tenant_prefix(self) -> None:
-        result = get_opensearch_doc_chunk_id(MULTI_TENANT_STATE, "mydoc", chunk_index=0)
-        assert result.startswith(f"{EXPECTED_SHORT_TENANT}__")
-
-    def test_format(self) -> None:
-        result = get_opensearch_doc_chunk_id(
-            MULTI_TENANT_STATE, "mydoc", chunk_index=2, max_chunk_size=256
-        )
-        assert result == f"{EXPECTED_SHORT_TENANT}__mydoc__256__2"
-
-    def test_long_doc_id_is_hashed_multitenant(self) -> None:
-        doc_id = "d" * MAX_DOCUMENT_ID_ENCODED_LENGTH
-        result = get_opensearch_doc_chunk_id(MULTI_TENANT_STATE, doc_id, chunk_index=0)
-        # Should still have tenant prefix.
-        assert result.startswith(f"{EXPECTED_SHORT_TENANT}__")
-        # The original doc ID should NOT appear in the result.
-        assert doc_id not in result
-        # The suffix should still be present.
-        assert f"__{DEFAULT_MAX_CHUNK_SIZE}__0" in result
-
-    def test_result_never_exceeds_max_length_multitenant(self) -> None:
-        doc_id = "q" * (MAX_DOCUMENT_ID_ENCODED_LENGTH * 2)
-        result = get_opensearch_doc_chunk_id(
-            MULTI_TENANT_STATE, doc_id, chunk_index=999, max_chunk_size=99999
-        )
-        assert len(result.encode("utf-8")) < MAX_DOCUMENT_ID_ENCODED_LENGTH
-
-    def test_different_tenants_produce_different_ids(self) -> None:
-        tenant_a = TenantState(
-            tenant_id="tenant_aaaaaaaa-0000-0000-0000-000000000000", multitenant=True
-        )
-        tenant_b = TenantState(
-            tenant_id="tenant_bbbbbbbb-0000-0000-0000-000000000000", multitenant=True
-        )
-        result_a = get_opensearch_doc_chunk_id(tenant_a, "same-doc", chunk_index=0)
-        result_b = get_opensearch_doc_chunk_id(tenant_b, "same-doc", chunk_index=0)
-        assert result_a != result_b
-
-
-class TestGetOpensearchDocChunkIdEdgeCases:
-    def test_chunk_index_zero(self) -> None:
-        result = get_opensearch_doc_chunk_id(SINGLE_TENANT_STATE, "doc", chunk_index=0)
-        assert result.endswith("__0")
-
-    def test_large_chunk_index(self) -> None:
-        result = get_opensearch_doc_chunk_id(
-            SINGLE_TENANT_STATE, "doc", chunk_index=99999
-        )
-        assert result.endswith("__99999")
-
-    def test_doc_id_with_only_special_chars_raises(self) -> None:
-        """
-        Tests that a doc ID that becomes empty after filtering should raise
-        ValueError.
-        """
-        with pytest.raises(ValueError, match="empty after filtering"):
-            get_opensearch_doc_chunk_id(SINGLE_TENANT_STATE, "###???///", chunk_index=0)
-
-    def test_doc_id_at_boundary_length(self) -> None:
-        """
-        Tests that a doc ID right at the boundary should not be hashed.
-        """
-        suffix = f"__{DEFAULT_MAX_CHUNK_SIZE}__0"
-        suffix_len = len(suffix.encode("utf-8"))
-        # Max doc ID length that won't trigger hashing (must be <
-        # max_encoded_length).
-        max_doc_len = MAX_DOCUMENT_ID_ENCODED_LENGTH - suffix_len - 1
-        doc_id = "a" * max_doc_len
-        result = get_opensearch_doc_chunk_id(SINGLE_TENANT_STATE, doc_id, chunk_index=0)
-        assert doc_id in result
-
-    def test_doc_id_at_boundary_length_multitenant(self) -> None:
-        """
-        Tests that a doc ID right at the boundary should not be hashed in
-        multitenant mode.
-        """
-        suffix = f"__{DEFAULT_MAX_CHUNK_SIZE}__0"
-        suffix_len = len(suffix.encode("utf-8"))
-        prefix = f"{EXPECTED_SHORT_TENANT}__"
-        prefix_len = len(prefix.encode("utf-8"))
-        # Max doc ID length that won't trigger hashing (must be <
-        # max_encoded_length).
-        max_doc_len = MAX_DOCUMENT_ID_ENCODED_LENGTH - suffix_len - prefix_len - 1
-        doc_id = "a" * max_doc_len
-        result = get_opensearch_doc_chunk_id(MULTI_TENANT_STATE, doc_id, chunk_index=0)
-        assert doc_id in result
-
-    def test_doc_id_one_over_boundary_is_hashed(self) -> None:
-        """
-        Tests that a doc ID one byte over the boundary should be hashed.
-        """
-        suffix = f"__{DEFAULT_MAX_CHUNK_SIZE}__0"
-        suffix_len = len(suffix.encode("utf-8"))
-        # This length will trigger the >= check in filter_and_validate_document_id
-        doc_id = "a" * (MAX_DOCUMENT_ID_ENCODED_LENGTH - suffix_len)
-        result = get_opensearch_doc_chunk_id(SINGLE_TENANT_STATE, doc_id, chunk_index=0)
-        assert doc_id not in result
--- a/web/src/app/css/colors.css
+++ b/web/src/app/css/colors.css
@@ -467,10 +467,6 @@

  /* Frost Overlay (for FrostedDiv component) - lighter in light mode */
  --frost-overlay: var(--alpha-grey-00-10);
-
-  /* Scrollbar */
-  --scrollbar-track: transparent;
-  --scrollbar-thumb: var(--alpha-grey-100-20);
 }

 /* Dark Colors */
@@ -675,8 +671,4 @@

  /* Frost Overlay (for FrostedDiv component) - darker in dark mode */
  --frost-overlay: var(--alpha-grey-100-10);
-
-  /* Scrollbar */
-  --scrollbar-track: transparent;
-  --scrollbar-thumb: var(--alpha-grey-00-20);
 }
--- a/web/src/app/globals.css
+++ b/web/src/app/globals.css
@@ -127,8 +127,17 @@
 }

@layer utilities {
+  /* Hide scrollbar for Chrome, Safari and Opera */
+  .no-scrollbar::-webkit-scrollbar {
+    display: none;
+  }
+
+  /* Hide scrollbar for IE, Edge and Firefox */
  .no-scrollbar {
+    -ms-overflow-style: none;
+    /* IE and Edge */
    scrollbar-width: none;
+    /* Firefox */
  }

  /* SHADOWS */
@@ -353,9 +362,27 @@

 /* SCROLL BAR */

+.default-scrollbar::-webkit-scrollbar {
+  width: 6px;
+}
+
+.default-scrollbar::-webkit-scrollbar-track {
+  background: #f1f1f1;
+}
+
+.default-scrollbar::-webkit-scrollbar-thumb {
+  background: #888;
+  border-radius: 4px;
+}
+
+.default-scrollbar::-webkit-scrollbar-thumb:hover {
+  background: #555;
+}
+
 .default-scrollbar {
  scrollbar-width: thin;
  scrollbar-color: #888 transparent;
+  overflow: overlay;
  overflow-y: scroll;
  overflow-x: hidden;
 }
@@ -365,21 +392,78 @@
  height: 100%;
 }

-.inputscroll {
+.inputscroll::-webkit-scrollbar-track {
+  background: #e5e7eb;
  scrollbar-width: none;
 }

-/* Ensure native scrollbars are visible */
-@layer base {
-  * {
-    scrollbar-width: auto;
-  }
+::-webkit-scrollbar {
+  width: 0px;
+  /* Vertical scrollbar width */
+  height: 8px;
+  /* Horizontal scrollbar height */
+}
+
+::-webkit-scrollbar-track {
+  background: transparent;
+  /* background: theme("colors.scrollbar.track"); */
+  /* Track background color */
+}
+
+/* Style the scrollbar handle */
+::-webkit-scrollbar-thumb {
+  background: transparent;
+  /* background: theme("colors.scrollbar.thumb"); */
+  /* Handle color */
+  border-radius: 10px;
+}
+
+/* Handle on hover */
+::-webkit-scrollbar-thumb:hover {
+  background: transparent;
+  /* background: theme("colors.scrollbar.thumb-hover"); */
+  /* Handle color on hover */
+}
+
+.dark-scrollbar::-webkit-scrollbar-thumb {
+  background: transparent;
+  /* background: theme("colors.scrollbar.dark.thumb"); */
+  /* Handle color */
+  border-radius: 10px;
+}
+
+.dark-scrollbar::-webkit-scrollbar-thumb:hover {
+  background: transparent;
+  /* background: theme("colors.scrollbar.dark.thumb-hover"); */
+  /* Handle color on hover */
 }

 /* TEXTAREA */

+textarea::-webkit-scrollbar {
+  width: 8px;
+}
+
+textarea::-webkit-scrollbar-track {
+  background: var(--scrollbar-track);
+  border-radius: 4px;
+}
+
+textarea::-webkit-scrollbar-thumb {
+  background: var(--scrollbar-thumb);
+  border-radius: 4px;
+}
+
+textarea::-webkit-scrollbar-thumb:hover {
+  background: var(--scrollbar-thumb-hover);
+}
+
 textarea {
  resize: vertical;
+}
+
+/* For Firefox */
+textarea {
  scrollbar-width: thin;
  scrollbar-color: var(--scrollbar-thumb) var(--scrollbar-track);
 }
--- a/web/src/ee/refresh-pages/admin/HooksPage/HookLogsModal.tsx
+++ b/web/src/ee/refresh-pages/admin/HooksPage/HookLogsModal.tsx
@@ -5,6 +5,7 @@ import { SvgDownload, SvgTextLines } from "@opal/icons";
 import Modal from "@/refresh-components/Modal";
 import SimpleLoader from "@/refresh-components/loaders/SimpleLoader";
 import CopyIconButton from "@/refresh-components/buttons/CopyIconButton";
+import { Hoverable } from "@opal/core";
 import { useHookExecutionLogs } from "@/ee/hooks/useHookExecutionLogs";
 import { formatDateTimeLog } from "@/lib/dateUtils";
 import { downloadFile } from "@/lib/download";
@@ -40,33 +41,40 @@ function SectionHeader({ label }: { label: string }) {
  );
 }

-function LogRow({ log }: { log: HookExecutionRecord }) {
+function LogRow({ log, group }: { log: HookExecutionRecord; group: string }) {
  return (
-    <Section
-      flexDirection="row"
-      justifyContent="start"
-      alignItems="start"
-      gap={0.5}
-      height="fit"
-      className="py-2"
-    >
-      {/* 1. Timestamp */}
-      <span className="shrink-0 text-code-code">
-        <Text font="secondary-mono-label" color="inherit" nowrap>
-          {formatDateTimeLog(log.created_at)}
-        </Text>
-      </span>
-      {/* 2. Error message */}
-      <span className="flex-1 min-w-0 break-all whitespace-pre-wrap text-code-code">
-        <Text font="secondary-mono" color="inherit">
-          {log.error_message ?? "Unknown error"}
-        </Text>
-      </span>
-      {/* 3. Copy button */}
-      <Section width="fit" height="fit" alignItems="center">
-        <CopyIconButton size="xs" getCopyText={() => log.error_message ?? ""} />
+    <Hoverable.Root group={group}>
+      <Section
+        flexDirection="row"
+        justifyContent="start"
+        alignItems="start"
+        gap={0.5}
+        height="fit"
+        className="py-2"
+      >
+        {/* 1. Timestamp */}
+        <span className="shrink-0 text-code-code">
+          <Text font="secondary-mono-label" color="inherit" nowrap>
+            {formatDateTimeLog(log.created_at)}
+          </Text>
+        </span>
+        {/* 2. Error message */}
+        <span className="flex-1 min-w-0 break-all whitespace-pre-wrap text-code-code">
+          <Text font="secondary-mono" color="inherit">
+            {log.error_message ?? "Unknown error"}
+          </Text>
+        </span>
+        {/* 3. Copy button */}
+        <Section width="fit" height="fit" alignItems="center">
+          <Hoverable.Item group={group} variant="opacity-on-hover">
+            <CopyIconButton
+              size="xs"
+              getCopyText={() => log.error_message ?? ""}
+            />
+          </Hoverable.Item>
+        </Section>
      </Section>
-    </Section>
+    </Hoverable.Root>
  );
 }

@@ -126,7 +134,11 @@ export default function HookLogsModal({ hook, spec }: HookLogsModalProps) {
                <>
                  <SectionHeader label="Past Hour" />
                  {recentErrors.map((log, idx) => (
-                    <LogRow key={log.created_at + String(idx)} log={log} />
+                    <LogRow
+                      key={log.created_at + String(idx)}
+                      log={log}
+                      group={log.created_at + String(idx)}
+                    />
                  ))}
                </>
              )}
@@ -134,7 +146,11 @@ export default function HookLogsModal({ hook, spec }: HookLogsModalProps) {
                <>
                  <SectionHeader label="Older" />
                  {olderErrors.map((log, idx) => (
-                    <LogRow key={log.created_at + String(idx)} log={log} />
+                    <LogRow
+                      key={log.created_at + String(idx)}
+                      log={log}
+                      group={log.created_at + String(idx)}
+                    />
                  ))}
                </>
              )}
--- a/web/src/ee/refresh-pages/admin/HooksPage/HookStatusPopover.tsx
+++ b/web/src/ee/refresh-pages/admin/HooksPage/HookStatusPopover.tsx
@@ -3,7 +3,7 @@
 import { useEffect, useRef, useState } from "react";
 import { useCreateModal } from "@/refresh-components/contexts/ModalContext";
 import { noProp } from "@/lib/utils";
-import { formatTimeOnly } from "@/lib/dateUtils";
+import { formatDateTimeLog } from "@/lib/dateUtils";
 import { Button, Text } from "@opal/components";
 import { Content } from "@opal/layouts";
 import LineItem from "@/refresh-components/buttons/LineItem";
@@ -18,6 +18,7 @@ import {
  SvgXOctagon,
 } from "@opal/icons";
 import CopyIconButton from "@/refresh-components/buttons/CopyIconButton";
+import { Hoverable } from "@opal/core";
 import { useHookExecutionLogs } from "@/ee/hooks/useHookExecutionLogs";
 import HookLogsModal from "@/ee/refresh-pages/admin/HooksPage/HookLogsModal";
 import type {
@@ -26,6 +27,52 @@ import type {
 } from "@/ee/refresh-pages/admin/HooksPage/interfaces";
 import { cn } from "@opal/utils";

+function ErrorLogRow({
+  log,
+  group,
+}: {
+  log: { created_at: string; error_message: string | null };
+  group: string;
+}) {
+  return (
+    <Hoverable.Root group={group}>
+      <Section
+        flexDirection="column"
+        justifyContent="start"
+        alignItems="start"
+        gap={0.25}
+        padding={0.25}
+        height="fit"
+      >
+        <Section
+          flexDirection="row"
+          justifyContent="between"
+          alignItems="center"
+          gap={0}
+          height="fit"
+        >
+          <span className="text-code-code">
+            <Text font="secondary-mono-label" color="inherit">
+              {formatDateTimeLog(log.created_at)}
+            </Text>
+          </span>
+          <Hoverable.Item group={group} variant="opacity-on-hover">
+            <CopyIconButton
+              size="xs"
+              getCopyText={() => log.error_message ?? ""}
+            />
+          </Hoverable.Item>
+        </Section>
+        <span className="break-all">
+          <Text font="secondary-mono" color="text-03">
+            {log.error_message ?? "Unknown error"}
+          </Text>
+        </span>
+      </Section>
+    </Hoverable.Root>
+  );
+}
+
 interface HookStatusPopoverProps {
  hook: HookResponse;
  spec: HookPointMeta | undefined;
@@ -43,9 +90,16 @@ export default function HookStatusPopover({
  const [clickOpened, setClickOpened] = useState(false);
  const closeTimerRef = useRef<ReturnType<typeof setTimeout> | null>(null);

-  const { hasRecentErrors, recentErrors, isLoading, error } =
+  const { hasRecentErrors, recentErrors, olderErrors, isLoading, error } =
    useHookExecutionLogs(hook.id);

+  const topErrors = [...recentErrors, ...olderErrors]
+    .sort(
+      (a, b) =>
+        new Date(b.created_at).getTime() - new Date(a.created_at).getTime()
+    )
+    .slice(0, 3);
+
  useEffect(() => {
    return () => {
      if (closeTimerRef.current) clearTimeout(closeTimerRef.current);
@@ -162,7 +216,15 @@ export default function HookStatusPopover({
            justifyContent="start"
            alignItems="start"
            height="fit"
-            width={hasRecentErrors ? 20 : 12.5}
+            width={
+              hook.is_reachable === false
+                ? topErrors.length > 0
+                  ? 20
+                  : 12.5
+                : hasRecentErrors
+                  ? 20
+                  : 12.5
+            }
            padding={0.125}
            gap={0.25}
          >
@@ -174,13 +236,70 @@ export default function HookStatusPopover({
              <Text font="secondary-body" color="text-03">
                Failed to load logs.
              </Text>
+            ) : hook.is_reachable === false ? (
+              <>
+                <div className="p-1">
+                  <Content
+                    sizePreset="secondary"
+                    variant="section"
+                    icon={(props) => (
+                      <SvgXOctagon
+                        {...props}
+                        className="text-status-error-05"
+                      />
+                    )}
+                    title="Most Recent Errors"
+                  />
+                </div>
+
+                {topErrors.length > 0 ? (
+                  <>
+                    <Separator noPadding className="px-2" />
+
+                    <Section
+                      flexDirection="column"
+                      justifyContent="start"
+                      alignItems="start"
+                      gap={0.25}
+                      padding={0.25}
+                      height="fit"
+                    >
+                      {topErrors.map((log, idx) => (
+                        <ErrorLogRow
+                          key={log.created_at + String(idx)}
+                          log={log}
+                          group={log.created_at + String(idx)}
+                        />
+                      ))}
+                    </Section>
+                  </>
+                ) : (
+                  <Separator noPadding className="px-2" />
+                )}
+
+                <LineItem
+                  muted
+                  icon={SvgMaximize2}
+                  onClick={noProp(() => {
+                    handleOpenChange(false);
+                    logsModal.toggle(true);
+                  })}
+                >
+                  View More Lines
+                </LineItem>
+              </>
            ) : hasRecentErrors ? (
              <>
                <div className="p-1">
                  <Content
                    sizePreset="secondary"
                    variant="section"
-                    icon={SvgXOctagon}
+                    icon={(props) => (
+                      <SvgXOctagon
+                        {...props}
+                        className="text-status-error-05"
+                      />
+                    )}
                    title={
                      recentErrors.length <= 3
                        ? `${recentErrors.length} ${
@@ -204,38 +323,11 @@ export default function HookStatusPopover({
                  height="fit"
                >
                  {recentErrors.slice(0, 3).map((log, idx) => (
-                    <Section
+                    <ErrorLogRow
                      key={log.created_at + String(idx)}
-                      flexDirection="column"
-                      justifyContent="start"
-                      alignItems="start"
-                      gap={0.25}
-                      padding={0.25}
-                      height="fit"
-                    >
-                      <Section
-                        flexDirection="row"
-                        justifyContent="between"
-                        alignItems="center"
-                        gap={0}
-                        height="fit"
-                      >
-                        <span className="text-code-code">
-                          <Text font="secondary-mono-label" color="inherit">
-                            {formatTimeOnly(log.created_at)}
-                          </Text>
-                        </span>
-                        <CopyIconButton
-                          size="xs"
-                          getCopyText={() => log.error_message ?? ""}
-                        />
-                      </Section>
-                      <span className="break-all">
-                        <Text font="secondary-mono" color="text-03">
-                          {log.error_message ?? "Unknown error"}
-                        </Text>
-                      </span>
-                    </Section>
+                      log={log}
+                      group={log.created_at + String(idx)}
+                    />
                  ))}
                </Section>

--- a/web/src/ee/refresh-pages/admin/HooksPage/index.tsx
+++ b/web/src/ee/refresh-pages/admin/HooksPage/index.tsx
@@ -41,6 +41,7 @@ import {
  activateHook,
  deactivateHook,
  deleteHook,
+  getHook,
  validateHook,
 } from "@/ee/refresh-pages/admin/HooksPage/svc";
 import type {
@@ -319,9 +320,16 @@ function ConnectedHookCard({
      toast.error(
        err instanceof Error ? err.message : "Failed to validate hook."
      );
+      return;
    } finally {
      setIsBusy(false);
    }
+    try {
+      const updated = await getHook(hook.id);
+      onToggled(updated);
+    } catch (err) {
+      console.error("Failed to refresh hook after validation:", err);
+    }
  }

  const HookIcon = getHookPointIcon(hook.hook_point);
--- a/web/src/ee/refresh-pages/admin/HooksPage/svc.ts
+++ b/web/src/ee/refresh-pages/admin/HooksPage/svc.ts
@@ -87,6 +87,14 @@ export async function deactivateHook(id: number): Promise<HookResponse> {
  return res.json();
 }

+export async function getHook(id: number): Promise<HookResponse> {
+  const res = await fetch(`/api/admin/hooks/${id}`);
+  if (!res.ok) {
+    throw await parseError(res, "Failed to fetch hook");
+  }
+  return res.json();
+}
+
 export async function validateHook(id: number): Promise<HookValidateResponse> {
  const res = await fetch(`/api/admin/hooks/${id}/validate`, {
    method: "POST",