Compare commits

..

1 Commits

Author SHA1 Message Date
Bo-Onyx
084d875b3a feat(hook): improve disconnect error popover 2026-04-02 15:33:32 -07:00
12 changed files with 356 additions and 451 deletions

View File

@@ -1,14 +1,20 @@
from datetime import datetime
from datetime import timezone
from uuid import UUID
from celery import shared_task
from celery import Task
from ee.onyx.background.celery_utils import should_perform_chat_ttl_check
from ee.onyx.background.task_name_builders import name_chat_ttl_task
from onyx.configs.app_configs import JOB_TIMEOUT
from onyx.configs.constants import OnyxCeleryTask
from onyx.db.chat import delete_chat_session
from onyx.db.chat import get_chat_sessions_older_than
from onyx.db.engine.sql_engine import get_session_with_current_tenant
from onyx.db.enums import TaskStatus
from onyx.db.tasks import mark_task_as_finished_with_id
from onyx.db.tasks import register_task
from onyx.server.settings.store import load_settings
from onyx.utils.logger import setup_logger
@@ -23,16 +29,26 @@ logger = setup_logger()
trail=False,
)
def perform_ttl_management_task(
self: Task, retention_limit_days: int, *, tenant_id: str # noqa: ARG001
self: Task, retention_limit_days: int, *, tenant_id: str
) -> None:
task_id = self.request.id
if not task_id:
raise RuntimeError("No task id defined for this task; cannot identify it")
start_time = datetime.now(tz=timezone.utc)
user_id: UUID | None = None
session_id: UUID | None = None
try:
with get_session_with_current_tenant() as db_session:
# we generally want to move off this, but keeping for now
register_task(
db_session=db_session,
task_name=name_chat_ttl_task(retention_limit_days, tenant_id),
task_id=task_id,
status=TaskStatus.STARTED,
start_time=start_time,
)
old_chat_sessions = get_chat_sessions_older_than(
retention_limit_days, db_session
@@ -49,10 +65,23 @@ def perform_ttl_management_task(
hard_delete=True,
)
with get_session_with_current_tenant() as db_session:
mark_task_as_finished_with_id(
db_session=db_session,
task_id=task_id,
success=True,
)
except Exception:
logger.exception(
f"delete_chat_session exceptioned. user_id={user_id} session_id={session_id}"
)
with get_session_with_current_tenant() as db_session:
mark_task_as_finished_with_id(
db_session=db_session,
task_id=task_id,
success=False,
)
raise

View File

@@ -36,7 +36,6 @@ from onyx.configs.constants import OnyxRedisLocks
from onyx.db.engine.sql_engine import get_session_with_current_tenant
from onyx.db.opensearch_migration import build_sanitized_to_original_doc_id_mapping
from onyx.db.opensearch_migration import get_vespa_visit_state
from onyx.db.opensearch_migration import is_migration_completed
from onyx.db.opensearch_migration import (
mark_migration_completed_time_if_not_set_with_commit,
)
@@ -107,19 +106,14 @@ def migrate_chunks_from_vespa_to_opensearch_task(
acquired; effectively a no-op. True if the task completed
successfully. False if the task errored.
"""
# 1. Check if we should run the task.
# 1.a. If OpenSearch indexing is disabled, we don't run the task.
if not ENABLE_OPENSEARCH_INDEXING_FOR_ONYX:
task_logger.warning(
"OpenSearch migration is not enabled, skipping chunk migration task."
)
return None
task_logger.info("Starting chunk-level migration from Vespa to OpenSearch.")
task_start_time = time.monotonic()
# 1.b. Only one instance per tenant of this task may run concurrently at
# once. If we fail to acquire a lock, we assume it is because another task
# has one and we exit.
r = get_redis_client()
lock: RedisLock = r.lock(
name=OnyxRedisLocks.OPENSEARCH_MIGRATION_BEAT_LOCK,
@@ -142,11 +136,10 @@ def migrate_chunks_from_vespa_to_opensearch_task(
f"Token: {lock.local.token}"
)
# 2. Prepare to migrate.
total_chunks_migrated_this_task = 0
total_chunks_errored_this_task = 0
try:
# 2.a. Double-check that tenant info is correct.
# Double check that tenant info is correct.
if tenant_id != get_current_tenant_id():
err_str = (
f"Tenant ID mismatch in the OpenSearch migration task: "
@@ -155,62 +148,16 @@ def migrate_chunks_from_vespa_to_opensearch_task(
task_logger.error(err_str)
return False
# Do as much as we can with a DB session in one spot to not hold a
# session during a migration batch.
with get_session_with_current_tenant() as db_session:
# 2.b. Immediately check to see if this tenant is done, to save
# having to do any other work. This function does not require a
# migration record to necessarily exist.
if is_migration_completed(db_session):
return True
# 2.c. Try to insert the OpenSearchTenantMigrationRecord table if it
# does not exist.
with (
get_session_with_current_tenant() as db_session,
get_vespa_http_client(
timeout=VESPA_MIGRATION_REQUEST_TIMEOUT_S
) as vespa_client,
):
try_insert_opensearch_tenant_migration_record_with_commit(db_session)
# 2.d. Get search settings.
search_settings = get_current_search_settings(db_session)
indexing_setting = IndexingSetting.from_db_model(search_settings)
# 2.e. Build sanitized to original doc ID mapping to check for
# conflicts in the event we sanitize a doc ID to an
# already-existing doc ID.
# We reconstruct this mapping for every task invocation because
# a document may have been added in the time between two tasks.
sanitized_doc_start_time = time.monotonic()
sanitized_to_original_doc_id_mapping = (
build_sanitized_to_original_doc_id_mapping(db_session)
)
task_logger.debug(
f"Built sanitized_to_original_doc_id_mapping with {len(sanitized_to_original_doc_id_mapping)} entries "
f"in {time.monotonic() - sanitized_doc_start_time:.3f} seconds."
)
# 2.f. Get the current migration state.
continuation_token_map, total_chunks_migrated = get_vespa_visit_state(
db_session
)
# 2.f.1. Double-check that the migration state does not imply
# completion. Really we should never have to enter this block as we
# would expect is_migration_completed to return True, but in the
# strange event that the migration is complete but the migration
# completed time was never stamped, we do so here.
if is_continuation_token_done_for_all_slices(continuation_token_map):
task_logger.info(
f"OpenSearch migration COMPLETED for tenant {tenant_id}. Total chunks migrated: {total_chunks_migrated}."
)
mark_migration_completed_time_if_not_set_with_commit(db_session)
return True
task_logger.debug(
f"Read the tenant migration record. Total chunks migrated: {total_chunks_migrated}. "
f"Continuation token map: {continuation_token_map}"
)
with get_vespa_http_client(
timeout=VESPA_MIGRATION_REQUEST_TIMEOUT_S
) as vespa_client:
# 2.g. Create the OpenSearch and Vespa document indexes.
tenant_state = TenantState(tenant_id=tenant_id, multitenant=MULTI_TENANT)
indexing_setting = IndexingSetting.from_db_model(search_settings)
opensearch_document_index = OpenSearchDocumentIndex(
tenant_state=tenant_state,
index_name=search_settings.index_name,
@@ -224,14 +171,22 @@ def migrate_chunks_from_vespa_to_opensearch_task(
httpx_client=vespa_client,
)
# 2.h. Get the approximate chunk count in Vespa as of this time to
# update the migration record.
sanitized_doc_start_time = time.monotonic()
# We reconstruct this mapping for every task invocation because a
# document may have been added in the time between two tasks.
sanitized_to_original_doc_id_mapping = (
build_sanitized_to_original_doc_id_mapping(db_session)
)
task_logger.debug(
f"Built sanitized_to_original_doc_id_mapping with {len(sanitized_to_original_doc_id_mapping)} entries "
f"in {time.monotonic() - sanitized_doc_start_time:.3f} seconds."
)
approx_chunk_count_in_vespa: int | None = None
get_chunk_count_start_time = time.monotonic()
try:
approx_chunk_count_in_vespa = vespa_document_index.get_chunk_count()
except Exception:
# This failure should not be blocking.
task_logger.exception(
"Error getting approximate chunk count in Vespa. Moving on..."
)
@@ -240,12 +195,25 @@ def migrate_chunks_from_vespa_to_opensearch_task(
f"approximate chunk count in Vespa. Got {approx_chunk_count_in_vespa}."
)
# 3. Do the actual migration in batches until we run out of time.
while (
time.monotonic() - task_start_time < MIGRATION_TASK_SOFT_TIME_LIMIT_S
and lock.owned()
):
# 3.a. Get the next batch of raw chunks from Vespa.
(
continuation_token_map,
total_chunks_migrated,
) = get_vespa_visit_state(db_session)
if is_continuation_token_done_for_all_slices(continuation_token_map):
task_logger.info(
f"OpenSearch migration COMPLETED for tenant {tenant_id}. Total chunks migrated: {total_chunks_migrated}."
)
mark_migration_completed_time_if_not_set_with_commit(db_session)
break
task_logger.debug(
f"Read the tenant migration record. Total chunks migrated: {total_chunks_migrated}. "
f"Continuation token map: {continuation_token_map}"
)
get_vespa_chunks_start_time = time.monotonic()
raw_vespa_chunks, next_continuation_token_map = (
vespa_document_index.get_all_raw_document_chunks_paginated(
@@ -258,7 +226,6 @@ def migrate_chunks_from_vespa_to_opensearch_task(
f"seconds. Next continuation token map: {next_continuation_token_map}"
)
# 3.b. Transform the raw chunks to OpenSearch chunks in memory.
opensearch_document_chunks, errored_chunks = (
transform_vespa_chunks_to_opensearch_chunks(
raw_vespa_chunks,
@@ -273,7 +240,6 @@ def migrate_chunks_from_vespa_to_opensearch_task(
"errored."
)
# 3.c. Index the OpenSearch chunks into OpenSearch.
index_opensearch_chunks_start_time = time.monotonic()
opensearch_document_index.index_raw_chunks(
chunks=opensearch_document_chunks
@@ -285,38 +251,12 @@ def migrate_chunks_from_vespa_to_opensearch_task(
total_chunks_migrated_this_task += len(opensearch_document_chunks)
total_chunks_errored_this_task += len(errored_chunks)
# Do as much as we can with a DB session in one spot to not hold a
# session during a migration batch.
with get_session_with_current_tenant() as db_session:
# 3.d. Update the migration state.
update_vespa_visit_progress_with_commit(
db_session,
continuation_token_map=next_continuation_token_map,
chunks_processed=len(opensearch_document_chunks),
chunks_errored=len(errored_chunks),
approx_chunk_count_in_vespa=approx_chunk_count_in_vespa,
)
# 3.e. Get the current migration state. Even thought we
# technically have it in-memory since we just wrote it, we
# want to reference the DB as the source of truth at all
# times.
continuation_token_map, total_chunks_migrated = (
get_vespa_visit_state(db_session)
)
# 3.e.1. Check if the migration is done.
if is_continuation_token_done_for_all_slices(
continuation_token_map
):
task_logger.info(
f"OpenSearch migration COMPLETED for tenant {tenant_id}. Total chunks migrated: {total_chunks_migrated}."
)
mark_migration_completed_time_if_not_set_with_commit(db_session)
return True
task_logger.debug(
f"Read the tenant migration record. Total chunks migrated: {total_chunks_migrated}. "
f"Continuation token map: {continuation_token_map}"
update_vespa_visit_progress_with_commit(
db_session,
continuation_token_map=next_continuation_token_map,
chunks_processed=len(opensearch_document_chunks),
chunks_errored=len(errored_chunks),
approx_chunk_count_in_vespa=approx_chunk_count_in_vespa,
)
except Exception:
traceback.print_exc()

View File

@@ -324,15 +324,6 @@ def mark_migration_completed_time_if_not_set_with_commit(
db_session.commit()
def is_migration_completed(db_session: Session) -> bool:
"""Returns True if the migration is completed.
Can be run even if the migration record does not exist.
"""
record = db_session.query(OpenSearchTenantMigrationRecord).first()
return record is not None and record.migration_completed_at is not None
def build_sanitized_to_original_doc_id_mapping(
db_session: Session,
) -> dict[str, str]:

View File

@@ -1,4 +1,3 @@
import hashlib
from datetime import datetime
from datetime import timezone
from typing import Any
@@ -21,13 +20,9 @@ from onyx.document_index.opensearch.constants import DEFAULT_MAX_CHUNK_SIZE
from onyx.document_index.opensearch.constants import EF_CONSTRUCTION
from onyx.document_index.opensearch.constants import EF_SEARCH
from onyx.document_index.opensearch.constants import M
from onyx.document_index.opensearch.string_filtering import DocumentIDTooLongError
from onyx.document_index.opensearch.string_filtering import (
filter_and_validate_document_id,
)
from onyx.document_index.opensearch.string_filtering import (
MAX_DOCUMENT_ID_ENCODED_LENGTH,
)
from onyx.utils.tenant import get_tenant_id_short_string
from shared_configs.configs import MULTI_TENANT
from shared_configs.contextvars import get_current_tenant_id
@@ -80,50 +75,17 @@ def get_opensearch_doc_chunk_id(
This will be the string used to identify the chunk in OpenSearch. Any direct
chunk queries should use this function.
If the document ID is too long, a hash of the ID is used instead.
"""
opensearch_doc_chunk_id_suffix: str = f"__{max_chunk_size}__{chunk_index}"
encoded_suffix_length: int = len(opensearch_doc_chunk_id_suffix.encode("utf-8"))
max_encoded_permissible_doc_id_length: int = (
MAX_DOCUMENT_ID_ENCODED_LENGTH - encoded_suffix_length
sanitized_document_id = filter_and_validate_document_id(document_id)
opensearch_doc_chunk_id = (
f"{sanitized_document_id}__{max_chunk_size}__{chunk_index}"
)
opensearch_doc_chunk_id_tenant_prefix: str = ""
if tenant_state.multitenant:
short_tenant_id: str = get_tenant_id_short_string(tenant_state.tenant_id)
# Use tenant ID because in multitenant mode each tenant has its own
# Documents table, so there is a very small chance that doc IDs are not
# actually unique across all tenants.
opensearch_doc_chunk_id_tenant_prefix = f"{short_tenant_id}__"
encoded_prefix_length: int = len(
opensearch_doc_chunk_id_tenant_prefix.encode("utf-8")
)
max_encoded_permissible_doc_id_length -= encoded_prefix_length
try:
sanitized_document_id: str = filter_and_validate_document_id(
document_id, max_encoded_length=max_encoded_permissible_doc_id_length
)
except DocumentIDTooLongError:
# If the document ID is too long, use a hash instead.
# We use blake2b because it is faster and equally secure as SHA256, and
# accepts digest_size which controls the number of bytes returned in the
# hash.
# digest_size is the size of the returned hash in bytes. Since we're
# decoding the hash bytes as a hex string, the digest_size should be
# half the max target size of the hash string.
# Subtract 1 because filter_and_validate_document_id compares on >= on
# max_encoded_length.
# 64 is the max digest_size blake2b returns.
digest_size: int = min((max_encoded_permissible_doc_id_length - 1) // 2, 64)
sanitized_document_id = hashlib.blake2b(
document_id.encode("utf-8"), digest_size=digest_size
).hexdigest()
opensearch_doc_chunk_id: str = (
f"{opensearch_doc_chunk_id_tenant_prefix}{sanitized_document_id}{opensearch_doc_chunk_id_suffix}"
)
short_tenant_id = get_tenant_id_short_string(tenant_state.tenant_id)
opensearch_doc_chunk_id = f"{short_tenant_id}__{opensearch_doc_chunk_id}"
# Do one more validation to ensure we haven't exceeded the max length.
opensearch_doc_chunk_id = filter_and_validate_document_id(opensearch_doc_chunk_id)
return opensearch_doc_chunk_id

View File

@@ -1,15 +1,7 @@
import re
MAX_DOCUMENT_ID_ENCODED_LENGTH: int = 512
class DocumentIDTooLongError(ValueError):
"""Raised when a document ID is too long for OpenSearch after filtering."""
def filter_and_validate_document_id(
document_id: str, max_encoded_length: int = MAX_DOCUMENT_ID_ENCODED_LENGTH
) -> str:
def filter_and_validate_document_id(document_id: str) -> str:
"""
Filters and validates a document ID such that it can be used as an ID in
OpenSearch.
@@ -27,13 +19,9 @@ def filter_and_validate_document_id(
Args:
document_id: The document ID to filter and validate.
max_encoded_length: The maximum length of the document ID after
filtering in bytes. Compared with >= for extra resilience, so
encoded values of this length will fail.
Raises:
DocumentIDTooLongError: If the document ID is too long after filtering.
ValueError: If the document ID is empty after filtering.
ValueError: If the document ID is empty or too long after filtering.
Returns:
str: The filtered document ID.
@@ -41,8 +29,6 @@ def filter_and_validate_document_id(
filtered_document_id = re.sub(r"[^A-Za-z0-9_.\-~]", "", document_id)
if not filtered_document_id:
raise ValueError(f"Document ID {document_id} is empty after filtering.")
if len(filtered_document_id.encode("utf-8")) >= max_encoded_length:
raise DocumentIDTooLongError(
f"Document ID {document_id} is too long after filtering."
)
if len(filtered_document_id.encode("utf-8")) >= 512:
raise ValueError(f"Document ID {document_id} is too long after filtering.")
return filtered_document_id

View File

@@ -1,203 +0,0 @@
import pytest
from onyx.document_index.interfaces_new import TenantState
from onyx.document_index.opensearch.constants import DEFAULT_MAX_CHUNK_SIZE
from onyx.document_index.opensearch.schema import get_opensearch_doc_chunk_id
from onyx.document_index.opensearch.string_filtering import (
MAX_DOCUMENT_ID_ENCODED_LENGTH,
)
from shared_configs.configs import POSTGRES_DEFAULT_SCHEMA_STANDARD_VALUE
SINGLE_TENANT_STATE = TenantState(
tenant_id=POSTGRES_DEFAULT_SCHEMA_STANDARD_VALUE, multitenant=False
)
MULTI_TENANT_STATE = TenantState(
tenant_id="tenant_abcdef12-3456-7890-abcd-ef1234567890", multitenant=True
)
EXPECTED_SHORT_TENANT = "abcdef12"
class TestGetOpensearchDocChunkIdSingleTenant:
def test_basic(self) -> None:
result = get_opensearch_doc_chunk_id(
SINGLE_TENANT_STATE, "my-doc-id", chunk_index=0
)
assert result == f"my-doc-id__{DEFAULT_MAX_CHUNK_SIZE}__0"
def test_custom_chunk_size(self) -> None:
result = get_opensearch_doc_chunk_id(
SINGLE_TENANT_STATE, "doc1", chunk_index=3, max_chunk_size=1024
)
assert result == "doc1__1024__3"
def test_special_chars_are_stripped(self) -> None:
"""Tests characters not matching [A-Za-z0-9_.-~] are removed."""
result = get_opensearch_doc_chunk_id(
SINGLE_TENANT_STATE, "doc/with?special#chars&more%stuff", chunk_index=0
)
assert "/" not in result
assert "?" not in result
assert "#" not in result
assert result == f"docwithspecialcharsmorestuff__{DEFAULT_MAX_CHUNK_SIZE}__0"
def test_short_doc_id_not_hashed(self) -> None:
"""
Tests that a short doc ID should appear directly in the result, not as a
hash.
"""
doc_id = "short-id"
result = get_opensearch_doc_chunk_id(SINGLE_TENANT_STATE, doc_id, chunk_index=0)
assert "short-id" in result
def test_long_doc_id_is_hashed(self) -> None:
"""
Tests that a doc ID exceeding the max length should be replaced with a
blake2b hash.
"""
# Create a doc ID that will exceed max length after the suffix is
# appended.
doc_id = "a" * MAX_DOCUMENT_ID_ENCODED_LENGTH
result = get_opensearch_doc_chunk_id(SINGLE_TENANT_STATE, doc_id, chunk_index=0)
# The original doc ID should NOT appear in the result.
assert doc_id not in result
# The suffix should still be present.
assert f"__{DEFAULT_MAX_CHUNK_SIZE}__0" in result
def test_long_doc_id_hash_is_deterministic(self) -> None:
doc_id = "x" * MAX_DOCUMENT_ID_ENCODED_LENGTH
result1 = get_opensearch_doc_chunk_id(
SINGLE_TENANT_STATE, doc_id, chunk_index=5
)
result2 = get_opensearch_doc_chunk_id(
SINGLE_TENANT_STATE, doc_id, chunk_index=5
)
assert result1 == result2
def test_long_doc_id_different_inputs_produce_different_hashes(self) -> None:
doc_id_a = "a" * MAX_DOCUMENT_ID_ENCODED_LENGTH
doc_id_b = "b" * MAX_DOCUMENT_ID_ENCODED_LENGTH
result_a = get_opensearch_doc_chunk_id(
SINGLE_TENANT_STATE, doc_id_a, chunk_index=0
)
result_b = get_opensearch_doc_chunk_id(
SINGLE_TENANT_STATE, doc_id_b, chunk_index=0
)
assert result_a != result_b
def test_result_never_exceeds_max_length(self) -> None:
"""
Tests that the final result should always be under
MAX_DOCUMENT_ID_ENCODED_LENGTH bytes.
"""
doc_id = "z" * (MAX_DOCUMENT_ID_ENCODED_LENGTH * 2)
result = get_opensearch_doc_chunk_id(
SINGLE_TENANT_STATE, doc_id, chunk_index=999, max_chunk_size=99999
)
assert len(result.encode("utf-8")) < MAX_DOCUMENT_ID_ENCODED_LENGTH
def test_no_tenant_prefix_in_single_tenant(self) -> None:
result = get_opensearch_doc_chunk_id(
SINGLE_TENANT_STATE, "mydoc", chunk_index=0
)
assert not result.startswith(SINGLE_TENANT_STATE.tenant_id)
class TestGetOpensearchDocChunkIdMultiTenant:
def test_includes_tenant_prefix(self) -> None:
result = get_opensearch_doc_chunk_id(MULTI_TENANT_STATE, "mydoc", chunk_index=0)
assert result.startswith(f"{EXPECTED_SHORT_TENANT}__")
def test_format(self) -> None:
result = get_opensearch_doc_chunk_id(
MULTI_TENANT_STATE, "mydoc", chunk_index=2, max_chunk_size=256
)
assert result == f"{EXPECTED_SHORT_TENANT}__mydoc__256__2"
def test_long_doc_id_is_hashed_multitenant(self) -> None:
doc_id = "d" * MAX_DOCUMENT_ID_ENCODED_LENGTH
result = get_opensearch_doc_chunk_id(MULTI_TENANT_STATE, doc_id, chunk_index=0)
# Should still have tenant prefix.
assert result.startswith(f"{EXPECTED_SHORT_TENANT}__")
# The original doc ID should NOT appear in the result.
assert doc_id not in result
# The suffix should still be present.
assert f"__{DEFAULT_MAX_CHUNK_SIZE}__0" in result
def test_result_never_exceeds_max_length_multitenant(self) -> None:
doc_id = "q" * (MAX_DOCUMENT_ID_ENCODED_LENGTH * 2)
result = get_opensearch_doc_chunk_id(
MULTI_TENANT_STATE, doc_id, chunk_index=999, max_chunk_size=99999
)
assert len(result.encode("utf-8")) < MAX_DOCUMENT_ID_ENCODED_LENGTH
def test_different_tenants_produce_different_ids(self) -> None:
tenant_a = TenantState(
tenant_id="tenant_aaaaaaaa-0000-0000-0000-000000000000", multitenant=True
)
tenant_b = TenantState(
tenant_id="tenant_bbbbbbbb-0000-0000-0000-000000000000", multitenant=True
)
result_a = get_opensearch_doc_chunk_id(tenant_a, "same-doc", chunk_index=0)
result_b = get_opensearch_doc_chunk_id(tenant_b, "same-doc", chunk_index=0)
assert result_a != result_b
class TestGetOpensearchDocChunkIdEdgeCases:
def test_chunk_index_zero(self) -> None:
result = get_opensearch_doc_chunk_id(SINGLE_TENANT_STATE, "doc", chunk_index=0)
assert result.endswith("__0")
def test_large_chunk_index(self) -> None:
result = get_opensearch_doc_chunk_id(
SINGLE_TENANT_STATE, "doc", chunk_index=99999
)
assert result.endswith("__99999")
def test_doc_id_with_only_special_chars_raises(self) -> None:
"""
Tests that a doc ID that becomes empty after filtering should raise
ValueError.
"""
with pytest.raises(ValueError, match="empty after filtering"):
get_opensearch_doc_chunk_id(SINGLE_TENANT_STATE, "###???///", chunk_index=0)
def test_doc_id_at_boundary_length(self) -> None:
"""
Tests that a doc ID right at the boundary should not be hashed.
"""
suffix = f"__{DEFAULT_MAX_CHUNK_SIZE}__0"
suffix_len = len(suffix.encode("utf-8"))
# Max doc ID length that won't trigger hashing (must be <
# max_encoded_length).
max_doc_len = MAX_DOCUMENT_ID_ENCODED_LENGTH - suffix_len - 1
doc_id = "a" * max_doc_len
result = get_opensearch_doc_chunk_id(SINGLE_TENANT_STATE, doc_id, chunk_index=0)
assert doc_id in result
def test_doc_id_at_boundary_length_multitenant(self) -> None:
"""
Tests that a doc ID right at the boundary should not be hashed in
multitenant mode.
"""
suffix = f"__{DEFAULT_MAX_CHUNK_SIZE}__0"
suffix_len = len(suffix.encode("utf-8"))
prefix = f"{EXPECTED_SHORT_TENANT}__"
prefix_len = len(prefix.encode("utf-8"))
# Max doc ID length that won't trigger hashing (must be <
# max_encoded_length).
max_doc_len = MAX_DOCUMENT_ID_ENCODED_LENGTH - suffix_len - prefix_len - 1
doc_id = "a" * max_doc_len
result = get_opensearch_doc_chunk_id(MULTI_TENANT_STATE, doc_id, chunk_index=0)
assert doc_id in result
def test_doc_id_one_over_boundary_is_hashed(self) -> None:
"""
Tests that a doc ID one byte over the boundary should be hashed.
"""
suffix = f"__{DEFAULT_MAX_CHUNK_SIZE}__0"
suffix_len = len(suffix.encode("utf-8"))
# This length will trigger the >= check in filter_and_validate_document_id
doc_id = "a" * (MAX_DOCUMENT_ID_ENCODED_LENGTH - suffix_len)
result = get_opensearch_doc_chunk_id(SINGLE_TENANT_STATE, doc_id, chunk_index=0)
assert doc_id not in result

View File

@@ -467,10 +467,6 @@
/* Frost Overlay (for FrostedDiv component) - lighter in light mode */
--frost-overlay: var(--alpha-grey-00-10);
/* Scrollbar */
--scrollbar-track: transparent;
--scrollbar-thumb: var(--alpha-grey-100-20);
}
/* Dark Colors */
@@ -675,8 +671,4 @@
/* Frost Overlay (for FrostedDiv component) - darker in dark mode */
--frost-overlay: var(--alpha-grey-100-10);
/* Scrollbar */
--scrollbar-track: transparent;
--scrollbar-thumb: var(--alpha-grey-00-20);
}

View File

@@ -127,8 +127,17 @@
}
@layer utilities {
/* Hide scrollbar for Chrome, Safari and Opera */
.no-scrollbar::-webkit-scrollbar {
display: none;
}
/* Hide scrollbar for IE, Edge and Firefox */
.no-scrollbar {
-ms-overflow-style: none;
/* IE and Edge */
scrollbar-width: none;
/* Firefox */
}
/* SHADOWS */
@@ -353,9 +362,27 @@
/* SCROLL BAR */
.default-scrollbar::-webkit-scrollbar {
width: 6px;
}
.default-scrollbar::-webkit-scrollbar-track {
background: #f1f1f1;
}
.default-scrollbar::-webkit-scrollbar-thumb {
background: #888;
border-radius: 4px;
}
.default-scrollbar::-webkit-scrollbar-thumb:hover {
background: #555;
}
.default-scrollbar {
scrollbar-width: thin;
scrollbar-color: #888 transparent;
overflow: overlay;
overflow-y: scroll;
overflow-x: hidden;
}
@@ -365,21 +392,78 @@
height: 100%;
}
.inputscroll {
.inputscroll::-webkit-scrollbar-track {
background: #e5e7eb;
scrollbar-width: none;
}
/* Ensure native scrollbars are visible */
@layer base {
* {
scrollbar-width: auto;
}
::-webkit-scrollbar {
width: 0px;
/* Vertical scrollbar width */
height: 8px;
/* Horizontal scrollbar height */
}
::-webkit-scrollbar-track {
background: transparent;
/* background: theme("colors.scrollbar.track"); */
/* Track background color */
}
/* Style the scrollbar handle */
::-webkit-scrollbar-thumb {
background: transparent;
/* background: theme("colors.scrollbar.thumb"); */
/* Handle color */
border-radius: 10px;
}
/* Handle on hover */
::-webkit-scrollbar-thumb:hover {
background: transparent;
/* background: theme("colors.scrollbar.thumb-hover"); */
/* Handle color on hover */
}
.dark-scrollbar::-webkit-scrollbar-thumb {
background: transparent;
/* background: theme("colors.scrollbar.dark.thumb"); */
/* Handle color */
border-radius: 10px;
}
.dark-scrollbar::-webkit-scrollbar-thumb:hover {
background: transparent;
/* background: theme("colors.scrollbar.dark.thumb-hover"); */
/* Handle color on hover */
}
/* TEXTAREA */
textarea::-webkit-scrollbar {
width: 8px;
}
textarea::-webkit-scrollbar-track {
background: var(--scrollbar-track);
border-radius: 4px;
}
textarea::-webkit-scrollbar-thumb {
background: var(--scrollbar-thumb);
border-radius: 4px;
}
textarea::-webkit-scrollbar-thumb:hover {
background: var(--scrollbar-thumb-hover);
}
textarea {
resize: vertical;
}
/* For Firefox */
textarea {
scrollbar-width: thin;
scrollbar-color: var(--scrollbar-thumb) var(--scrollbar-track);
}

View File

@@ -5,6 +5,7 @@ import { SvgDownload, SvgTextLines } from "@opal/icons";
import Modal from "@/refresh-components/Modal";
import SimpleLoader from "@/refresh-components/loaders/SimpleLoader";
import CopyIconButton from "@/refresh-components/buttons/CopyIconButton";
import { Hoverable } from "@opal/core";
import { useHookExecutionLogs } from "@/ee/hooks/useHookExecutionLogs";
import { formatDateTimeLog } from "@/lib/dateUtils";
import { downloadFile } from "@/lib/download";
@@ -40,33 +41,40 @@ function SectionHeader({ label }: { label: string }) {
);
}
function LogRow({ log }: { log: HookExecutionRecord }) {
function LogRow({ log, group }: { log: HookExecutionRecord; group: string }) {
return (
<Section
flexDirection="row"
justifyContent="start"
alignItems="start"
gap={0.5}
height="fit"
className="py-2"
>
{/* 1. Timestamp */}
<span className="shrink-0 text-code-code">
<Text font="secondary-mono-label" color="inherit" nowrap>
{formatDateTimeLog(log.created_at)}
</Text>
</span>
{/* 2. Error message */}
<span className="flex-1 min-w-0 break-all whitespace-pre-wrap text-code-code">
<Text font="secondary-mono" color="inherit">
{log.error_message ?? "Unknown error"}
</Text>
</span>
{/* 3. Copy button */}
<Section width="fit" height="fit" alignItems="center">
<CopyIconButton size="xs" getCopyText={() => log.error_message ?? ""} />
<Hoverable.Root group={group}>
<Section
flexDirection="row"
justifyContent="start"
alignItems="start"
gap={0.5}
height="fit"
className="py-2"
>
{/* 1. Timestamp */}
<span className="shrink-0 text-code-code">
<Text font="secondary-mono-label" color="inherit" nowrap>
{formatDateTimeLog(log.created_at)}
</Text>
</span>
{/* 2. Error message */}
<span className="flex-1 min-w-0 break-all whitespace-pre-wrap text-code-code">
<Text font="secondary-mono" color="inherit">
{log.error_message ?? "Unknown error"}
</Text>
</span>
{/* 3. Copy button */}
<Section width="fit" height="fit" alignItems="center">
<Hoverable.Item group={group} variant="opacity-on-hover">
<CopyIconButton
size="xs"
getCopyText={() => log.error_message ?? ""}
/>
</Hoverable.Item>
</Section>
</Section>
</Section>
</Hoverable.Root>
);
}
@@ -126,7 +134,11 @@ export default function HookLogsModal({ hook, spec }: HookLogsModalProps) {
<>
<SectionHeader label="Past Hour" />
{recentErrors.map((log, idx) => (
<LogRow key={log.created_at + String(idx)} log={log} />
<LogRow
key={log.created_at + String(idx)}
log={log}
group={log.created_at + String(idx)}
/>
))}
</>
)}
@@ -134,7 +146,11 @@ export default function HookLogsModal({ hook, spec }: HookLogsModalProps) {
<>
<SectionHeader label="Older" />
{olderErrors.map((log, idx) => (
<LogRow key={log.created_at + String(idx)} log={log} />
<LogRow
key={log.created_at + String(idx)}
log={log}
group={log.created_at + String(idx)}
/>
))}
</>
)}

View File

@@ -3,7 +3,7 @@
import { useEffect, useRef, useState } from "react";
import { useCreateModal } from "@/refresh-components/contexts/ModalContext";
import { noProp } from "@/lib/utils";
import { formatTimeOnly } from "@/lib/dateUtils";
import { formatDateTimeLog } from "@/lib/dateUtils";
import { Button, Text } from "@opal/components";
import { Content } from "@opal/layouts";
import LineItem from "@/refresh-components/buttons/LineItem";
@@ -18,6 +18,7 @@ import {
SvgXOctagon,
} from "@opal/icons";
import CopyIconButton from "@/refresh-components/buttons/CopyIconButton";
import { Hoverable } from "@opal/core";
import { useHookExecutionLogs } from "@/ee/hooks/useHookExecutionLogs";
import HookLogsModal from "@/ee/refresh-pages/admin/HooksPage/HookLogsModal";
import type {
@@ -26,6 +27,52 @@ import type {
} from "@/ee/refresh-pages/admin/HooksPage/interfaces";
import { cn } from "@opal/utils";
function ErrorLogRow({
log,
group,
}: {
log: { created_at: string; error_message: string | null };
group: string;
}) {
return (
<Hoverable.Root group={group}>
<Section
flexDirection="column"
justifyContent="start"
alignItems="start"
gap={0.25}
padding={0.25}
height="fit"
>
<Section
flexDirection="row"
justifyContent="between"
alignItems="center"
gap={0}
height="fit"
>
<span className="text-code-code">
<Text font="secondary-mono-label" color="inherit">
{formatDateTimeLog(log.created_at)}
</Text>
</span>
<Hoverable.Item group={group} variant="opacity-on-hover">
<CopyIconButton
size="xs"
getCopyText={() => log.error_message ?? ""}
/>
</Hoverable.Item>
</Section>
<span className="break-all">
<Text font="secondary-mono" color="text-03">
{log.error_message ?? "Unknown error"}
</Text>
</span>
</Section>
</Hoverable.Root>
);
}
interface HookStatusPopoverProps {
hook: HookResponse;
spec: HookPointMeta | undefined;
@@ -43,9 +90,16 @@ export default function HookStatusPopover({
const [clickOpened, setClickOpened] = useState(false);
const closeTimerRef = useRef<ReturnType<typeof setTimeout> | null>(null);
const { hasRecentErrors, recentErrors, isLoading, error } =
const { hasRecentErrors, recentErrors, olderErrors, isLoading, error } =
useHookExecutionLogs(hook.id);
const topErrors = [...recentErrors, ...olderErrors]
.sort(
(a, b) =>
new Date(b.created_at).getTime() - new Date(a.created_at).getTime()
)
.slice(0, 3);
useEffect(() => {
return () => {
if (closeTimerRef.current) clearTimeout(closeTimerRef.current);
@@ -162,7 +216,15 @@ export default function HookStatusPopover({
justifyContent="start"
alignItems="start"
height="fit"
width={hasRecentErrors ? 20 : 12.5}
width={
hook.is_reachable === false
? topErrors.length > 0
? 20
: 12.5
: hasRecentErrors
? 20
: 12.5
}
padding={0.125}
gap={0.25}
>
@@ -174,13 +236,70 @@ export default function HookStatusPopover({
<Text font="secondary-body" color="text-03">
Failed to load logs.
</Text>
) : hook.is_reachable === false ? (
<>
<div className="p-1">
<Content
sizePreset="secondary"
variant="section"
icon={(props) => (
<SvgXOctagon
{...props}
className="text-status-error-05"
/>
)}
title="Most Recent Errors"
/>
</div>
{topErrors.length > 0 ? (
<>
<Separator noPadding className="px-2" />
<Section
flexDirection="column"
justifyContent="start"
alignItems="start"
gap={0.25}
padding={0.25}
height="fit"
>
{topErrors.map((log, idx) => (
<ErrorLogRow
key={log.created_at + String(idx)}
log={log}
group={log.created_at + String(idx)}
/>
))}
</Section>
</>
) : (
<Separator noPadding className="px-2" />
)}
<LineItem
muted
icon={SvgMaximize2}
onClick={noProp(() => {
handleOpenChange(false);
logsModal.toggle(true);
})}
>
View More Lines
</LineItem>
</>
) : hasRecentErrors ? (
<>
<div className="p-1">
<Content
sizePreset="secondary"
variant="section"
icon={SvgXOctagon}
icon={(props) => (
<SvgXOctagon
{...props}
className="text-status-error-05"
/>
)}
title={
recentErrors.length <= 3
? `${recentErrors.length} ${
@@ -204,38 +323,11 @@ export default function HookStatusPopover({
height="fit"
>
{recentErrors.slice(0, 3).map((log, idx) => (
<Section
<ErrorLogRow
key={log.created_at + String(idx)}
flexDirection="column"
justifyContent="start"
alignItems="start"
gap={0.25}
padding={0.25}
height="fit"
>
<Section
flexDirection="row"
justifyContent="between"
alignItems="center"
gap={0}
height="fit"
>
<span className="text-code-code">
<Text font="secondary-mono-label" color="inherit">
{formatTimeOnly(log.created_at)}
</Text>
</span>
<CopyIconButton
size="xs"
getCopyText={() => log.error_message ?? ""}
/>
</Section>
<span className="break-all">
<Text font="secondary-mono" color="text-03">
{log.error_message ?? "Unknown error"}
</Text>
</span>
</Section>
log={log}
group={log.created_at + String(idx)}
/>
))}
</Section>

View File

@@ -41,6 +41,7 @@ import {
activateHook,
deactivateHook,
deleteHook,
getHook,
validateHook,
} from "@/ee/refresh-pages/admin/HooksPage/svc";
import type {
@@ -319,9 +320,16 @@ function ConnectedHookCard({
toast.error(
err instanceof Error ? err.message : "Failed to validate hook."
);
return;
} finally {
setIsBusy(false);
}
try {
const updated = await getHook(hook.id);
onToggled(updated);
} catch (err) {
console.error("Failed to refresh hook after validation:", err);
}
}
const HookIcon = getHookPointIcon(hook.hook_point);

View File

@@ -87,6 +87,14 @@ export async function deactivateHook(id: number): Promise<HookResponse> {
return res.json();
}
export async function getHook(id: number): Promise<HookResponse> {
const res = await fetch(`/api/admin/hooks/${id}`);
if (!res.ok) {
throw await parseError(res, "Failed to fetch hook");
}
return res.json();
}
export async function validateHook(id: number): Promise<HookValidateResponse> {
const res = await fetch(`/api/admin/hooks/${id}/validate`, {
method: "POST",