Compare commits

..

1 Commits

Author SHA1 Message Date
Jamison Lahman
b9022e74ae WIP 2026-03-18 08:40:59 -07:00
57 changed files with 2472 additions and 1864 deletions

View File

@@ -317,7 +317,6 @@ celery_app.autodiscover_tasks(
"onyx.background.celery.tasks.docprocessing",
"onyx.background.celery.tasks.evals",
"onyx.background.celery.tasks.hierarchyfetching",
"onyx.background.celery.tasks.hooks",
"onyx.background.celery.tasks.periodic",
"onyx.background.celery.tasks.pruning",
"onyx.background.celery.tasks.shared",

View File

@@ -14,7 +14,6 @@ from onyx.configs.constants import ONYX_CLOUD_CELERY_TASK_PREFIX
from onyx.configs.constants import OnyxCeleryPriority
from onyx.configs.constants import OnyxCeleryQueues
from onyx.configs.constants import OnyxCeleryTask
from onyx.hooks.utils import HOOKS_AVAILABLE
from shared_configs.configs import MULTI_TENANT
# choosing 15 minutes because it roughly gives us enough time to process many tasks
@@ -362,19 +361,6 @@ if not MULTI_TENANT:
tasks_to_schedule.extend(beat_task_templates)
if HOOKS_AVAILABLE:
tasks_to_schedule.append(
{
"name": "hook-execution-log-cleanup",
"task": OnyxCeleryTask.HOOK_EXECUTION_LOG_CLEANUP_TASK,
"schedule": timedelta(days=1),
"options": {
"priority": OnyxCeleryPriority.LOW,
"expires": BEAT_EXPIRES_DEFAULT,
},
}
)
def generate_cloud_tasks(
beat_tasks: list[dict], beat_templates: list[dict], beat_multiplier: float

View File

@@ -1,35 +0,0 @@
from celery import shared_task
from onyx.configs.app_configs import JOB_TIMEOUT
from onyx.configs.constants import OnyxCeleryTask
from onyx.db.engine.sql_engine import get_session_with_current_tenant
from onyx.db.hook import cleanup_old_execution_logs__no_commit
from onyx.utils.logger import setup_logger
logger = setup_logger()

# Execution-log rows older than this many days are purged by the cleanup task below.
_HOOK_EXECUTION_LOG_RETENTION_DAYS: int = 30
@shared_task(
    name=OnyxCeleryTask.HOOK_EXECUTION_LOG_CLEANUP_TASK,
    ignore_result=True,
    soft_time_limit=JOB_TIMEOUT,
    trail=False,
)
def hook_execution_log_cleanup_task(*, tenant_id: str) -> None:  # noqa: ARG001
    """Periodic task: purge hook execution logs past the retention window.

    The tenant context comes from the session factory; ``tenant_id`` exists
    only to satisfy the beat-task calling convention and is unused here.
    """
    try:
        with get_session_with_current_tenant() as db_session:
            deleted: int = cleanup_old_execution_logs__no_commit(
                db_session=db_session,
                max_age_days=_HOOK_EXECUTION_LOG_RETENTION_DAYS,
            )
            db_session.commit()
        if deleted:
            logger.info(
                f"Deleted {deleted} hook execution log(s) older than "
                f"{_HOOK_EXECUTION_LOG_RETENTION_DAYS} days."
            )
    except Exception:
        logger.exception("Failed to clean up hook execution logs")
        raise

View File

@@ -297,9 +297,7 @@ class PostgresCacheBackend(CacheBackend):
def _lock_id_for(self, name: str) -> int:
"""Map *name* to a 64-bit signed int for ``pg_advisory_lock``."""
h = hashlib.md5(
f"{self._tenant_id}:{name}".encode(), usedforsecurity=False
).digest()
h = hashlib.md5(f"{self._tenant_id}:{name}".encode()).digest()
return struct.unpack("q", h[:8])[0]

View File

@@ -318,17 +318,6 @@ VERIFY_CREATE_OPENSEARCH_INDEX_ON_INIT_MT = (
OPENSEARCH_MIGRATION_GET_VESPA_CHUNKS_PAGE_SIZE = int(
os.environ.get("OPENSEARCH_MIGRATION_GET_VESPA_CHUNKS_PAGE_SIZE") or 500
)
# If set, will override the default number of shards and replicas for the index.
OPENSEARCH_INDEX_NUM_SHARDS: int | None = (
int(os.environ["OPENSEARCH_INDEX_NUM_SHARDS"])
if os.environ.get("OPENSEARCH_INDEX_NUM_SHARDS", None) is not None
else None
)
OPENSEARCH_INDEX_NUM_REPLICAS: int | None = (
int(os.environ["OPENSEARCH_INDEX_NUM_REPLICAS"])
if os.environ.get("OPENSEARCH_INDEX_NUM_REPLICAS", None) is not None
else None
)
VESPA_HOST = os.environ.get("VESPA_HOST") or "localhost"
# NOTE: this is used if and only if the vespa config server is accessible via a

View File

@@ -597,9 +597,6 @@ class OnyxCeleryTask:
EXPORT_QUERY_HISTORY_TASK = "export_query_history_task"
EXPORT_QUERY_HISTORY_CLEANUP_TASK = "export_query_history_cleanup_task"
# Hook execution log retention
HOOK_EXECUTION_LOG_CLEANUP_TASK = "hook_execution_log_cleanup_task"
# Sandbox cleanup
CLEANUP_IDLE_SANDBOXES = "cleanup_idle_sandboxes"
CLEANUP_OLD_SNAPSHOTS = "cleanup_old_snapshots"

View File

@@ -18,7 +18,6 @@ from onyx.configs.app_configs import OPENSEARCH_HOST
from onyx.configs.app_configs import OPENSEARCH_REST_API_PORT
from onyx.document_index.interfaces_new import TenantState
from onyx.document_index.opensearch.schema import DocumentChunk
from onyx.document_index.opensearch.schema import DocumentChunkWithoutVectors
from onyx.document_index.opensearch.schema import get_opensearch_doc_chunk_id
from onyx.document_index.opensearch.search import DEFAULT_OPENSEARCH_MAX_RESULT_WINDOW
from onyx.utils.logger import setup_logger
@@ -57,8 +56,8 @@ class SearchHit(BaseModel, Generic[SchemaDocumentModel]):
# Maps schema property name to a list of highlighted snippets with match
# terms wrapped in tags (e.g. "something <hi>keyword</hi> other thing").
match_highlights: dict[str, list[str]] = {}
# Score explanation from OpenSearch when "explain": true is set in the
# query. Contains detailed breakdown of how the score was calculated.
# Score explanation from OpenSearch when "explain": true is set in the query.
# Contains detailed breakdown of how the score was calculated.
explanation: dict[str, Any] | None = None
@@ -834,13 +833,9 @@ class OpenSearchIndexClient(OpenSearchClient):
@log_function_time(print_only=True, debug_only=True)
def search(
self, body: dict[str, Any], search_pipeline_id: str | None
) -> list[SearchHit[DocumentChunkWithoutVectors]]:
) -> list[SearchHit[DocumentChunk]]:
"""Searches the index.
NOTE: Does not return vector fields. In order to take advantage of
performance benefits, the search body should exclude the schema's vector
fields.
TODO(andrei): Ideally we could check that every field in the body is
present in the index, to avoid a class of runtime bugs that could easily
be caught during development. Or change the function signature to accept
@@ -888,7 +883,7 @@ class OpenSearchIndexClient(OpenSearchClient):
raise_on_timeout=True,
)
search_hits: list[SearchHit[DocumentChunkWithoutVectors]] = []
search_hits: list[SearchHit[DocumentChunk]] = []
for hit in hits:
document_chunk_source: dict[str, Any] | None = hit.get("_source")
if not document_chunk_source:
@@ -898,10 +893,8 @@ class OpenSearchIndexClient(OpenSearchClient):
document_chunk_score = hit.get("_score", None)
match_highlights: dict[str, list[str]] = hit.get("highlight", {})
explanation: dict[str, Any] | None = hit.get("_explanation", None)
search_hit = SearchHit[DocumentChunkWithoutVectors](
document_chunk=DocumentChunkWithoutVectors.model_validate(
document_chunk_source
),
search_hit = SearchHit[DocumentChunk](
document_chunk=DocumentChunk.model_validate(document_chunk_source),
score=document_chunk_score,
match_highlights=match_highlights,
explanation=explanation,

View File

@@ -47,7 +47,6 @@ from onyx.document_index.opensearch.schema import ACCESS_CONTROL_LIST_FIELD_NAME
from onyx.document_index.opensearch.schema import CONTENT_FIELD_NAME
from onyx.document_index.opensearch.schema import DOCUMENT_SETS_FIELD_NAME
from onyx.document_index.opensearch.schema import DocumentChunk
from onyx.document_index.opensearch.schema import DocumentChunkWithoutVectors
from onyx.document_index.opensearch.schema import DocumentSchema
from onyx.document_index.opensearch.schema import get_opensearch_doc_chunk_id
from onyx.document_index.opensearch.schema import GLOBAL_BOOST_FIELD_NAME
@@ -118,7 +117,7 @@ def set_cluster_state(client: OpenSearchClient) -> None:
def _convert_retrieved_opensearch_chunk_to_inference_chunk_uncleaned(
chunk: DocumentChunkWithoutVectors,
chunk: DocumentChunk,
score: float | None,
highlights: dict[str, list[str]],
) -> InferenceChunkUncleaned:
@@ -881,7 +880,7 @@ class OpenSearchDocumentIndex(DocumentIndex):
)
results: list[InferenceChunk] = []
for chunk_request in chunk_requests:
search_hits: list[SearchHit[DocumentChunkWithoutVectors]] = []
search_hits: list[SearchHit[DocumentChunk]] = []
query_body = DocumentQuery.get_from_document_id_query(
document_id=chunk_request.document_id,
tenant_state=self._tenant_state,
@@ -945,7 +944,7 @@ class OpenSearchDocumentIndex(DocumentIndex):
include_hidden=False,
)
normalization_pipeline_name, _ = get_normalization_pipeline_name_and_config()
search_hits: list[SearchHit[DocumentChunkWithoutVectors]] = self._client.search(
search_hits: list[SearchHit[DocumentChunk]] = self._client.search(
body=query_body,
search_pipeline_id=normalization_pipeline_name,
)
@@ -977,7 +976,7 @@ class OpenSearchDocumentIndex(DocumentIndex):
index_filters=filters,
num_to_retrieve=num_to_retrieve,
)
search_hits: list[SearchHit[DocumentChunkWithoutVectors]] = self._client.search(
search_hits: list[SearchHit[DocumentChunk]] = self._client.search(
body=query_body,
search_pipeline_id=None,
)

View File

@@ -11,8 +11,6 @@ from pydantic import model_serializer
from pydantic import model_validator
from pydantic import SerializerFunctionWrapHandler
from onyx.configs.app_configs import OPENSEARCH_INDEX_NUM_REPLICAS
from onyx.configs.app_configs import OPENSEARCH_INDEX_NUM_SHARDS
from onyx.configs.app_configs import OPENSEARCH_TEXT_ANALYZER
from onyx.configs.app_configs import USING_AWS_MANAGED_OPENSEARCH
from onyx.document_index.interfaces_new import TenantState
@@ -102,9 +100,9 @@ def set_or_convert_timezone_to_utc(value: datetime) -> datetime:
return value
class DocumentChunkWithoutVectors(BaseModel):
class DocumentChunk(BaseModel):
"""
Represents a chunk of a document in the OpenSearch index without vectors.
Represents a chunk of a document in the OpenSearch index.
The names of these fields are based on the OpenSearch schema. Changes to the
schema require changes here. See get_document_schema.
@@ -126,7 +124,9 @@ class DocumentChunkWithoutVectors(BaseModel):
# Either both should be None or both should be non-None.
title: str | None = None
title_vector: list[float] | None = None
content: str
content_vector: list[float]
source_type: str
# A list of key-value pairs separated by INDEX_SEPARATOR. See
@@ -176,9 +176,19 @@ class DocumentChunkWithoutVectors(BaseModel):
def __str__(self) -> str:
return (
f"DocumentChunk(document_id={self.document_id}, chunk_index={self.chunk_index}, "
f"content length={len(self.content)}, tenant_id={self.tenant_id.tenant_id})."
f"content length={len(self.content)}, content vector length={len(self.content_vector)}, "
f"tenant_id={self.tenant_id.tenant_id})"
)
@model_validator(mode="after")
def check_title_and_title_vector_are_consistent(self) -> Self:
# title and title_vector should both either be None or not.
if self.title is not None and self.title_vector is None:
raise ValueError("Bug: Title vector must not be None if title is not None.")
if self.title_vector is not None and self.title is None:
raise ValueError("Bug: Title must not be None if title vector is not None.")
return self
@model_serializer(mode="wrap")
def serialize_model(
self, handler: SerializerFunctionWrapHandler
@@ -295,35 +305,6 @@ class DocumentChunkWithoutVectors(BaseModel):
return TenantState(tenant_id=value, multitenant=MULTI_TENANT)
class DocumentChunk(DocumentChunkWithoutVectors):
"""Represents a chunk of a document in the OpenSearch index.
The names of these fields are based on the OpenSearch schema. Changes to the
schema require changes here. See get_document_schema.
"""
model_config = {"frozen": True}
title_vector: list[float] | None = None
content_vector: list[float]
def __str__(self) -> str:
return (
f"DocumentChunk(document_id={self.document_id}, chunk_index={self.chunk_index}, "
f"content length={len(self.content)}, content vector length={len(self.content_vector)}, "
f"tenant_id={self.tenant_id.tenant_id})"
)
@model_validator(mode="after")
def check_title_and_title_vector_are_consistent(self) -> Self:
# title and title_vector should both either be None or not.
if self.title is not None and self.title_vector is None:
raise ValueError("Bug: Title vector must not be None if title is not None.")
if self.title_vector is not None and self.title is None:
raise ValueError("Bug: Title must not be None if title vector is not None.")
return self
class DocumentSchema:
"""
Represents the schema and indexing strategies of the OpenSearch index.
@@ -536,34 +517,77 @@ class DocumentSchema:
return schema
@staticmethod
def get_index_settings_based_on_environment() -> dict[str, Any]:
def get_index_settings() -> dict[str, Any]:
"""
Returns the index settings based on the environment.
Standard settings for reasonable local index and search performance.
"""
if USING_AWS_MANAGED_OPENSEARCH:
# NOTE: The number of data copies, including the primary (not a
# replica) copy, must be divisible by the number of AZs.
if MULTI_TENANT:
number_of_shards = 324
number_of_replicas = 2
else:
number_of_shards = 3
number_of_replicas = 2
else:
number_of_shards = 1
number_of_replicas = 1
if OPENSEARCH_INDEX_NUM_SHARDS is not None:
number_of_shards = OPENSEARCH_INDEX_NUM_SHARDS
if OPENSEARCH_INDEX_NUM_REPLICAS is not None:
number_of_replicas = OPENSEARCH_INDEX_NUM_REPLICAS
return {
"index": {
"number_of_shards": number_of_shards,
"number_of_replicas": number_of_replicas,
"number_of_shards": 1,
"number_of_replicas": 1,
# Required for vector search.
"knn": True,
"knn.algo_param.ef_search": EF_SEARCH,
}
}
@staticmethod
def get_index_settings_for_aws_managed_opensearch_st_dev() -> dict[str, Any]:
"""
Settings for AWS-managed OpenSearch.
Our AWS-managed OpenSearch cluster has 3 data nodes in 3 availability
zones.
- We use 3 shards to distribute load across all data nodes.
- We use 2 replicas to ensure each shard has a copy in each
availability zone. This is a hard requirement from AWS. The number
of data copies, including the primary (not a replica) copy, must be
divisible by the number of AZs.
"""
return {
"index": {
"number_of_shards": 3,
"number_of_replicas": 2,
# Required for vector search.
"knn": True,
"knn.algo_param.ef_search": EF_SEARCH,
}
}
@staticmethod
def get_index_settings_for_aws_managed_opensearch_mt_cloud() -> dict[str, Any]:
"""
Settings for AWS-managed OpenSearch in multi-tenant cloud.
324 shards very roughly targets a storage load of ~30Gb per shard, which
according to AWS OpenSearch documentation is within a good target range.
As documented above we need 2 replicas for a total of 3 copies of the
data because the cluster is configured with 3-AZ awareness.
"""
return {
"index": {
"number_of_shards": 324,
"number_of_replicas": 2,
# Required for vector search.
"knn": True,
"knn.algo_param.ef_search": EF_SEARCH,
}
}
@staticmethod
def get_index_settings_based_on_environment() -> dict[str, Any]:
"""
Returns the index settings based on the environment.
"""
if USING_AWS_MANAGED_OPENSEARCH:
if MULTI_TENANT:
return (
DocumentSchema.get_index_settings_for_aws_managed_opensearch_mt_cloud()
)
else:
return (
DocumentSchema.get_index_settings_for_aws_managed_opensearch_st_dev()
)
else:
return DocumentSchema.get_index_settings()

View File

@@ -235,17 +235,9 @@ class DocumentQuery:
# returning some number of results less than the index max allowed
# return size.
"size": DEFAULT_OPENSEARCH_MAX_RESULT_WINDOW,
# By default exclude retrieving the vector fields in order to save
# on retrieval cost as we don't need them upstream.
"_source": {
"excludes": [TITLE_VECTOR_FIELD_NAME, CONTENT_VECTOR_FIELD_NAME]
},
"_source": get_full_document,
"timeout": f"{DEFAULT_OPENSEARCH_QUERY_TIMEOUT_S}s",
}
if not get_full_document:
# If we explicitly do not want the underlying document, we will only
# retrieve IDs.
final_get_ids_query["_source"] = False
if not OPENSEARCH_PROFILING_DISABLED:
final_get_ids_query["profile"] = True
@@ -395,11 +387,6 @@ class DocumentQuery:
"size": num_hits,
"highlight": match_highlights_configuration,
"timeout": f"{DEFAULT_OPENSEARCH_QUERY_TIMEOUT_S}s",
# Exclude retrieving the vector fields in order to save on
# retrieval cost as we don't need them upstream.
"_source": {
"excludes": [TITLE_VECTOR_FIELD_NAME, CONTENT_VECTOR_FIELD_NAME]
},
}
# Explain is for scoring breakdowns.
@@ -459,11 +446,6 @@ class DocumentQuery:
},
"size": num_to_retrieve,
"timeout": f"{DEFAULT_OPENSEARCH_QUERY_TIMEOUT_S}s",
# Exclude retrieving the vector fields in order to save on
# retrieval cost as we don't need them upstream.
"_source": {
"excludes": [TITLE_VECTOR_FIELD_NAME, CONTENT_VECTOR_FIELD_NAME]
},
}
if not OPENSEARCH_PROFILING_DISABLED:
final_random_search_query["profile"] = True

View File

@@ -88,7 +88,6 @@ class OnyxErrorCode(Enum):
SERVICE_UNAVAILABLE = ("SERVICE_UNAVAILABLE", 503)
BAD_GATEWAY = ("BAD_GATEWAY", 502)
LLM_PROVIDER_ERROR = ("LLM_PROVIDER_ERROR", 502)
HOOK_EXECUTION_FAILED = ("HOOK_EXECUTION_FAILED", 502)
GATEWAY_TIMEOUT = ("GATEWAY_TIMEOUT", 504)
def __init__(self, code: str, status_code: int) -> None:

View File

@@ -1,311 +0,0 @@
"""Hook executor — calls a customer's external HTTP endpoint for a given hook point.
Usage (Celery tasks and FastAPI handlers):
result = execute_hook(
db_session=db_session,
hook_point=HookPoint.QUERY_PROCESSING,
payload={"query": "...", "user_email": "...", "chat_session_id": "..."},
)
if isinstance(result, HookSkipped):
# no active hook configured — continue with original behavior
...
elif isinstance(result, HookSoftFailed):
# hook failed but fail strategy is SOFT — continue with original behavior
...
else:
# result is the response payload dict from the customer's endpoint
...
DB session design
-----------------
The executor uses three sessions:
1. Caller's session (db_session) — used only for the hook lookup read. All
needed fields are extracted from the Hook object before the HTTP call, so
the caller's session is not held open during the external HTTP request.
2. Log session — a separate short-lived session opened after the HTTP call
completes to write the HookExecutionLog row on failure. Success runs are
not recorded. Committed independently of everything else.
3. Reachable session — a second short-lived session to update is_reachable on
the Hook. Kept separate from the log session so a concurrent hook deletion
(which causes update_hook__no_commit to raise OnyxError(NOT_FOUND)) cannot
prevent the execution log from being written. This update is best-effort.
"""
import json
import time
from typing import Any
import httpx
from pydantic import BaseModel
from sqlalchemy.orm import Session
from onyx.db.engine.sql_engine import get_session_with_current_tenant
from onyx.db.enums import HookFailStrategy
from onyx.db.enums import HookPoint
from onyx.db.hook import create_hook_execution_log__no_commit
from onyx.db.hook import get_non_deleted_hook_by_hook_point
from onyx.db.hook import update_hook__no_commit
from onyx.db.models import Hook
from onyx.error_handling.error_codes import OnyxErrorCode
from onyx.error_handling.exceptions import OnyxError
from onyx.hooks.utils import HOOKS_AVAILABLE
from onyx.utils.logger import setup_logger
logger = setup_logger()
class HookSkipped:
    """Sentinel result: no active hook is configured for the requested hook point."""
class HookSoftFailed:
    """Sentinel result: the hook ran and failed, but its fail strategy is SOFT, so execution continues."""
# ---------------------------------------------------------------------------
# Private helpers
# ---------------------------------------------------------------------------
class _HttpOutcome(BaseModel):
    """Normalized result of a single hook HTTP call, produced by ``_process_response``."""

    is_success: bool
    # True/False means "persist this value to the Hook row"; None means the
    # value is unchanged and the DB write should be skipped entirely.
    updated_is_reachable: bool | None
    status_code: int | None
    error_message: str | None
    response_payload: dict[str, Any] | None
def _lookup_hook(
    db_session: Session,
    hook_point: HookPoint,
) -> Hook | HookSkipped:
    """Fetch the active Hook for *hook_point*, or HookSkipped when none applies.

    Every HookSkipped path is a pure read: no HTTP call is made and nothing is
    written to the DB, so there is nothing to log and no reachability state to
    update.
    """
    if not HOOKS_AVAILABLE:
        return HookSkipped()

    hook = get_non_deleted_hook_by_hook_point(
        db_session=db_session, hook_point=hook_point
    )
    # Missing, disabled, or misconfigured (no endpoint) hooks are all skipped.
    if hook is None or not hook.is_active or not hook.endpoint_url:
        return HookSkipped()
    return hook
def _process_response(
    *,
    response: httpx.Response | None,
    exc: Exception | None,
    timeout: float,
) -> _HttpOutcome:
    """Turn the raw result of the HTTP post into a structured ``_HttpOutcome``.

    Exactly one of *response* / *exc* must be non-None: *exc* is set when
    ``client.post()`` raised, otherwise *response* holds the server reply.
    Handles ``raise_for_status()``, JSON decoding, and the dict shape check.
    """

    def _failure(
        message: str, *, reachable: bool, code: int | None
    ) -> _HttpOutcome:
        # Shared shape for every failure branch below.
        return _HttpOutcome(
            is_success=False,
            updated_is_reachable=reachable,
            status_code=code,
            error_message=message,
            response_payload=None,
        )

    if exc is not None:
        if isinstance(exc, httpx.ConnectError):
            msg = f"Hook endpoint unreachable: {exc}"
            logger.warning(msg, exc_info=exc)
            # Connection failure is the one case that marks the hook unreachable.
            return _failure(msg, reachable=False, code=None)
        if isinstance(exc, httpx.TimeoutException):
            msg = f"Hook timed out after {timeout}s: {exc}"
            logger.warning(msg, exc_info=exc)
            return _failure(msg, reachable=True, code=None)
        msg = f"Hook call failed: {exc}"
        logger.exception(msg, exc_info=exc)
        return _failure(msg, reachable=True, code=None)

    if response is None:
        raise ValueError(
            "exactly one of response or exc must be non-None; both are None"
        )

    status_code = response.status_code
    try:
        response.raise_for_status()
    except httpx.HTTPStatusError as e:
        msg = f"Hook returned HTTP {e.response.status_code}: {e.response.text}"
        logger.warning(msg, exc_info=e)
        return _failure(msg, reachable=True, code=status_code)

    try:
        response_payload = response.json()
    except (json.JSONDecodeError, httpx.DecodingError) as e:
        msg = f"Hook returned non-JSON response: {e}"
        logger.warning(msg, exc_info=e)
        return _failure(msg, reachable=True, code=status_code)

    if not isinstance(response_payload, dict):
        msg = f"Hook returned non-dict JSON (got {type(response_payload).__name__})"
        logger.warning(msg)
        return _failure(msg, reachable=True, code=status_code)

    return _HttpOutcome(
        is_success=True,
        updated_is_reachable=True,
        status_code=status_code,
        error_message=None,
        response_payload=response_payload,
    )
def _persist_result(
    *,
    hook_id: int,
    outcome: _HttpOutcome,
    duration_ms: int,
) -> None:
    """Record the outcome in the DB: failure log plus optional is_reachable update.

    Each write uses its own short-lived session so that a failure in one
    cannot roll back or block the other.
    """
    # Failures are the only runs that get an execution-log row; successes are
    # deliberately not recorded. This write must go through even if the
    # is_reachable update below fails (e.g. the hook was deleted concurrently
    # between the initial lookup and now).
    if not outcome.is_success:
        try:
            with get_session_with_current_tenant() as log_session:
                create_hook_execution_log__no_commit(
                    db_session=log_session,
                    hook_id=hook_id,
                    is_success=False,
                    error_message=outcome.error_message,
                    status_code=outcome.status_code,
                    duration_ms=duration_ms,
                )
                log_session.commit()
        except Exception:
            logger.exception(
                f"Failed to persist hook execution log for hook_id={hook_id}"
            )

    # Best-effort reachability update, kept isolated from the log write above
    # because update_hook__no_commit can raise OnyxError(NOT_FOUND) when the
    # hook was deleted in the meantime. None means "unchanged, skip the write".
    if outcome.updated_is_reachable is None:
        return
    try:
        with get_session_with_current_tenant() as reachable_session:
            update_hook__no_commit(
                db_session=reachable_session,
                hook_id=hook_id,
                is_reachable=outcome.updated_is_reachable,
            )
            reachable_session.commit()
    except Exception:
        logger.warning(f"Failed to update is_reachable for hook_id={hook_id}")
# ---------------------------------------------------------------------------
# Public API
# ---------------------------------------------------------------------------
def execute_hook(
    *,
    db_session: Session,
    hook_point: HookPoint,
    payload: dict[str, Any],
) -> dict[str, Any] | HookSkipped | HookSoftFailed:
    """Execute the hook for the given hook point synchronously."""
    hook = _lookup_hook(db_session, hook_point)
    if isinstance(hook, HookSkipped):
        return hook

    # Snapshot everything needed from the ORM object up front so the caller's
    # session is not held open while the external HTTP request is in flight.
    timeout = hook.timeout_seconds
    hook_id = hook.id
    fail_strategy = hook.fail_strategy
    endpoint_url = hook.endpoint_url
    current_is_reachable: bool | None = hook.is_reachable

    if not endpoint_url:
        raise ValueError(
            f"hook_id={hook_id} is active but has no endpoint_url — "
            "active hooks without an endpoint_url must be rejected by _lookup_hook"
        )

    start = time.monotonic()
    response: httpx.Response | None = None
    exc: Exception | None = None
    try:
        api_key: str | None = (
            hook.api_key.get_value(apply_mask=False) if hook.api_key else None
        )
        headers: dict[str, str] = {"Content-Type": "application/json"}
        if api_key:
            headers["Authorization"] = f"Bearer {api_key}"
        with httpx.Client(timeout=timeout) as client:
            response = client.post(endpoint_url, json=payload, headers=headers)
    except Exception as e:
        exc = e
    duration_ms = int((time.monotonic() - start) * 1000)

    outcome = _process_response(response=response, exc=exc, timeout=timeout)
    # When is_reachable would not change, null it out so _persist_result skips
    # the no-op DB round-trip on every call.
    if outcome.updated_is_reachable == current_is_reachable:
        outcome = outcome.model_copy(update={"updated_is_reachable": None})
    _persist_result(hook_id=hook_id, outcome=outcome, duration_ms=duration_ms)

    if outcome.is_success:
        if outcome.response_payload is None:
            raise ValueError(
                f"response_payload is None for successful hook call (hook_id={hook_id})"
            )
        return outcome.response_payload

    if fail_strategy == HookFailStrategy.HARD:
        raise OnyxError(
            OnyxErrorCode.HOOK_EXECUTION_FAILED,
            outcome.error_message or "Hook execution failed.",
        )
    logger.warning(
        f"Hook execution failed (soft fail) for hook_id={hook_id}: {outcome.error_message}"
    )
    return HookSoftFailed()

View File

@@ -1,5 +0,0 @@
from onyx.configs.app_configs import HOOK_ENABLED
from shared_configs.configs import MULTI_TENANT
# Hooks are available only in single-tenant deployments that explicitly opt in
# via HOOK_ENABLED=true; multi-tenant deployments never run hooks.
HOOKS_AVAILABLE: bool = HOOK_ENABLED and not MULTI_TENANT

View File

@@ -479,9 +479,7 @@ def is_zip_file(file: UploadFile) -> bool:
def upload_files(
files: list[UploadFile],
file_origin: FileOrigin = FileOrigin.CONNECTOR,
unzip: bool = True,
files: list[UploadFile], file_origin: FileOrigin = FileOrigin.CONNECTOR
) -> FileUploadResponse:
# Skip directories and known macOS metadata entries
@@ -504,46 +502,31 @@ def upload_files(
if seen_zip:
raise HTTPException(status_code=400, detail=SEEN_ZIP_DETAIL)
seen_zip = True
# Validate the zip by opening it (catches corrupt/non-zip files)
with zipfile.ZipFile(file.file, "r") as zf:
if unzip:
zip_metadata_file_id = save_zip_metadata_to_file_store(
zf, file_store
zip_metadata_file_id = save_zip_metadata_to_file_store(
zf, file_store
)
for file_info in zf.namelist():
if zf.getinfo(file_info).is_dir():
continue
if not should_process_file(file_info):
continue
sub_file_bytes = zf.read(file_info)
mime_type, __ = mimetypes.guess_type(file_info)
if mime_type is None:
mime_type = "application/octet-stream"
file_id = file_store.save_file(
content=BytesIO(sub_file_bytes),
display_name=os.path.basename(file_info),
file_origin=file_origin,
file_type=mime_type,
)
for file_info in zf.namelist():
if zf.getinfo(file_info).is_dir():
continue
if not should_process_file(file_info):
continue
sub_file_bytes = zf.read(file_info)
mime_type, __ = mimetypes.guess_type(file_info)
if mime_type is None:
mime_type = "application/octet-stream"
file_id = file_store.save_file(
content=BytesIO(sub_file_bytes),
display_name=os.path.basename(file_info),
file_origin=file_origin,
file_type=mime_type,
)
deduped_file_paths.append(file_id)
deduped_file_names.append(os.path.basename(file_info))
continue
# Store the zip as-is (unzip=False)
file.file.seek(0)
file_id = file_store.save_file(
content=file.file,
display_name=file.filename,
file_origin=file_origin,
file_type=file.content_type or "application/zip",
)
deduped_file_paths.append(file_id)
deduped_file_names.append(file.filename)
deduped_file_paths.append(file_id)
deduped_file_names.append(os.path.basename(file_info))
continue
# Since we can't render docx files in the UI,
@@ -630,10 +613,9 @@ def _fetch_and_check_file_connector_cc_pair_permissions(
@router.post("/admin/connector/file/upload", tags=PUBLIC_API_TAGS)
def upload_files_api(
files: list[UploadFile],
unzip: bool = True,
_: User = Depends(current_curator_or_admin_user),
) -> FileUploadResponse:
return upload_files(files, FileOrigin.OTHER, unzip=unzip)
return upload_files(files, FileOrigin.OTHER)
@router.get("/admin/connector/{connector_id}/files", tags=PUBLIC_API_TAGS)

View File

@@ -74,7 +74,7 @@ def make_structured_onyx_request_id(prefix: str, request_url: str) -> str:
def _make_onyx_request_id(prefix: str, hash_input: str) -> str:
"""helper function to return an id given a string input"""
hash_obj = hashlib.md5(hash_input.encode("utf-8"), usedforsecurity=False)
hash_obj = hashlib.md5(hash_input.encode("utf-8"))
hash_bytes = hash_obj.digest()[:6] # Truncate to 6 bytes
# 6 bytes becomes 8 bytes. we shouldn't need to strip but just in case

View File

@@ -752,7 +752,7 @@ pypandoc-binary==1.16.2
# via onyx
pyparsing==3.2.5
# via httplib2
pypdf==6.9.1
pypdf==6.8.0
# via
# onyx
# unstructured-client

View File

@@ -29,7 +29,6 @@ from onyx.document_index.opensearch.opensearch_document_index import (
)
from onyx.document_index.opensearch.schema import CONTENT_FIELD_NAME
from onyx.document_index.opensearch.schema import DocumentChunk
from onyx.document_index.opensearch.schema import DocumentChunkWithoutVectors
from onyx.document_index.opensearch.schema import DocumentSchema
from onyx.document_index.opensearch.schema import get_opensearch_doc_chunk_id
from onyx.document_index.opensearch.search import DocumentQuery
@@ -227,7 +226,7 @@ class TestOpenSearchClient:
mappings = DocumentSchema.get_document_schema(
vector_dimension=128, multitenant=True
)
settings = DocumentSchema.get_index_settings_based_on_environment()
settings = DocumentSchema.get_index_settings()
# Under test.
# Should not raise.
@@ -243,7 +242,7 @@ class TestOpenSearchClient:
mappings = DocumentSchema.get_document_schema(
vector_dimension=128, multitenant=True
)
settings = DocumentSchema.get_index_settings_based_on_environment()
settings = DocumentSchema.get_index_settings()
test_client.create_index(mappings=mappings, settings=settings)
# Under test.
@@ -272,7 +271,7 @@ class TestOpenSearchClient:
mappings = DocumentSchema.get_document_schema(
vector_dimension=128, multitenant=True
)
settings = DocumentSchema.get_index_settings_based_on_environment()
settings = DocumentSchema.get_index_settings()
test_client.create_index(mappings=mappings, settings=settings)
@@ -286,7 +285,7 @@ class TestOpenSearchClient:
mappings = DocumentSchema.get_document_schema(
vector_dimension=128, multitenant=True
)
settings = DocumentSchema.get_index_settings_based_on_environment()
settings = DocumentSchema.get_index_settings()
# Under test and postcondition.
# Should return False before creation.
@@ -306,7 +305,7 @@ class TestOpenSearchClient:
mappings = DocumentSchema.get_document_schema(
vector_dimension=128, multitenant=True
)
settings = DocumentSchema.get_index_settings_based_on_environment()
settings = DocumentSchema.get_index_settings()
test_client.create_index(mappings=mappings, settings=settings)
# Under test.
@@ -341,7 +340,7 @@ class TestOpenSearchClient:
},
},
}
settings = DocumentSchema.get_index_settings_based_on_environment()
settings = DocumentSchema.get_index_settings()
test_client.create_index(mappings=initial_mappings, settings=settings)
# Under test.
@@ -384,7 +383,7 @@ class TestOpenSearchClient:
"test_field": {"type": "keyword"},
},
}
settings = DocumentSchema.get_index_settings_based_on_environment()
settings = DocumentSchema.get_index_settings()
test_client.create_index(mappings=initial_mappings, settings=settings)
# Under test and postcondition.
@@ -419,7 +418,7 @@ class TestOpenSearchClient:
mappings = DocumentSchema.get_document_schema(
vector_dimension=128, multitenant=True
)
settings = DocumentSchema.get_index_settings_based_on_environment()
settings = DocumentSchema.get_index_settings()
# Create once - should succeed.
test_client.create_index(mappings=mappings, settings=settings)
@@ -462,7 +461,7 @@ class TestOpenSearchClient:
mappings = DocumentSchema.get_document_schema(
vector_dimension=128, multitenant=tenant_state.multitenant
)
settings = DocumentSchema.get_index_settings_based_on_environment()
settings = DocumentSchema.get_index_settings()
test_client.create_index(mappings=mappings, settings=settings)
doc = _create_test_document_chunk(
@@ -490,7 +489,7 @@ class TestOpenSearchClient:
mappings = DocumentSchema.get_document_schema(
vector_dimension=128, multitenant=tenant_state.multitenant
)
settings = DocumentSchema.get_index_settings_based_on_environment()
settings = DocumentSchema.get_index_settings()
test_client.create_index(mappings=mappings, settings=settings)
docs = [
@@ -521,7 +520,7 @@ class TestOpenSearchClient:
mappings = DocumentSchema.get_document_schema(
vector_dimension=128, multitenant=tenant_state.multitenant
)
settings = DocumentSchema.get_index_settings_based_on_environment()
settings = DocumentSchema.get_index_settings()
test_client.create_index(mappings=mappings, settings=settings)
doc = _create_test_document_chunk(
@@ -549,7 +548,7 @@ class TestOpenSearchClient:
mappings = DocumentSchema.get_document_schema(
vector_dimension=128, multitenant=tenant_state.multitenant
)
settings = DocumentSchema.get_index_settings_based_on_environment()
settings = DocumentSchema.get_index_settings()
test_client.create_index(mappings=mappings, settings=settings)
original_doc = _create_test_document_chunk(
@@ -584,7 +583,7 @@ class TestOpenSearchClient:
mappings = DocumentSchema.get_document_schema(
vector_dimension=128, multitenant=False
)
settings = DocumentSchema.get_index_settings_based_on_environment()
settings = DocumentSchema.get_index_settings()
test_client.create_index(mappings=mappings, settings=settings)
# Under test and postcondition.
@@ -603,7 +602,7 @@ class TestOpenSearchClient:
mappings = DocumentSchema.get_document_schema(
vector_dimension=128, multitenant=tenant_state.multitenant
)
settings = DocumentSchema.get_index_settings_based_on_environment()
settings = DocumentSchema.get_index_settings()
test_client.create_index(mappings=mappings, settings=settings)
doc = _create_test_document_chunk(
@@ -639,7 +638,7 @@ class TestOpenSearchClient:
mappings = DocumentSchema.get_document_schema(
vector_dimension=128, multitenant=tenant_state.multitenant
)
settings = DocumentSchema.get_index_settings_based_on_environment()
settings = DocumentSchema.get_index_settings()
test_client.create_index(mappings=mappings, settings=settings)
# Under test.
@@ -660,7 +659,7 @@ class TestOpenSearchClient:
mappings = DocumentSchema.get_document_schema(
vector_dimension=128, multitenant=tenant_state.multitenant
)
settings = DocumentSchema.get_index_settings_based_on_environment()
settings = DocumentSchema.get_index_settings()
test_client.create_index(mappings=mappings, settings=settings)
# Index multiple documents.
@@ -736,7 +735,7 @@ class TestOpenSearchClient:
mappings = DocumentSchema.get_document_schema(
vector_dimension=128, multitenant=tenant_state.multitenant
)
settings = DocumentSchema.get_index_settings_based_on_environment()
settings = DocumentSchema.get_index_settings()
test_client.create_index(mappings=mappings, settings=settings)
# Create a document to update.
@@ -785,7 +784,7 @@ class TestOpenSearchClient:
mappings = DocumentSchema.get_document_schema(
vector_dimension=128, multitenant=tenant_state.multitenant
)
settings = DocumentSchema.get_index_settings_based_on_environment()
settings = DocumentSchema.get_index_settings()
test_client.create_index(mappings=mappings, settings=settings)
# Under test and postcondition.
@@ -809,7 +808,7 @@ class TestOpenSearchClient:
mappings = DocumentSchema.get_document_schema(
vector_dimension=128, multitenant=tenant_state.multitenant
)
settings = DocumentSchema.get_index_settings_based_on_environment()
settings = DocumentSchema.get_index_settings()
test_client.create_index(mappings=mappings, settings=settings)
# Index documents.
docs = {
@@ -882,12 +881,8 @@ class TestOpenSearchClient:
)
# Make sure the chunk contents are preserved.
for i, chunk in enumerate(results):
expected = docs[chunk.document_chunk.document_id]
assert chunk.document_chunk == DocumentChunkWithoutVectors(
**{
k: getattr(expected, k)
for k in DocumentChunkWithoutVectors.model_fields
}
assert (
chunk.document_chunk == docs[chunk.document_chunk.document_id]
)
# Make sure score reporting seems reasonable (it should not be None
# or 0).
@@ -911,7 +906,7 @@ class TestOpenSearchClient:
mappings = DocumentSchema.get_document_schema(
vector_dimension=128, multitenant=tenant_state.multitenant
)
settings = DocumentSchema.get_index_settings_based_on_environment()
settings = DocumentSchema.get_index_settings()
test_client.create_index(mappings=mappings, settings=settings)
# Note no documents were indexed.
@@ -952,7 +947,7 @@ class TestOpenSearchClient:
mappings = DocumentSchema.get_document_schema(
vector_dimension=128, multitenant=tenant_x.multitenant
)
settings = DocumentSchema.get_index_settings_based_on_environment()
settings = DocumentSchema.get_index_settings()
test_client.create_index(mappings=mappings, settings=settings)
# Index documents with different public/hidden and tenant states.
@@ -1043,12 +1038,7 @@ class TestOpenSearchClient:
# ordered; we're just assuming which doc will be the first result here.
assert results[0].document_chunk.document_id == "public-doc"
# Make sure the chunk contents are preserved.
assert results[0].document_chunk == DocumentChunkWithoutVectors(
**{
k: getattr(docs["public-doc"], k)
for k in DocumentChunkWithoutVectors.model_fields
}
)
assert results[0].document_chunk == docs["public-doc"]
# Make sure score reporting seems reasonable (it should not be None
# or 0).
assert results[0].score
@@ -1056,12 +1046,7 @@ class TestOpenSearchClient:
assert results[0].match_highlights.get(CONTENT_FIELD_NAME, [])
# Same for the second result.
assert results[1].document_chunk.document_id == "private-doc-user-a"
assert results[1].document_chunk == DocumentChunkWithoutVectors(
**{
k: getattr(docs["private-doc-user-a"], k)
for k in DocumentChunkWithoutVectors.model_fields
}
)
assert results[1].document_chunk == docs["private-doc-user-a"]
assert results[1].score
assert results[1].match_highlights.get(CONTENT_FIELD_NAME, [])
@@ -1081,7 +1066,7 @@ class TestOpenSearchClient:
mappings = DocumentSchema.get_document_schema(
vector_dimension=128, multitenant=tenant_x.multitenant
)
settings = DocumentSchema.get_index_settings_based_on_environment()
settings = DocumentSchema.get_index_settings()
test_client.create_index(mappings=mappings, settings=settings)
# Index documents with varying relevance to the query.
@@ -1208,7 +1193,7 @@ class TestOpenSearchClient:
mappings = DocumentSchema.get_document_schema(
vector_dimension=128, multitenant=tenant_x.multitenant
)
settings = DocumentSchema.get_index_settings_based_on_environment()
settings = DocumentSchema.get_index_settings()
test_client.create_index(mappings=mappings, settings=settings)
# Although very unlikely in practice, let's use the same doc ID just to
@@ -1301,7 +1286,7 @@ class TestOpenSearchClient:
mappings = DocumentSchema.get_document_schema(
vector_dimension=128, multitenant=tenant_state.multitenant
)
settings = DocumentSchema.get_index_settings_based_on_environment()
settings = DocumentSchema.get_index_settings()
test_client.create_index(mappings=mappings, settings=settings)
# Don't index any documents.
@@ -1328,7 +1313,7 @@ class TestOpenSearchClient:
mappings = DocumentSchema.get_document_schema(
vector_dimension=128, multitenant=tenant_state.multitenant
)
settings = DocumentSchema.get_index_settings_based_on_environment()
settings = DocumentSchema.get_index_settings()
test_client.create_index(mappings=mappings, settings=settings)
# Index chunks for two different documents.
@@ -1396,7 +1381,7 @@ class TestOpenSearchClient:
mappings = DocumentSchema.get_document_schema(
vector_dimension=128, multitenant=tenant_state.multitenant
)
settings = DocumentSchema.get_index_settings_based_on_environment()
settings = DocumentSchema.get_index_settings()
test_client.create_index(mappings=mappings, settings=settings)
# Index documents with different public/hidden and tenant states.
@@ -1473,7 +1458,7 @@ class TestOpenSearchClient:
mappings = DocumentSchema.get_document_schema(
vector_dimension=128, multitenant=tenant_state.multitenant
)
settings = DocumentSchema.get_index_settings_based_on_environment()
settings = DocumentSchema.get_index_settings()
test_client.create_index(mappings=mappings, settings=settings)
# Index docs with various ages.
@@ -1565,7 +1550,7 @@ class TestOpenSearchClient:
mappings = DocumentSchema.get_document_schema(
vector_dimension=128, multitenant=tenant_state.multitenant
)
settings = DocumentSchema.get_index_settings_based_on_environment()
settings = DocumentSchema.get_index_settings()
test_client.create_index(mappings=mappings, settings=settings)
# Index chunks for two different documents, one hidden one not.
@@ -1614,9 +1599,4 @@ class TestOpenSearchClient:
for result in results:
# Note each result must be from doc 1, which is not hidden.
expected_result = doc1_chunks[result.document_chunk.chunk_index]
assert result.document_chunk == DocumentChunkWithoutVectors(
**{
k: getattr(expected_result, k)
for k in DocumentChunkWithoutVectors.model_fields
}
)
assert result.document_chunk == expected_result

View File

@@ -31,6 +31,7 @@ from onyx.background.celery.tasks.opensearch_migration.transformer import (
)
from onyx.configs.constants import PUBLIC_DOC_PAT
from onyx.configs.constants import SOURCE_TYPE
from onyx.context.search.models import IndexFilters
from onyx.db.engine.sql_engine import get_session_with_current_tenant
from onyx.db.models import Document
from onyx.db.models import OpenSearchDocumentMigrationRecord
@@ -43,7 +44,6 @@ from onyx.document_index.opensearch.client import OpenSearchIndexClient
from onyx.document_index.opensearch.client import wait_for_opensearch_with_timeout
from onyx.document_index.opensearch.constants import DEFAULT_MAX_CHUNK_SIZE
from onyx.document_index.opensearch.schema import DocumentChunk
from onyx.document_index.opensearch.schema import get_opensearch_doc_chunk_id
from onyx.document_index.opensearch.search import DocumentQuery
from onyx.document_index.vespa.shared_utils.utils import wait_for_vespa_with_timeout
from onyx.document_index.vespa.vespa_document_index import VespaDocumentIndex
@@ -70,7 +70,6 @@ from onyx.document_index.vespa_constants import SOURCE_LINKS
from onyx.document_index.vespa_constants import TITLE
from onyx.document_index.vespa_constants import TITLE_EMBEDDING
from onyx.document_index.vespa_constants import USER_PROJECT
from shared_configs.configs import MULTI_TENANT
from shared_configs.contextvars import get_current_tenant_id
from tests.external_dependency_unit.full_setup import ensure_full_deployment_setup
@@ -79,22 +78,24 @@ CHUNK_COUNT = 5
def _get_document_chunks_from_opensearch(
opensearch_client: OpenSearchIndexClient,
document_id: str,
tenant_state: TenantState,
opensearch_client: OpenSearchIndexClient, document_id: str, current_tenant_id: str
) -> list[DocumentChunk]:
opensearch_client.refresh_index()
results: list[DocumentChunk] = []
for i in range(CHUNK_COUNT):
document_chunk_id: str = get_opensearch_doc_chunk_id(
tenant_state=tenant_state,
document_id=document_id,
chunk_index=i,
max_chunk_size=DEFAULT_MAX_CHUNK_SIZE,
)
result = opensearch_client.get_document(document_chunk_id)
results.append(result)
return results
filters = IndexFilters(access_control_list=None, tenant_id=current_tenant_id)
query_body = DocumentQuery.get_from_document_id_query(
document_id=document_id,
tenant_state=TenantState(tenant_id=current_tenant_id, multitenant=False),
index_filters=filters,
include_hidden=False,
max_chunk_size=DEFAULT_MAX_CHUNK_SIZE,
min_chunk_index=None,
max_chunk_index=None,
)
search_hits = opensearch_client.search(
body=query_body,
search_pipeline_id=None,
)
return [search_hit.document_chunk for search_hit in search_hits]
def _delete_document_chunks_from_opensearch(
@@ -451,13 +452,10 @@ class TestMigrateChunksFromVespaToOpenSearchTask:
for chunks in document_chunks.values():
all_chunks.extend(chunks)
vespa_document_index.index_raw_chunks(all_chunks)
tenant_state = TenantState(
tenant_id=get_current_tenant_id(), multitenant=MULTI_TENANT
)
# Under test.
result = migrate_chunks_from_vespa_to_opensearch_task(
tenant_id=tenant_state.tenant_id
tenant_id=get_current_tenant_id()
)
# Postcondition.
@@ -479,7 +477,7 @@ class TestMigrateChunksFromVespaToOpenSearchTask:
# Verify chunks were indexed in OpenSearch.
for document in test_documents:
opensearch_chunks = _get_document_chunks_from_opensearch(
opensearch_client, document.id, tenant_state
opensearch_client, document.id, get_current_tenant_id()
)
assert len(opensearch_chunks) == CHUNK_COUNT
opensearch_chunks.sort(key=lambda x: x.chunk_index)
@@ -524,9 +522,6 @@ class TestMigrateChunksFromVespaToOpenSearchTask:
for chunks in document_chunks.values():
all_chunks.extend(chunks)
vespa_document_index.index_raw_chunks(all_chunks)
tenant_state = TenantState(
tenant_id=get_current_tenant_id(), multitenant=MULTI_TENANT
)
# Run the initial batch. To simulate partial progress we will mock the
# redis lock to return True for the first invocation of .owned() and
@@ -541,7 +536,7 @@ class TestMigrateChunksFromVespaToOpenSearchTask:
return_value=mock_redis_client,
):
result_1 = migrate_chunks_from_vespa_to_opensearch_task(
tenant_id=tenant_state.tenant_id
tenant_id=get_current_tenant_id()
)
assert result_1 is True
@@ -564,7 +559,7 @@ class TestMigrateChunksFromVespaToOpenSearchTask:
# Under test.
# Run the remainder of the migration.
result_2 = migrate_chunks_from_vespa_to_opensearch_task(
tenant_id=tenant_state.tenant_id
tenant_id=get_current_tenant_id()
)
# Postcondition.
@@ -588,7 +583,7 @@ class TestMigrateChunksFromVespaToOpenSearchTask:
# Verify chunks were indexed in OpenSearch.
for document in test_documents:
opensearch_chunks = _get_document_chunks_from_opensearch(
opensearch_client, document.id, tenant_state
opensearch_client, document.id, get_current_tenant_id()
)
assert len(opensearch_chunks) == CHUNK_COUNT
opensearch_chunks.sort(key=lambda x: x.chunk_index)
@@ -635,9 +630,6 @@ class TestMigrateChunksFromVespaToOpenSearchTask:
for chunks in document_chunks.values():
all_chunks.extend(chunks)
vespa_document_index.index_raw_chunks(all_chunks)
tenant_state = TenantState(
tenant_id=get_current_tenant_id(), multitenant=MULTI_TENANT
)
# Run the initial batch. To simulate partial progress we will mock the
# redis lock to return True for the first invocation of .owned() and
@@ -654,7 +646,7 @@ class TestMigrateChunksFromVespaToOpenSearchTask:
return_value=mock_redis_client,
):
result_1 = migrate_chunks_from_vespa_to_opensearch_task(
tenant_id=tenant_state.tenant_id
tenant_id=get_current_tenant_id()
)
assert result_1 is True
@@ -699,7 +691,7 @@ class TestMigrateChunksFromVespaToOpenSearchTask:
),
):
result_2 = migrate_chunks_from_vespa_to_opensearch_task(
tenant_id=tenant_state.tenant_id
tenant_id=get_current_tenant_id()
)
# Postcondition.
@@ -736,7 +728,7 @@ class TestMigrateChunksFromVespaToOpenSearchTask:
),
):
result_3 = migrate_chunks_from_vespa_to_opensearch_task(
tenant_id=tenant_state.tenant_id
tenant_id=get_current_tenant_id()
)
# Postcondition.
@@ -760,7 +752,7 @@ class TestMigrateChunksFromVespaToOpenSearchTask:
# Verify chunks were indexed in OpenSearch.
for document in test_documents:
opensearch_chunks = _get_document_chunks_from_opensearch(
opensearch_client, document.id, tenant_state
opensearch_client, document.id, get_current_tenant_id()
)
assert len(opensearch_chunks) == CHUNK_COUNT
opensearch_chunks.sort(key=lambda x: x.chunk_index)
@@ -848,25 +840,24 @@ class TestMigrateChunksFromVespaToOpenSearchTask:
chunk["content"] = (
f"Different content {chunk[CHUNK_ID]} for {test_documents[0].id}"
)
tenant_state = TenantState(
tenant_id=get_current_tenant_id(), multitenant=MULTI_TENANT
)
chunks_for_document_in_opensearch, _ = (
transform_vespa_chunks_to_opensearch_chunks(
document_in_opensearch,
tenant_state,
TenantState(tenant_id=get_current_tenant_id(), multitenant=False),
{},
)
)
opensearch_client.bulk_index_documents(
documents=chunks_for_document_in_opensearch,
tenant_state=tenant_state,
tenant_state=TenantState(
tenant_id=get_current_tenant_id(), multitenant=False
),
update_if_exists=True,
)
# Under test.
result = migrate_chunks_from_vespa_to_opensearch_task(
tenant_id=tenant_state.tenant_id
tenant_id=get_current_tenant_id()
)
# Postcondition.
@@ -887,7 +878,7 @@ class TestMigrateChunksFromVespaToOpenSearchTask:
# Verify chunks were indexed in OpenSearch.
for document in test_documents:
opensearch_chunks = _get_document_chunks_from_opensearch(
opensearch_client, document.id, tenant_state
opensearch_client, document.id, get_current_tenant_id()
)
assert len(opensearch_chunks) == CHUNK_COUNT
opensearch_chunks.sort(key=lambda x: x.chunk_index)
@@ -931,14 +922,11 @@ class TestMigrateChunksFromVespaToOpenSearchTask:
for chunks in document_chunks.values():
all_chunks.extend(chunks)
vespa_document_index.index_raw_chunks(all_chunks)
tenant_state = TenantState(
tenant_id=get_current_tenant_id(), multitenant=MULTI_TENANT
)
# Under test.
# First run.
result_1 = migrate_chunks_from_vespa_to_opensearch_task(
tenant_id=tenant_state.tenant_id
tenant_id=get_current_tenant_id()
)
# Postcondition.
@@ -959,7 +947,7 @@ class TestMigrateChunksFromVespaToOpenSearchTask:
# Verify chunks were indexed in OpenSearch.
for document in test_documents:
opensearch_chunks = _get_document_chunks_from_opensearch(
opensearch_client, document.id, tenant_state
opensearch_client, document.id, get_current_tenant_id()
)
assert len(opensearch_chunks) == CHUNK_COUNT
opensearch_chunks.sort(key=lambda x: x.chunk_index)
@@ -972,7 +960,7 @@ class TestMigrateChunksFromVespaToOpenSearchTask:
# Under test.
# Second run.
result_2 = migrate_chunks_from_vespa_to_opensearch_task(
tenant_id=tenant_state.tenant_id
tenant_id=get_current_tenant_id()
)
# Postcondition.
@@ -994,7 +982,7 @@ class TestMigrateChunksFromVespaToOpenSearchTask:
# Verify chunks were indexed in OpenSearch.
for document in test_documents:
opensearch_chunks = _get_document_chunks_from_opensearch(
opensearch_client, document.id, tenant_state
opensearch_client, document.id, get_current_tenant_id()
)
assert len(opensearch_chunks) == CHUNK_COUNT
opensearch_chunks.sort(key=lambda x: x.chunk_index)

View File

@@ -1,45 +0,0 @@
%PDF-1.3
%<25><><EFBFBD><EFBFBD>
1 0 obj
<<
/Producer (pypdf)
>>
endobj
2 0 obj
<<
/Type /Pages
/Count 1
/Kids [ 4 0 R ]
>>
endobj
3 0 obj
<<
/Type /Catalog
/Pages 2 0 R
>>
endobj
4 0 obj
<<
/Type /Page
/Resources <<
>>
/MediaBox [ 0.0 0.0 200 200 ]
/Parent 2 0 R
>>
endobj
xref
0 5
0000000000 65535 f
0000000015 00000 n
0000000054 00000 n
0000000113 00000 n
0000000162 00000 n
trailer
<<
/Size 5
/Root 3 0 R
/Info 1 0 R
>>
startxref
256
%%EOF

View File

@@ -1,89 +0,0 @@
%PDF-1.3
%<25><><EFBFBD><EFBFBD>
1 0 obj
<<
/Producer (pypdf)
>>
endobj
2 0 obj
<<
/Type /Pages
/Count 2
/Kids [ 4 0 R 6 0 R ]
>>
endobj
3 0 obj
<<
/Type /Catalog
/Pages 2 0 R
>>
endobj
4 0 obj
<<
/Type /Page
/Resources <<
/Font <<
/F1 <<
/Type /Font
/Subtype /Type1
/BaseFont /Helvetica
>>
>>
>>
/MediaBox [ 0.0 0.0 200 200 ]
/Contents 5 0 R
/Parent 2 0 R
>>
endobj
5 0 obj
<<
/Length 47
>>
stream
BT /F1 12 Tf 50 150 Td (Page one content) Tj ET
endstream
endobj
6 0 obj
<<
/Type /Page
/Resources <<
/Font <<
/F1 <<
/Type /Font
/Subtype /Type1
/BaseFont /Helvetica
>>
>>
>>
/MediaBox [ 0.0 0.0 200 200 ]
/Contents 7 0 R
/Parent 2 0 R
>>
endobj
7 0 obj
<<
/Length 47
>>
stream
BT /F1 12 Tf 50 150 Td (Page two content) Tj ET
endstream
endobj
xref
0 8
0000000000 65535 f
0000000015 00000 n
0000000054 00000 n
0000000119 00000 n
0000000168 00000 n
0000000349 00000 n
0000000446 00000 n
0000000627 00000 n
trailer
<<
/Size 8
/Root 3 0 R
/Info 1 0 R
>>
startxref
724
%%EOF

View File

@@ -1,62 +0,0 @@
%PDF-1.3
%<25><><EFBFBD><EFBFBD>
1 0 obj
<<
/Producer (pypdf)
>>
endobj
2 0 obj
<<
/Type /Pages
/Count 1
/Kids [ 4 0 R ]
>>
endobj
3 0 obj
<<
/Type /Catalog
/Pages 2 0 R
>>
endobj
4 0 obj
<<
/Type /Page
/Resources <<
/Font <<
/F1 <<
/Type /Font
/Subtype /Type1
/BaseFont /Helvetica
>>
>>
>>
/MediaBox [ 0.0 0.0 200 200 ]
/Contents 5 0 R
/Parent 2 0 R
>>
endobj
5 0 obj
<<
/Length 42
>>
stream
BT /F1 12 Tf 50 150 Td (Hello World) Tj ET
endstream
endobj
xref
0 6
0000000000 65535 f
0000000015 00000 n
0000000054 00000 n
0000000113 00000 n
0000000162 00000 n
0000000343 00000 n
trailer
<<
/Size 6
/Root 3 0 R
/Info 1 0 R
>>
startxref
435
%%EOF

View File

@@ -1,64 +0,0 @@
%PDF-1.3
%<25><><EFBFBD><EFBFBD>
1 0 obj
<<
/Producer (pypdf)
/Title (My Title)
/Author (Jane Doe)
>>
endobj
2 0 obj
<<
/Type /Pages
/Count 1
/Kids [ 4 0 R ]
>>
endobj
3 0 obj
<<
/Type /Catalog
/Pages 2 0 R
>>
endobj
4 0 obj
<<
/Type /Page
/Resources <<
/Font <<
/F1 <<
/Type /Font
/Subtype /Type1
/BaseFont /Helvetica
>>
>>
>>
/MediaBox [ 0.0 0.0 200 200 ]
/Contents 5 0 R
/Parent 2 0 R
>>
endobj
5 0 obj
<<
/Length 35
>>
stream
BT /F1 12 Tf 50 150 Td (test) Tj ET
endstream
endobj
xref
0 6
0000000000 65535 f
0000000015 00000 n
0000000091 00000 n
0000000150 00000 n
0000000199 00000 n
0000000380 00000 n
trailer
<<
/Size 6
/Root 3 0 R
/Info 1 0 R
>>
startxref
465
%%EOF

View File

@@ -1,124 +0,0 @@
"""Unit tests for pypdf-dependent PDF processing functions.
Tests cover:
- read_pdf_file: text extraction, metadata, encrypted PDFs, image extraction
- pdf_to_text: convenience wrapper
- is_pdf_protected: password protection detection
Fixture PDFs live in ./fixtures/ and are pre-built so the test layer has no
dependency on pypdf internals (pypdf.generic).
"""
from io import BytesIO
from pathlib import Path
from onyx.file_processing.extract_file_text import pdf_to_text
from onyx.file_processing.extract_file_text import read_pdf_file
from onyx.file_processing.password_validation import is_pdf_protected
FIXTURES = Path(__file__).parent / "fixtures"
def _load(name: str) -> BytesIO:
return BytesIO((FIXTURES / name).read_bytes())
# ── read_pdf_file ────────────────────────────────────────────────────────
class TestReadPdfFile:
def test_basic_text_extraction(self) -> None:
text, _, images = read_pdf_file(_load("simple.pdf"))
assert "Hello World" in text
assert images == []
def test_multi_page_text_extraction(self) -> None:
text, _, _ = read_pdf_file(_load("multipage.pdf"))
assert "Page one content" in text
assert "Page two content" in text
def test_metadata_extraction(self) -> None:
_, pdf_metadata, _ = read_pdf_file(_load("with_metadata.pdf"))
assert pdf_metadata.get("Title") == "My Title"
assert pdf_metadata.get("Author") == "Jane Doe"
def test_encrypted_pdf_with_correct_password(self) -> None:
text, _, _ = read_pdf_file(_load("encrypted.pdf"), pdf_pass="pass123")
assert "Secret Content" in text
def test_encrypted_pdf_without_password(self) -> None:
text, _, _ = read_pdf_file(_load("encrypted.pdf"))
assert text == ""
def test_encrypted_pdf_with_wrong_password(self) -> None:
text, _, _ = read_pdf_file(_load("encrypted.pdf"), pdf_pass="wrong")
assert text == ""
def test_empty_pdf(self) -> None:
text, _, _ = read_pdf_file(_load("empty.pdf"))
assert text.strip() == ""
def test_invalid_pdf_returns_empty(self) -> None:
text, _, images = read_pdf_file(BytesIO(b"this is not a pdf"))
assert text == ""
assert images == []
def test_image_extraction_disabled_by_default(self) -> None:
_, _, images = read_pdf_file(_load("with_image.pdf"))
assert images == []
def test_image_extraction_collects_images(self) -> None:
_, _, images = read_pdf_file(_load("with_image.pdf"), extract_images=True)
assert len(images) == 1
img_bytes, img_name = images[0]
assert len(img_bytes) > 0
assert img_name # non-empty name
def test_image_callback_streams_instead_of_collecting(self) -> None:
"""With image_callback, images are streamed via callback and not accumulated."""
collected: list[tuple[bytes, str]] = []
def callback(data: bytes, name: str) -> None:
collected.append((data, name))
_, _, images = read_pdf_file(
_load("with_image.pdf"), extract_images=True, image_callback=callback
)
# Callback received the image
assert len(collected) == 1
assert len(collected[0][0]) > 0
# Returned list is empty when callback is used
assert images == []
# ── pdf_to_text ──────────────────────────────────────────────────────────
class TestPdfToText:
def test_returns_text(self) -> None:
assert "Hello World" in pdf_to_text(_load("simple.pdf"))
def test_with_password(self) -> None:
assert "Secret Content" in pdf_to_text(
_load("encrypted.pdf"), pdf_pass="pass123"
)
def test_encrypted_without_password_returns_empty(self) -> None:
assert pdf_to_text(_load("encrypted.pdf")) == ""
# ── is_pdf_protected ─────────────────────────────────────────────────────
class TestIsPdfProtected:
def test_unprotected_pdf(self) -> None:
assert is_pdf_protected(_load("simple.pdf")) is False
def test_protected_pdf(self) -> None:
assert is_pdf_protected(_load("encrypted.pdf")) is True
def test_preserves_file_position(self) -> None:
pdf = _load("simple.pdf")
pdf.seek(42)
is_pdf_protected(pdf)
assert pdf.tell() == 42

View File

@@ -1,479 +0,0 @@
"""Unit tests for the hook executor."""
import json
from typing import Any
from unittest.mock import MagicMock
from unittest.mock import patch
import httpx
import pytest
from onyx.db.enums import HookFailStrategy
from onyx.db.enums import HookPoint
from onyx.error_handling.error_codes import OnyxErrorCode
from onyx.error_handling.exceptions import OnyxError
from onyx.hooks.executor import execute_hook
from onyx.hooks.executor import HookSkipped
from onyx.hooks.executor import HookSoftFailed
# ---------------------------------------------------------------------------
# Helpers
# ---------------------------------------------------------------------------
_PAYLOAD: dict[str, Any] = {"query": "test", "user_email": "u@example.com"}
_RESPONSE_PAYLOAD: dict[str, Any] = {"rewritten_query": "better test"}
def _make_hook(
*,
is_active: bool = True,
endpoint_url: str | None = "https://hook.example.com/query",
api_key: MagicMock | None = None,
timeout_seconds: float = 5.0,
fail_strategy: HookFailStrategy = HookFailStrategy.SOFT,
hook_id: int = 1,
) -> MagicMock:
hook = MagicMock()
hook.is_active = is_active
hook.endpoint_url = endpoint_url
hook.api_key = api_key
hook.timeout_seconds = timeout_seconds
hook.id = hook_id
hook.fail_strategy = fail_strategy
return hook
def _make_api_key(value: str) -> MagicMock:
api_key = MagicMock()
api_key.get_value.return_value = value
return api_key
def _make_response(
*,
status_code: int = 200,
json_return: Any = _RESPONSE_PAYLOAD,
json_side_effect: Exception | None = None,
) -> MagicMock:
"""Build a response mock with controllable json() behaviour."""
response = MagicMock()
response.status_code = status_code
if json_side_effect is not None:
response.json.side_effect = json_side_effect
else:
response.json.return_value = json_return
return response
def _setup_client(
mock_client_cls: MagicMock,
*,
response: MagicMock | None = None,
side_effect: Exception | None = None,
) -> MagicMock:
"""Wire up the httpx.Client mock and return the inner client.
If side_effect is an httpx.HTTPStatusError, it is raised from
raise_for_status() (matching real httpx behaviour) and post() returns a
response mock with the matching status_code set. All other exceptions are
raised directly from post().
"""
mock_client = MagicMock()
if isinstance(side_effect, httpx.HTTPStatusError):
error_response = MagicMock()
error_response.status_code = side_effect.response.status_code
error_response.raise_for_status.side_effect = side_effect
mock_client.post = MagicMock(return_value=error_response)
else:
mock_client.post = MagicMock(
side_effect=side_effect, return_value=response if not side_effect else None
)
mock_client_cls.return_value.__enter__ = MagicMock(return_value=mock_client)
mock_client_cls.return_value.__exit__ = MagicMock(return_value=False)
return mock_client
# ---------------------------------------------------------------------------
# Fixtures
# ---------------------------------------------------------------------------
@pytest.fixture()
def db_session() -> MagicMock:
return MagicMock()
# ---------------------------------------------------------------------------
# Early-exit guards (no HTTP call, no DB writes)
# ---------------------------------------------------------------------------
@pytest.mark.parametrize(
"hooks_available,hook",
[
# HOOKS_AVAILABLE=False exits before the DB lookup — hook is irrelevant.
pytest.param(False, None, id="hooks_not_available"),
pytest.param(True, None, id="hook_not_found"),
pytest.param(True, _make_hook(is_active=False), id="hook_inactive"),
pytest.param(True, _make_hook(endpoint_url=None), id="no_endpoint_url"),
],
)
def test_early_exit_returns_skipped_with_no_db_writes(
db_session: MagicMock,
hooks_available: bool,
hook: MagicMock | None,
) -> None:
with (
patch("onyx.hooks.executor.HOOKS_AVAILABLE", hooks_available),
patch(
"onyx.hooks.executor.get_non_deleted_hook_by_hook_point",
return_value=hook,
),
patch("onyx.hooks.executor.update_hook__no_commit") as mock_update,
patch("onyx.hooks.executor.create_hook_execution_log__no_commit") as mock_log,
):
result = execute_hook(
db_session=db_session,
hook_point=HookPoint.QUERY_PROCESSING,
payload=_PAYLOAD,
)
assert isinstance(result, HookSkipped)
mock_update.assert_not_called()
mock_log.assert_not_called()
# ---------------------------------------------------------------------------
# Successful HTTP call
# ---------------------------------------------------------------------------
def test_success_returns_payload_and_sets_reachable(db_session: MagicMock) -> None:
hook = _make_hook()
with (
patch("onyx.hooks.executor.HOOKS_AVAILABLE", True),
patch(
"onyx.hooks.executor.get_non_deleted_hook_by_hook_point",
return_value=hook,
),
patch("onyx.hooks.executor.get_session_with_current_tenant"),
patch("onyx.hooks.executor.update_hook__no_commit") as mock_update,
patch("onyx.hooks.executor.create_hook_execution_log__no_commit") as mock_log,
patch("httpx.Client") as mock_client_cls,
):
_setup_client(mock_client_cls, response=_make_response())
result = execute_hook(
db_session=db_session,
hook_point=HookPoint.QUERY_PROCESSING,
payload=_PAYLOAD,
)
assert result == _RESPONSE_PAYLOAD
_, update_kwargs = mock_update.call_args
assert update_kwargs["is_reachable"] is True
mock_log.assert_not_called()
def test_non_dict_json_response_is_a_failure(db_session: MagicMock) -> None:
"""response.json() returning a non-dict (e.g. list) must be treated as failure."""
hook = _make_hook(fail_strategy=HookFailStrategy.SOFT)
with (
patch("onyx.hooks.executor.HOOKS_AVAILABLE", True),
patch(
"onyx.hooks.executor.get_non_deleted_hook_by_hook_point",
return_value=hook,
),
patch("onyx.hooks.executor.get_session_with_current_tenant"),
patch("onyx.hooks.executor.update_hook__no_commit"),
patch("onyx.hooks.executor.create_hook_execution_log__no_commit") as mock_log,
patch("httpx.Client") as mock_client_cls,
):
_setup_client(
mock_client_cls,
response=_make_response(json_return=["unexpected", "list"]),
)
result = execute_hook(
db_session=db_session,
hook_point=HookPoint.QUERY_PROCESSING,
payload=_PAYLOAD,
)
assert isinstance(result, HookSoftFailed)
_, log_kwargs = mock_log.call_args
assert log_kwargs["is_success"] is False
assert "non-dict" in (log_kwargs["error_message"] or "")
def test_json_decode_failure_is_a_failure(db_session: MagicMock) -> None:
    """response.json() raising must be treated as failure with SOFT strategy."""
    soft_hook = _make_hook(fail_strategy=HookFailStrategy.SOFT)
    with (
        patch("onyx.hooks.executor.HOOKS_AVAILABLE", True),
        patch(
            "onyx.hooks.executor.get_non_deleted_hook_by_hook_point",
            return_value=soft_hook,
        ),
        patch("onyx.hooks.executor.get_session_with_current_tenant"),
        patch("onyx.hooks.executor.update_hook__no_commit"),
        patch("onyx.hooks.executor.create_hook_execution_log__no_commit") as log_mock,
        patch("httpx.Client") as client_cls_mock,
    ):
        # The endpoint returns a body that is not valid JSON at all.
        _setup_client(
            client_cls_mock,
            response=_make_response(
                json_side_effect=json.JSONDecodeError("not JSON", "", 0)
            ),
        )

        outcome = execute_hook(
            db_session=db_session,
            hook_point=HookPoint.QUERY_PROCESSING,
            payload=_PAYLOAD,
        )

        # SOFT strategy: sentinel result plus a failed execution-log entry.
        assert isinstance(outcome, HookSoftFailed)
        log_kwargs = log_mock.call_args.kwargs
        assert log_kwargs["is_success"] is False
        assert "non-JSON" in (log_kwargs["error_message"] or "")
# ---------------------------------------------------------------------------
# HTTP failure paths
# ---------------------------------------------------------------------------
# Matrix of transport-level failures x fail strategies.
# expected_is_reachable encodes the reachability semantics: only a connect
# error marks the hook unreachable; timeouts and HTTP 5xx responses mean the
# endpoint was reached but misbehaved.
@pytest.mark.parametrize(
    "exception,fail_strategy,expected_type,expected_is_reachable",
    [
        pytest.param(
            httpx.ConnectError("refused"),
            HookFailStrategy.SOFT,
            HookSoftFailed,
            False,
            id="connect_error_soft",
        ),
        pytest.param(
            httpx.ConnectError("refused"),
            HookFailStrategy.HARD,
            OnyxError,
            False,
            id="connect_error_hard",
        ),
        pytest.param(
            httpx.TimeoutException("timeout"),
            HookFailStrategy.SOFT,
            HookSoftFailed,
            True,
            id="timeout_soft",
        ),
        pytest.param(
            httpx.TimeoutException("timeout"),
            HookFailStrategy.HARD,
            OnyxError,
            True,
            id="timeout_hard",
        ),
        pytest.param(
            httpx.HTTPStatusError(
                "500",
                request=MagicMock(),
                response=MagicMock(status_code=500, text="error"),
            ),
            HookFailStrategy.SOFT,
            HookSoftFailed,
            True,
            id="http_status_error_soft",
        ),
        pytest.param(
            httpx.HTTPStatusError(
                "500",
                request=MagicMock(),
                response=MagicMock(status_code=500, text="error"),
            ),
            HookFailStrategy.HARD,
            OnyxError,
            True,
            id="http_status_error_hard",
        ),
    ],
)
def test_http_failure_paths(
    db_session: MagicMock,
    exception: Exception,
    fail_strategy: HookFailStrategy,
    expected_type: type,
    expected_is_reachable: bool,
) -> None:
    """HTTP failures honor the hook's fail strategy and update reachability.

    SOFT strategy yields a ``HookSoftFailed`` sentinel; HARD strategy raises
    ``OnyxError`` with ``HOOK_EXECUTION_FAILED``. In both cases the hook's
    ``is_reachable`` flag is updated per the failure type.
    """
    hook = _make_hook(fail_strategy=fail_strategy)
    with (
        patch("onyx.hooks.executor.HOOKS_AVAILABLE", True),
        patch(
            "onyx.hooks.executor.get_non_deleted_hook_by_hook_point",
            return_value=hook,
        ),
        patch("onyx.hooks.executor.get_session_with_current_tenant"),
        patch("onyx.hooks.executor.update_hook__no_commit") as mock_update,
        patch("onyx.hooks.executor.create_hook_execution_log__no_commit"),
        patch("httpx.Client") as mock_client_cls,
    ):
        # Make the HTTP call itself raise the parametrized exception.
        _setup_client(mock_client_cls, side_effect=exception)
        if expected_type is OnyxError:
            # HARD strategy surfaces the failure as an OnyxError.
            with pytest.raises(OnyxError) as exc_info:
                execute_hook(
                    db_session=db_session,
                    hook_point=HookPoint.QUERY_PROCESSING,
                    payload=_PAYLOAD,
                )
            assert exc_info.value.error_code is OnyxErrorCode.HOOK_EXECUTION_FAILED
        else:
            # SOFT strategy returns the sentinel instead of raising.
            result = execute_hook(
                db_session=db_session,
                hook_point=HookPoint.QUERY_PROCESSING,
                payload=_PAYLOAD,
            )
            assert isinstance(result, expected_type)
        # Regardless of strategy, reachability is persisted exactly once.
        mock_update.assert_called_once()
        _, kwargs = mock_update.call_args
        assert kwargs["is_reachable"] is expected_is_reachable
# ---------------------------------------------------------------------------
# Authorization header
# ---------------------------------------------------------------------------
@pytest.mark.parametrize(
    "api_key_value,expect_auth_header",
    [
        pytest.param("secret-token", True, id="api_key_present"),
        pytest.param(None, False, id="api_key_absent"),
    ],
)
def test_authorization_header(
    db_session: MagicMock,
    api_key_value: str | None,
    expect_auth_header: bool,
) -> None:
    """A Bearer Authorization header is sent iff the hook has an API key."""
    configured_key = _make_api_key(api_key_value) if api_key_value else None
    target_hook = _make_hook(api_key=configured_key)
    with (
        patch("onyx.hooks.executor.HOOKS_AVAILABLE", True),
        patch(
            "onyx.hooks.executor.get_non_deleted_hook_by_hook_point",
            return_value=target_hook,
        ),
        patch("onyx.hooks.executor.get_session_with_current_tenant"),
        patch("onyx.hooks.executor.update_hook__no_commit"),
        patch("onyx.hooks.executor.create_hook_execution_log__no_commit"),
        patch("httpx.Client") as client_cls_mock,
    ):
        client_mock = _setup_client(client_cls_mock, response=_make_response())

        execute_hook(
            db_session=db_session,
            hook_point=HookPoint.QUERY_PROCESSING,
            payload=_PAYLOAD,
        )

        # Inspect the headers the executor actually posted.
        sent_headers = client_mock.post.call_args.kwargs["headers"]
        if expect_auth_header:
            assert sent_headers["Authorization"] == f"Bearer {api_key_value}"
        else:
            assert "Authorization" not in sent_headers
# ---------------------------------------------------------------------------
# Persist session failure
# ---------------------------------------------------------------------------
# Both the success path and the hard-fail path must survive a broken DB
# session during result persistence.
@pytest.mark.parametrize(
    "http_exception,expected_result",
    [
        pytest.param(None, _RESPONSE_PAYLOAD, id="success_path"),
        pytest.param(httpx.ConnectError("refused"), OnyxError, id="hard_fail_path"),
    ],
)
def test_persist_session_failure_is_swallowed(
    db_session: MagicMock,
    http_exception: Exception | None,
    expected_result: Any,
) -> None:
    """DB session failure in _persist_result must not mask the real return value or OnyxError."""
    hook = _make_hook(fail_strategy=HookFailStrategy.HARD)
    with (
        patch("onyx.hooks.executor.HOOKS_AVAILABLE", True),
        patch(
            "onyx.hooks.executor.get_non_deleted_hook_by_hook_point",
            return_value=hook,
        ),
        # Simulate persistence being impossible: opening a session blows up.
        patch(
            "onyx.hooks.executor.get_session_with_current_tenant",
            side_effect=RuntimeError("DB unavailable"),
        ),
        patch("httpx.Client") as mock_client_cls,
    ):
        # Drive either a successful response or a connect failure per param.
        _setup_client(
            mock_client_cls,
            response=_make_response() if not http_exception else None,
            side_effect=http_exception,
        )
        if expected_result is OnyxError:
            # The original hard-fail error must propagate, not the DB error.
            with pytest.raises(OnyxError) as exc_info:
                execute_hook(
                    db_session=db_session,
                    hook_point=HookPoint.QUERY_PROCESSING,
                    payload=_PAYLOAD,
                )
            assert exc_info.value.error_code is OnyxErrorCode.HOOK_EXECUTION_FAILED
        else:
            # The real payload must come back despite the persistence failure.
            result = execute_hook(
                db_session=db_session,
                hook_point=HookPoint.QUERY_PROCESSING,
                payload=_PAYLOAD,
            )
            assert result == expected_result
def test_is_reachable_failure_does_not_prevent_log(db_session: MagicMock) -> None:
    """is_reachable update failing (e.g. concurrent hook deletion) must not
    prevent the execution log from being written.

    Simulates the production failure path: update_hook__no_commit raises
    OnyxError(NOT_FOUND) as it would if the hook was concurrently deleted
    between the initial lookup and the reachable update.
    """
    hook = _make_hook(fail_strategy=HookFailStrategy.SOFT)
    with (
        patch("onyx.hooks.executor.HOOKS_AVAILABLE", True),
        patch(
            "onyx.hooks.executor.get_non_deleted_hook_by_hook_point",
            return_value=hook,
        ),
        patch("onyx.hooks.executor.get_session_with_current_tenant"),
        # Reachability update fails as if the hook vanished mid-execution.
        patch(
            "onyx.hooks.executor.update_hook__no_commit",
            side_effect=OnyxError(OnyxErrorCode.NOT_FOUND, "hook deleted"),
        ),
        patch("onyx.hooks.executor.create_hook_execution_log__no_commit") as mock_log,
        patch("httpx.Client") as mock_client_cls,
    ):
        # Connect error triggers the failure path that updates reachability.
        _setup_client(mock_client_cls, side_effect=httpx.ConnectError("refused"))
        result = execute_hook(
            db_session=db_session,
            hook_point=HookPoint.QUERY_PROCESSING,
            payload=_PAYLOAD,
        )
        # SOFT strategy still yields the sentinel, and the log is written
        # despite the reachability update having raised.
        assert isinstance(result, HookSoftFailed)
        mock_log.assert_called_once()

View File

@@ -1,109 +0,0 @@
import io
import zipfile
from unittest.mock import MagicMock
from unittest.mock import patch
from zipfile import BadZipFile
import pytest
from fastapi import UploadFile
from starlette.datastructures import Headers
from onyx.configs.constants import FileOrigin
from onyx.server.documents.connector import upload_files
def _create_test_zip() -> bytes:
"""Create a simple in-memory zip file containing two text files."""
buf = io.BytesIO()
with zipfile.ZipFile(buf, "w") as zf:
zf.writestr("file1.txt", "hello")
zf.writestr("file2.txt", "world")
return buf.getvalue()
def _make_upload_file(content: bytes, filename: str, content_type: str) -> UploadFile:
    """Wrap raw bytes in an UploadFile with the given name and content-type header."""
    return UploadFile(
        file=io.BytesIO(content),
        filename=filename,
        headers=Headers({"content-type": content_type}),
    )
@patch("onyx.server.documents.connector.get_default_file_store")
def test_upload_zip_with_unzip_true_extracts_files(
mock_get_store: MagicMock,
) -> None:
"""When unzip=True (default), a zip upload is extracted into individual files."""
mock_store = MagicMock()
mock_store.save_file.side_effect = lambda **kwargs: f"id-{kwargs['display_name']}"
mock_get_store.return_value = mock_store
zip_bytes = _create_test_zip()
upload = _make_upload_file(zip_bytes, "test.zip", "application/zip")
result = upload_files([upload], FileOrigin.CONNECTOR)
# Should have extracted the two individual files, not stored the zip itself
assert len(result.file_paths) == 2
assert "id-file1.txt" in result.file_paths
assert "id-file2.txt" in result.file_paths
assert "file1.txt" in result.file_names
assert "file2.txt" in result.file_names
@patch("onyx.server.documents.connector.get_default_file_store")
def test_upload_zip_with_unzip_false_stores_zip_as_is(
mock_get_store: MagicMock,
) -> None:
"""When unzip=False, the zip file is stored as-is without extraction."""
mock_store = MagicMock()
mock_store.save_file.return_value = "zip-file-id"
mock_get_store.return_value = mock_store
zip_bytes = _create_test_zip()
upload = _make_upload_file(zip_bytes, "site_export.zip", "application/zip")
result = upload_files([upload], FileOrigin.CONNECTOR, unzip=False)
# Should store exactly one file (the zip itself)
assert len(result.file_paths) == 1
assert result.file_paths[0] == "zip-file-id"
assert result.file_names == ["site_export.zip"]
# No zip metadata should be created
assert result.zip_metadata_file_id is None
# Verify the stored content is a valid zip
saved_content: io.BytesIO = mock_store.save_file.call_args[1]["content"]
saved_content.seek(0)
with zipfile.ZipFile(saved_content, "r") as zf:
assert set(zf.namelist()) == {"file1.txt", "file2.txt"}
@patch("onyx.server.documents.connector.get_default_file_store")
def test_upload_invalid_zip_with_unzip_false_raises(
mock_get_store: MagicMock,
) -> None:
"""An invalid zip is rejected even when unzip=False (validation still runs)."""
mock_get_store.return_value = MagicMock()
bad_zip = _make_upload_file(b"not a zip", "bad.zip", "application/zip")
with pytest.raises(BadZipFile):
upload_files([bad_zip], FileOrigin.CONNECTOR, unzip=False)
@patch("onyx.server.documents.connector.get_default_file_store")
def test_upload_multiple_zips_rejected_when_unzip_false(
mock_get_store: MagicMock,
) -> None:
"""The seen_zip guard rejects a second zip even when unzip=False."""
mock_store = MagicMock()
mock_store.save_file.return_value = "zip-id"
mock_get_store.return_value = mock_store
zip_bytes = _create_test_zip()
zip1 = _make_upload_file(zip_bytes, "a.zip", "application/zip")
zip2 = _make_upload_file(zip_bytes, "b.zip", "application/zip")
with pytest.raises(Exception, match="Only one zip file"):
upload_files([zip1, zip2], FileOrigin.CONNECTOR, unzip=False)

View File

@@ -8,7 +8,7 @@
"name": "widget",
"version": "0.1.0",
"dependencies": {
"next": "^16.1.7",
"next": "^16.1.5",
"react": "^19",
"react-dom": "^19",
"react-markdown": "^10.1.0"
@@ -1023,9 +1023,9 @@
}
},
"node_modules/@next/env": {
"version": "16.1.7",
"resolved": "https://registry.npmjs.org/@next/env/-/env-16.1.7.tgz",
"integrity": "sha512-rJJbIdJB/RQr2F1nylZr/PJzamvNNhfr3brdKP6s/GW850jbtR70QlSfFselvIBbcPUOlQwBakexjFzqLzF6pg==",
"version": "16.1.5",
"resolved": "https://registry.npmjs.org/@next/env/-/env-16.1.5.tgz",
"integrity": "sha512-CRSCPJiSZoi4Pn69RYBDI9R7YK2g59vLexPQFXY0eyw+ILevIenCywzg+DqmlBik9zszEnw2HLFOUlLAcJbL7g==",
"license": "MIT"
},
"node_modules/@next/eslint-plugin-next": {
@@ -1039,9 +1039,9 @@
}
},
"node_modules/@next/swc-darwin-arm64": {
"version": "16.1.7",
"resolved": "https://registry.npmjs.org/@next/swc-darwin-arm64/-/swc-darwin-arm64-16.1.7.tgz",
"integrity": "sha512-b2wWIE8sABdyafc4IM8r5Y/dS6kD80JRtOGrUiKTsACFQfWWgUQ2NwoUX1yjFMXVsAwcQeNpnucF2ZrujsBBPg==",
"version": "16.1.5",
"resolved": "https://registry.npmjs.org/@next/swc-darwin-arm64/-/swc-darwin-arm64-16.1.5.tgz",
"integrity": "sha512-eK7Wdm3Hjy/SCL7TevlH0C9chrpeOYWx2iR7guJDaz4zEQKWcS1IMVfMb9UKBFMg1XgzcPTYPIp1Vcpukkjg6Q==",
"cpu": [
"arm64"
],
@@ -1055,9 +1055,9 @@
}
},
"node_modules/@next/swc-darwin-x64": {
"version": "16.1.7",
"resolved": "https://registry.npmjs.org/@next/swc-darwin-x64/-/swc-darwin-x64-16.1.7.tgz",
"integrity": "sha512-zcnVaaZulS1WL0Ss38R5Q6D2gz7MtBu8GZLPfK+73D/hp4GFMrC2sudLky1QibfV7h6RJBJs/gOFvYP0X7UVlQ==",
"version": "16.1.5",
"resolved": "https://registry.npmjs.org/@next/swc-darwin-x64/-/swc-darwin-x64-16.1.5.tgz",
"integrity": "sha512-foQscSHD1dCuxBmGkbIr6ScAUF6pRoDZP6czajyvmXPAOFNnQUJu2Os1SGELODjKp/ULa4fulnBWoHV3XdPLfA==",
"cpu": [
"x64"
],
@@ -1071,9 +1071,9 @@
}
},
"node_modules/@next/swc-linux-arm64-gnu": {
"version": "16.1.7",
"resolved": "https://registry.npmjs.org/@next/swc-linux-arm64-gnu/-/swc-linux-arm64-gnu-16.1.7.tgz",
"integrity": "sha512-2ant89Lux/Q3VyC8vNVg7uBaFVP9SwoK2jJOOR0L8TQnX8CAYnh4uctAScy2Hwj2dgjVHqHLORQZJ2wH6VxhSQ==",
"version": "16.1.5",
"resolved": "https://registry.npmjs.org/@next/swc-linux-arm64-gnu/-/swc-linux-arm64-gnu-16.1.5.tgz",
"integrity": "sha512-qNIb42o3C02ccIeSeKjacF3HXotGsxh/FMk/rSRmCzOVMtoWH88odn2uZqF8RLsSUWHcAqTgYmPD3pZ03L9ZAA==",
"cpu": [
"arm64"
],
@@ -1087,9 +1087,9 @@
}
},
"node_modules/@next/swc-linux-arm64-musl": {
"version": "16.1.7",
"resolved": "https://registry.npmjs.org/@next/swc-linux-arm64-musl/-/swc-linux-arm64-musl-16.1.7.tgz",
"integrity": "sha512-uufcze7LYv0FQg9GnNeZ3/whYfo+1Q3HnQpm16o6Uyi0OVzLlk2ZWoY7j07KADZFY8qwDbsmFnMQP3p3+Ftprw==",
"version": "16.1.5",
"resolved": "https://registry.npmjs.org/@next/swc-linux-arm64-musl/-/swc-linux-arm64-musl-16.1.5.tgz",
"integrity": "sha512-U+kBxGUY1xMAzDTXmuVMfhaWUZQAwzRaHJ/I6ihtR5SbTVUEaDRiEU9YMjy1obBWpdOBuk1bcm+tsmifYSygfw==",
"cpu": [
"arm64"
],
@@ -1103,9 +1103,9 @@
}
},
"node_modules/@next/swc-linux-x64-gnu": {
"version": "16.1.7",
"resolved": "https://registry.npmjs.org/@next/swc-linux-x64-gnu/-/swc-linux-x64-gnu-16.1.7.tgz",
"integrity": "sha512-KWVf2gxYvHtvuT+c4MBOGxuse5TD7DsMFYSxVxRBnOzok/xryNeQSjXgxSv9QpIVlaGzEn/pIuI6Koosx8CGWA==",
"version": "16.1.5",
"resolved": "https://registry.npmjs.org/@next/swc-linux-x64-gnu/-/swc-linux-x64-gnu-16.1.5.tgz",
"integrity": "sha512-gq2UtoCpN7Ke/7tKaU7i/1L7eFLfhMbXjNghSv0MVGF1dmuoaPeEVDvkDuO/9LVa44h5gqpWeJ4mRRznjDv7LA==",
"cpu": [
"x64"
],
@@ -1119,9 +1119,9 @@
}
},
"node_modules/@next/swc-linux-x64-musl": {
"version": "16.1.7",
"resolved": "https://registry.npmjs.org/@next/swc-linux-x64-musl/-/swc-linux-x64-musl-16.1.7.tgz",
"integrity": "sha512-HguhaGwsGr1YAGs68uRKc4aGWxLET+NevJskOcCAwXbwj0fYX0RgZW2gsOCzr9S11CSQPIkxmoSbuVaBp4Z3dA==",
"version": "16.1.5",
"resolved": "https://registry.npmjs.org/@next/swc-linux-x64-musl/-/swc-linux-x64-musl-16.1.5.tgz",
"integrity": "sha512-bQWSE729PbXT6mMklWLf8dotislPle2L70E9q6iwETYEOt092GDn0c+TTNj26AjmeceSsC4ndyGsK5nKqHYXjQ==",
"cpu": [
"x64"
],
@@ -1135,9 +1135,9 @@
}
},
"node_modules/@next/swc-win32-arm64-msvc": {
"version": "16.1.7",
"resolved": "https://registry.npmjs.org/@next/swc-win32-arm64-msvc/-/swc-win32-arm64-msvc-16.1.7.tgz",
"integrity": "sha512-S0n3KrDJokKTeFyM/vGGGR8+pCmXYrjNTk2ZozOL1C/JFdfUIL9O1ATaJOl5r2POe56iRChbsszrjMAdWSv7kQ==",
"version": "16.1.5",
"resolved": "https://registry.npmjs.org/@next/swc-win32-arm64-msvc/-/swc-win32-arm64-msvc-16.1.5.tgz",
"integrity": "sha512-LZli0anutkIllMtTAWZlDqdfvjWX/ch8AFK5WgkNTvaqwlouiD1oHM+WW8RXMiL0+vAkAJyAGEzPPjO+hnrSNQ==",
"cpu": [
"arm64"
],
@@ -1151,9 +1151,9 @@
}
},
"node_modules/@next/swc-win32-x64-msvc": {
"version": "16.1.7",
"resolved": "https://registry.npmjs.org/@next/swc-win32-x64-msvc/-/swc-win32-x64-msvc-16.1.7.tgz",
"integrity": "sha512-mwgtg8CNZGYm06LeEd+bNnOUfwOyNem/rOiP14Lsz+AnUY92Zq/LXwtebtUiaeVkhbroRCQ0c8GlR4UT1U+0yg==",
"version": "16.1.5",
"resolved": "https://registry.npmjs.org/@next/swc-win32-x64-msvc/-/swc-win32-x64-msvc-16.1.5.tgz",
"integrity": "sha512-7is37HJTNQGhjPpQbkKjKEboHYQnCgpVt/4rBrrln0D9nderNxZ8ZWs8w1fAtzUx7wEyYjQ+/13myFgFj6K2Ng==",
"cpu": [
"x64"
],
@@ -2564,15 +2564,12 @@
"dev": true
},
"node_modules/baseline-browser-mapping": {
"version": "2.10.8",
"resolved": "https://registry.npmjs.org/baseline-browser-mapping/-/baseline-browser-mapping-2.10.8.tgz",
"integrity": "sha512-PCLz/LXGBsNTErbtB6i5u4eLpHeMfi93aUv5duMmj6caNu6IphS4q6UevDnL36sZQv9lrP11dbPKGMaXPwMKfQ==",
"version": "2.9.14",
"resolved": "https://registry.npmjs.org/baseline-browser-mapping/-/baseline-browser-mapping-2.9.14.tgz",
"integrity": "sha512-B0xUquLkiGLgHhpPBqvl7GWegWBUNuujQ6kXd/r1U38ElPT6Ok8KZ8e+FpUGEc2ZoRQUzq/aUnaKFc/svWUGSg==",
"license": "Apache-2.0",
"bin": {
"baseline-browser-mapping": "dist/cli.cjs"
},
"engines": {
"node": ">=6.0.0"
"baseline-browser-mapping": "dist/cli.js"
}
},
"node_modules/brace-expansion": {
@@ -5929,14 +5926,14 @@
"dev": true
},
"node_modules/next": {
"version": "16.1.7",
"resolved": "https://registry.npmjs.org/next/-/next-16.1.7.tgz",
"integrity": "sha512-WM0L7WrSvKwoLegLYr6V+mz+RIofqQgVAfHhMp9a88ms0cFX8iX9ew+snpWlSBwpkURJOUdvCEt3uLl3NNzvWg==",
"version": "16.1.5",
"resolved": "https://registry.npmjs.org/next/-/next-16.1.5.tgz",
"integrity": "sha512-f+wE+NSbiQgh3DSAlTaw2FwY5yGdVViAtp8TotNQj4kk4Q8Bh1sC/aL9aH+Rg1YAVn18OYXsRDT7U/079jgP7w==",
"license": "MIT",
"dependencies": {
"@next/env": "16.1.7",
"@next/env": "16.1.5",
"@swc/helpers": "0.5.15",
"baseline-browser-mapping": "^2.9.19",
"baseline-browser-mapping": "^2.8.3",
"caniuse-lite": "^1.0.30001579",
"postcss": "8.4.31",
"styled-jsx": "5.1.6"
@@ -5948,14 +5945,14 @@
"node": ">=20.9.0"
},
"optionalDependencies": {
"@next/swc-darwin-arm64": "16.1.7",
"@next/swc-darwin-x64": "16.1.7",
"@next/swc-linux-arm64-gnu": "16.1.7",
"@next/swc-linux-arm64-musl": "16.1.7",
"@next/swc-linux-x64-gnu": "16.1.7",
"@next/swc-linux-x64-musl": "16.1.7",
"@next/swc-win32-arm64-msvc": "16.1.7",
"@next/swc-win32-x64-msvc": "16.1.7",
"@next/swc-darwin-arm64": "16.1.5",
"@next/swc-darwin-x64": "16.1.5",
"@next/swc-linux-arm64-gnu": "16.1.5",
"@next/swc-linux-arm64-musl": "16.1.5",
"@next/swc-linux-x64-gnu": "16.1.5",
"@next/swc-linux-x64-musl": "16.1.5",
"@next/swc-win32-arm64-msvc": "16.1.5",
"@next/swc-win32-x64-msvc": "16.1.5",
"sharp": "^0.34.4"
},
"peerDependencies": {

View File

@@ -9,7 +9,7 @@
"lint": "next lint"
},
"dependencies": {
"next": "^16.1.7",
"next": "^16.1.5",
"react": "^19",
"react-dom": "^19",
"react-markdown": "^10.1.0"

View File

@@ -92,7 +92,7 @@ backend = [
"python-gitlab==5.6.0",
"python-pptx==0.6.23",
"pypandoc_binary==1.16.2",
"pypdf==6.9.1",
"pypdf==6.8.0",
"pytest-mock==3.12.0",
"pytest-playwright==0.7.0",
"python-docx==1.1.2",
@@ -245,7 +245,6 @@ select = [
"ARG",
"E",
"F",
"S324",
"W",
]

8
uv.lock generated
View File

@@ -4481,7 +4481,7 @@ requires-dist = [
{ name = "pygithub", marker = "extra == 'backend'", specifier = "==2.5.0" },
{ name = "pympler", marker = "extra == 'backend'", specifier = "==1.1" },
{ name = "pypandoc-binary", marker = "extra == 'backend'", specifier = "==1.16.2" },
{ name = "pypdf", marker = "extra == 'backend'", specifier = "==6.9.1" },
{ name = "pypdf", marker = "extra == 'backend'", specifier = "==6.8.0" },
{ name = "pytest", marker = "extra == 'dev'", specifier = "==8.3.5" },
{ name = "pytest-alembic", marker = "extra == 'dev'", specifier = "==0.12.1" },
{ name = "pytest-asyncio", marker = "extra == 'dev'", specifier = "==1.3.0" },
@@ -5727,11 +5727,11 @@ wheels = [
[[package]]
name = "pypdf"
version = "6.9.1"
version = "6.8.0"
source = { registry = "https://pypi.org/simple" }
sdist = { url = "https://files.pythonhosted.org/packages/f9/fb/dc2e8cb006e80b0020ed20d8649106fe4274e82d8e756ad3e24ade19c0df/pypdf-6.9.1.tar.gz", hash = "sha256:ae052407d33d34de0c86c5c729be6d51010bf36e03035a8f23ab449bca52377d", size = 5311551, upload-time = "2026-03-17T10:46:07.876Z" }
sdist = { url = "https://files.pythonhosted.org/packages/b4/a3/e705b0805212b663a4c27b861c8a603dba0f8b4bb281f96f8e746576a50d/pypdf-6.8.0.tar.gz", hash = "sha256:cb7eaeaa4133ce76f762184069a854e03f4d9a08568f0e0623f7ea810407833b", size = 5307831, upload-time = "2026-03-09T13:37:40.591Z" }
wheels = [
{ url = "https://files.pythonhosted.org/packages/f9/f4/75543fa802b86e72f87e9395440fe1a89a6d149887e3e55745715c3352ac/pypdf-6.9.1-py3-none-any.whl", hash = "sha256:f35a6a022348fae47e092a908339a8f3dc993510c026bb39a96718fc7185e89f", size = 333661, upload-time = "2026-03-17T10:46:06.286Z" },
{ url = "https://files.pythonhosted.org/packages/8c/ec/4ccf3bb86b1afe5d7176e1c8abcdbf22b53dd682ec2eda50e1caadcf6846/pypdf-6.8.0-py3-none-any.whl", hash = "sha256:2a025080a8dd73f48123c89c57174a5ff3806c71763ee4e49572dc90454943c7", size = 332177, upload-time = "2026-03-09T13:37:38.774Z" },
]
[[package]]

View File

@@ -0,0 +1,59 @@
---
name: playwright-test-generator
description: Use this agent when you need to create automated browser tests using Playwright. Examples: <example>Context: User wants to test a login flow on their web application. user: 'I need a test that logs into my app at localhost:3000 with username admin@test.com and password 123456, then verifies the dashboard page loads' assistant: 'I'll use the generator agent to create and validate this login test for you' <commentary> The user needs a specific browser automation test created, which is exactly what the generator agent is designed for. </commentary></example><example>Context: User has built a new checkout flow and wants to ensure it works correctly. user: 'Can you create a test that adds items to cart, proceeds to checkout, fills in payment details, and confirms the order?' assistant: 'I'll use the generator agent to build a comprehensive checkout flow test' <commentary> This is a complex user journey that needs to be automated and tested, perfect for the generator agent. </commentary></example>
tools: Glob, Grep, Read, mcp__playwright-test__browser_click, mcp__playwright-test__browser_drag, mcp__playwright-test__browser_evaluate, mcp__playwright-test__browser_file_upload, mcp__playwright-test__browser_handle_dialog, mcp__playwright-test__browser_hover, mcp__playwright-test__browser_navigate, mcp__playwright-test__browser_press_key, mcp__playwright-test__browser_select_option, mcp__playwright-test__browser_snapshot, mcp__playwright-test__browser_type, mcp__playwright-test__browser_verify_element_visible, mcp__playwright-test__browser_verify_list_visible, mcp__playwright-test__browser_verify_text_visible, mcp__playwright-test__browser_verify_value, mcp__playwright-test__browser_wait_for, mcp__playwright-test__generator_read_log, mcp__playwright-test__generator_setup_page, mcp__playwright-test__generator_write_test
model: sonnet
color: blue
---
You are a Playwright Test Generator, an expert in browser automation and end-to-end testing.
Your specialty is creating robust, reliable Playwright tests that accurately simulate user interactions and validate
application behavior.
# For each test you generate
- Obtain the test plan with all the steps and verification specification
- Run the `generator_setup_page` tool to set up page for the scenario
- For each step and verification in the scenario, do the following:
- Use Playwright tool to manually execute it in real-time.
- Use the step description as the intent for each Playwright tool call.
- Retrieve generator log via `generator_read_log`
- Immediately after reading the test log, invoke `generator_write_test` with the generated source code
- File should contain single test
- File name must be fs-friendly scenario name
- Test must be placed in a describe matching the top-level test plan item
- Test title must match the scenario name
- Includes a comment with the step text before each step execution. Do not duplicate comments if step requires
multiple actions.
- Always use best practices from the log when generating tests.
<example-generation>
For following plan:
```markdown file=specs/plan.md
### 1. Adding New Todos
**Seed:** `tests/seed.spec.ts`
#### 1.1 Add Valid Todo
**Steps:**
1. Click in the "What needs to be done?" input field
#### 1.2 Add Multiple Todos
...
```
Following file is generated:
```ts file=add-valid-todo.spec.ts
// spec: specs/plan.md
// seed: tests/seed.spec.ts
test.describe('Adding New Todos', () => {
  test('Add Valid Todo', async ({ page }) => {
// 1. Click in the "What needs to be done?" input field
await page.click(...);
...
});
});
```
</example-generation>

View File

@@ -0,0 +1,45 @@
---
name: playwright-test-healer
description: Use this agent when you need to debug and fix failing Playwright tests. Examples: <example>Context: A developer has a failing Playwright test that needs to be debugged and fixed. user: 'The login test is failing, can you fix it?' assistant: 'I'll use the healer agent to debug and fix the failing login test.' <commentary> The user has identified a specific failing test that needs debugging and fixing, which is exactly what the healer agent is designed for. </commentary></example><example>Context: After running a test suite, several tests are reported as failing. user: 'Test user-registration.spec.ts is broken after the recent changes' assistant: 'Let me use the healer agent to investigate and fix the user-registration test.' <commentary> A specific test file is failing and needs debugging, which requires the systematic approach of the playwright-test-healer agent. </commentary></example>
tools: Glob, Grep, Read, Write, Edit, MultiEdit, mcp__playwright-test__browser_console_messages, mcp__playwright-test__browser_evaluate, mcp__playwright-test__browser_generate_locator, mcp__playwright-test__browser_network_requests, mcp__playwright-test__browser_snapshot, mcp__playwright-test__test_debug, mcp__playwright-test__test_list, mcp__playwright-test__test_run
model: sonnet
color: red
---
You are the Playwright Test Healer, an expert test automation engineer specializing in debugging and
resolving Playwright test failures. Your mission is to systematically identify, diagnose, and fix
broken Playwright tests using a methodical approach.
Your workflow:
1. **Initial Execution**: Run all tests using the `test_run` tool to identify failing tests
2. **Debug failed tests**: For each failing test run the `test_debug` tool.
3. **Error Investigation**: When the test pauses on errors, use available Playwright MCP tools to:
- Examine the error details
- Capture page snapshot to understand the context
- Analyze selectors, timing issues, or assertion failures
4. **Root Cause Analysis**: Determine the underlying cause of the failure by examining:
- Element selectors that may have changed
- Timing and synchronization issues
- Data dependencies or test environment problems
- Application changes that broke test assumptions
5. **Code Remediation**: Edit the test code to address identified issues, focusing on:
- Updating selectors to match current application state
- Fixing assertions and expected values
- Improving test reliability and maintainability
- For inherently dynamic data, utilize regular expressions to produce resilient locators
6. **Verification**: Restart the test after each fix to validate the changes
7. **Iteration**: Repeat the investigation and fixing process until the test passes cleanly
Key principles:
- Be systematic and thorough in your debugging approach
- Document your findings and reasoning for each fix
- Prefer robust, maintainable solutions over quick hacks
- Use Playwright best practices for reliable test automation
- If multiple errors exist, fix them one at a time and retest
- Provide clear explanations of what was broken and how you fixed it
- You will continue this process until the test runs successfully without any failures or errors.
- If the error persists and you have high level of confidence that the test is correct, mark this test as test.fixme()
so that it is skipped during the execution. Add a comment before the failing step explaining what is happening instead
of the expected behavior.
- Do not ask user questions, you are not interactive tool, do the most reasonable thing possible to pass the test.
- Never wait for networkidle or use other discouraged or deprecated apis

View File

@@ -0,0 +1,93 @@
---
name: playwright-test-planner
description: Use this agent when you need to create comprehensive test plan for a web application or website. Examples: <example>Context: User wants to test a new e-commerce checkout flow. user: 'I need test scenarios for our new checkout process at https://mystore.com/checkout' assistant: 'I'll use the planner agent to navigate to your checkout page and create comprehensive test scenarios.' <commentary> The user needs test planning for a specific web page, so use the planner agent to explore and create test scenarios. </commentary></example><example>Context: User has deployed a new feature and wants thorough testing coverage. user: 'Can you help me test our new user dashboard at https://app.example.com/dashboard?' assistant: 'I'll launch the planner agent to explore your dashboard and develop detailed test scenarios.' <commentary> This requires web exploration and test scenario creation, perfect for the planner agent. </commentary></example>
tools: Glob, Grep, Read, Write, mcp__playwright-test__browser_click, mcp__playwright-test__browser_close, mcp__playwright-test__browser_console_messages, mcp__playwright-test__browser_drag, mcp__playwright-test__browser_evaluate, mcp__playwright-test__browser_file_upload, mcp__playwright-test__browser_handle_dialog, mcp__playwright-test__browser_hover, mcp__playwright-test__browser_navigate, mcp__playwright-test__browser_navigate_back, mcp__playwright-test__browser_network_requests, mcp__playwright-test__browser_press_key, mcp__playwright-test__browser_select_option, mcp__playwright-test__browser_snapshot, mcp__playwright-test__browser_take_screenshot, mcp__playwright-test__browser_type, mcp__playwright-test__browser_wait_for, mcp__playwright-test__planner_setup_page
model: sonnet
color: green
---
You are an expert web test planner with extensive experience in quality assurance, user experience testing, and test
scenario design. Your expertise includes functional testing, edge case identification, and comprehensive test coverage
planning.
You will:
1. **Navigate and Explore**
- Invoke the `planner_setup_page` tool once to set up page before using any other tools
- Explore the browser snapshot
- Do not take screenshots unless absolutely necessary
- Use browser_* tools to navigate and discover interface
- Thoroughly explore the interface, identifying all interactive elements, forms, navigation paths, and functionality
2. **Analyze User Flows**
- Map out the primary user journeys and identify critical paths through the application
- Consider different user types and their typical behaviors
3. **Design Comprehensive Scenarios**
Create detailed test scenarios that cover:
- Happy path scenarios (normal user behavior)
- Edge cases and boundary conditions
- Error handling and validation
4. **Structure Test Plans**
Each scenario must include:
- Clear, descriptive title
- Detailed step-by-step instructions
- Expected outcomes where appropriate
- Assumptions about starting state (always assume blank/fresh state)
- Success criteria and failure conditions
5. **Create Documentation**
Save your test plan as requested:
- Executive summary of the tested page/application
- Individual scenarios as separate sections
- Each scenario formatted with numbered steps
- Clear expected results for verification
<example-spec>
# TodoMVC Application - Comprehensive Test Plan
## Application Overview
The TodoMVC application is a React-based todo list manager that provides core task management functionality. The
application features:
- **Task Management**: Add, edit, complete, and delete individual todos
- **Bulk Operations**: Mark all todos as complete/incomplete and clear all completed todos
- **Filtering**: View todos by All, Active, or Completed status
- **URL Routing**: Support for direct navigation to filtered views via URLs
- **Counter Display**: Real-time count of active (incomplete) todos
- **Persistence**: State maintained during session (browser refresh behavior not tested)
## Test Scenarios
### 1. Adding New Todos
**Seed:** `tests/seed.spec.ts`
#### 1.1 Add Valid Todo
**Steps:**
1. Click in the "What needs to be done?" input field
2. Type "Buy groceries"
3. Press Enter key
**Expected Results:**
- Todo appears in the list with unchecked checkbox
- Counter shows "1 item left"
- Input field is cleared and ready for next entry
- Todo list controls become visible (Mark all as complete checkbox)
#### 1.2
...
</example-spec>
**Quality Standards**:
- Write steps that are specific enough for any tester to follow
- Include negative testing scenarios
- Ensure scenarios are independent and can be run in any order
**Output Format**: Always save the complete test plan as a markdown file with clear headings, numbered steps, and
professional formatting suitable for sharing with development and QA teams.

View File

@@ -0,0 +1 @@
../../../.cursor/skills/playwright

13
web/.mcp.json Normal file
View File

@@ -0,0 +1,13 @@
{
"mcpServers": {
"playwright": {
"type": "stdio",
"command": "npx",
"args": [
"-y",
"@playwright/mcp@latest"
],
"env": {}
}
}
}

View File

@@ -174,7 +174,6 @@ function ContentLg({
)}
onClick={editable ? startEditing : undefined}
style={{ height: config.lineHeight }}
title={title}
>
{title}
</span>

View File

@@ -218,7 +218,6 @@ function ContentMd({
"text-text-04",
editable && "cursor-pointer"
)}
title={title}
onClick={editable ? startEditing : undefined}
style={{ height: config.lineHeight }}
>

View File

@@ -118,7 +118,6 @@ function ContentSm({
<span
className={cn("opal-content-sm-title", config.titleFont)}
style={{ height: config.lineHeight }}
title={title}
>
{title}
</span>

View File

@@ -231,7 +231,6 @@ function ContentXl({
)}
onClick={editable ? startEditing : undefined}
style={{ height: config.lineHeight }}
title={title}
>
{title}
</span>

82
web/package-lock.json generated
View File

@@ -61,7 +61,7 @@
"mdast-util-find-and-replace": "^3.0.1",
"mime": "^4.1.0",
"motion": "^12.29.0",
"next": "16.1.7",
"next": "16.1.6",
"next-themes": "^0.4.4",
"postcss": "^8.5.6",
"posthog-js": "^1.176.0",
@@ -2896,9 +2896,9 @@
}
},
"node_modules/@next/env": {
"version": "16.1.7",
"resolved": "https://registry.npmjs.org/@next/env/-/env-16.1.7.tgz",
"integrity": "sha512-rJJbIdJB/RQr2F1nylZr/PJzamvNNhfr3brdKP6s/GW850jbtR70QlSfFselvIBbcPUOlQwBakexjFzqLzF6pg==",
"version": "16.1.6",
"resolved": "https://registry.npmjs.org/@next/env/-/env-16.1.6.tgz",
"integrity": "sha512-N1ySLuZjnAtN3kFnwhAwPvZah8RJxKasD7x1f8shFqhncnWZn4JMfg37diLNuoHsLAlrDfM3g4mawVdtAG8XLQ==",
"license": "MIT"
},
"node_modules/@next/eslint-plugin-next": {
@@ -2942,9 +2942,9 @@
}
},
"node_modules/@next/swc-darwin-arm64": {
"version": "16.1.7",
"resolved": "https://registry.npmjs.org/@next/swc-darwin-arm64/-/swc-darwin-arm64-16.1.7.tgz",
"integrity": "sha512-b2wWIE8sABdyafc4IM8r5Y/dS6kD80JRtOGrUiKTsACFQfWWgUQ2NwoUX1yjFMXVsAwcQeNpnucF2ZrujsBBPg==",
"version": "16.1.6",
"resolved": "https://registry.npmjs.org/@next/swc-darwin-arm64/-/swc-darwin-arm64-16.1.6.tgz",
"integrity": "sha512-wTzYulosJr/6nFnqGW7FrG3jfUUlEf8UjGA0/pyypJl42ExdVgC6xJgcXQ+V8QFn6niSG2Pb8+MIG1mZr2vczw==",
"cpu": [
"arm64"
],
@@ -2958,9 +2958,9 @@
}
},
"node_modules/@next/swc-darwin-x64": {
"version": "16.1.7",
"resolved": "https://registry.npmjs.org/@next/swc-darwin-x64/-/swc-darwin-x64-16.1.7.tgz",
"integrity": "sha512-zcnVaaZulS1WL0Ss38R5Q6D2gz7MtBu8GZLPfK+73D/hp4GFMrC2sudLky1QibfV7h6RJBJs/gOFvYP0X7UVlQ==",
"version": "16.1.6",
"resolved": "https://registry.npmjs.org/@next/swc-darwin-x64/-/swc-darwin-x64-16.1.6.tgz",
"integrity": "sha512-BLFPYPDO+MNJsiDWbeVzqvYd4NyuRrEYVB5k2N3JfWncuHAy2IVwMAOlVQDFjj+krkWzhY2apvmekMkfQR0CUQ==",
"cpu": [
"x64"
],
@@ -2974,9 +2974,9 @@
}
},
"node_modules/@next/swc-linux-arm64-gnu": {
"version": "16.1.7",
"resolved": "https://registry.npmjs.org/@next/swc-linux-arm64-gnu/-/swc-linux-arm64-gnu-16.1.7.tgz",
"integrity": "sha512-2ant89Lux/Q3VyC8vNVg7uBaFVP9SwoK2jJOOR0L8TQnX8CAYnh4uctAScy2Hwj2dgjVHqHLORQZJ2wH6VxhSQ==",
"version": "16.1.6",
"resolved": "https://registry.npmjs.org/@next/swc-linux-arm64-gnu/-/swc-linux-arm64-gnu-16.1.6.tgz",
"integrity": "sha512-OJYkCd5pj/QloBvoEcJ2XiMnlJkRv9idWA/j0ugSuA34gMT6f5b7vOiCQHVRpvStoZUknhl6/UxOXL4OwtdaBw==",
"cpu": [
"arm64"
],
@@ -2990,9 +2990,9 @@
}
},
"node_modules/@next/swc-linux-arm64-musl": {
"version": "16.1.7",
"resolved": "https://registry.npmjs.org/@next/swc-linux-arm64-musl/-/swc-linux-arm64-musl-16.1.7.tgz",
"integrity": "sha512-uufcze7LYv0FQg9GnNeZ3/whYfo+1Q3HnQpm16o6Uyi0OVzLlk2ZWoY7j07KADZFY8qwDbsmFnMQP3p3+Ftprw==",
"version": "16.1.6",
"resolved": "https://registry.npmjs.org/@next/swc-linux-arm64-musl/-/swc-linux-arm64-musl-16.1.6.tgz",
"integrity": "sha512-S4J2v+8tT3NIO9u2q+S0G5KdvNDjXfAv06OhfOzNDaBn5rw84DGXWndOEB7d5/x852A20sW1M56vhC/tRVbccQ==",
"cpu": [
"arm64"
],
@@ -3006,9 +3006,9 @@
}
},
"node_modules/@next/swc-linux-x64-gnu": {
"version": "16.1.7",
"resolved": "https://registry.npmjs.org/@next/swc-linux-x64-gnu/-/swc-linux-x64-gnu-16.1.7.tgz",
"integrity": "sha512-KWVf2gxYvHtvuT+c4MBOGxuse5TD7DsMFYSxVxRBnOzok/xryNeQSjXgxSv9QpIVlaGzEn/pIuI6Koosx8CGWA==",
"version": "16.1.6",
"resolved": "https://registry.npmjs.org/@next/swc-linux-x64-gnu/-/swc-linux-x64-gnu-16.1.6.tgz",
"integrity": "sha512-2eEBDkFlMMNQnkTyPBhQOAyn2qMxyG2eE7GPH2WIDGEpEILcBPI/jdSv4t6xupSP+ot/jkfrCShLAa7+ZUPcJQ==",
"cpu": [
"x64"
],
@@ -3022,9 +3022,9 @@
}
},
"node_modules/@next/swc-linux-x64-musl": {
"version": "16.1.7",
"resolved": "https://registry.npmjs.org/@next/swc-linux-x64-musl/-/swc-linux-x64-musl-16.1.7.tgz",
"integrity": "sha512-HguhaGwsGr1YAGs68uRKc4aGWxLET+NevJskOcCAwXbwj0fYX0RgZW2gsOCzr9S11CSQPIkxmoSbuVaBp4Z3dA==",
"version": "16.1.6",
"resolved": "https://registry.npmjs.org/@next/swc-linux-x64-musl/-/swc-linux-x64-musl-16.1.6.tgz",
"integrity": "sha512-oicJwRlyOoZXVlxmIMaTq7f8pN9QNbdes0q2FXfRsPhfCi8n8JmOZJm5oo1pwDaFbnnD421rVU409M3evFbIqg==",
"cpu": [
"x64"
],
@@ -3038,9 +3038,9 @@
}
},
"node_modules/@next/swc-win32-arm64-msvc": {
"version": "16.1.7",
"resolved": "https://registry.npmjs.org/@next/swc-win32-arm64-msvc/-/swc-win32-arm64-msvc-16.1.7.tgz",
"integrity": "sha512-S0n3KrDJokKTeFyM/vGGGR8+pCmXYrjNTk2ZozOL1C/JFdfUIL9O1ATaJOl5r2POe56iRChbsszrjMAdWSv7kQ==",
"version": "16.1.6",
"resolved": "https://registry.npmjs.org/@next/swc-win32-arm64-msvc/-/swc-win32-arm64-msvc-16.1.6.tgz",
"integrity": "sha512-gQmm8izDTPgs+DCWH22kcDmuUp7NyiJgEl18bcr8irXA5N2m2O+JQIr6f3ct42GOs9c0h8QF3L5SzIxcYAAXXw==",
"cpu": [
"arm64"
],
@@ -3054,9 +3054,9 @@
}
},
"node_modules/@next/swc-win32-x64-msvc": {
"version": "16.1.7",
"resolved": "https://registry.npmjs.org/@next/swc-win32-x64-msvc/-/swc-win32-x64-msvc-16.1.7.tgz",
"integrity": "sha512-mwgtg8CNZGYm06LeEd+bNnOUfwOyNem/rOiP14Lsz+AnUY92Zq/LXwtebtUiaeVkhbroRCQ0c8GlR4UT1U+0yg==",
"version": "16.1.6",
"resolved": "https://registry.npmjs.org/@next/swc-win32-x64-msvc/-/swc-win32-x64-msvc-16.1.6.tgz",
"integrity": "sha512-NRfO39AIrzBnixKbjuo2YiYhB6o9d8v/ymU9m/Xk8cyVk+k7XylniXkHwjs4s70wedVffc6bQNbufk5v0xEm0A==",
"cpu": [
"x64"
],
@@ -14068,14 +14068,14 @@
"license": "MIT"
},
"node_modules/next": {
"version": "16.1.7",
"resolved": "https://registry.npmjs.org/next/-/next-16.1.7.tgz",
"integrity": "sha512-WM0L7WrSvKwoLegLYr6V+mz+RIofqQgVAfHhMp9a88ms0cFX8iX9ew+snpWlSBwpkURJOUdvCEt3uLl3NNzvWg==",
"version": "16.1.6",
"resolved": "https://registry.npmjs.org/next/-/next-16.1.6.tgz",
"integrity": "sha512-hkyRkcu5x/41KoqnROkfTm2pZVbKxvbZRuNvKXLRXxs3VfyO0WhY50TQS40EuKO9SW3rBj/sF3WbVwDACeMZyw==",
"license": "MIT",
"dependencies": {
"@next/env": "16.1.7",
"@next/env": "16.1.6",
"@swc/helpers": "0.5.15",
"baseline-browser-mapping": "^2.9.19",
"baseline-browser-mapping": "^2.8.3",
"caniuse-lite": "^1.0.30001579",
"postcss": "8.4.31",
"styled-jsx": "5.1.6"
@@ -14087,14 +14087,14 @@
"node": ">=20.9.0"
},
"optionalDependencies": {
"@next/swc-darwin-arm64": "16.1.7",
"@next/swc-darwin-x64": "16.1.7",
"@next/swc-linux-arm64-gnu": "16.1.7",
"@next/swc-linux-arm64-musl": "16.1.7",
"@next/swc-linux-x64-gnu": "16.1.7",
"@next/swc-linux-x64-musl": "16.1.7",
"@next/swc-win32-arm64-msvc": "16.1.7",
"@next/swc-win32-x64-msvc": "16.1.7",
"@next/swc-darwin-arm64": "16.1.6",
"@next/swc-darwin-x64": "16.1.6",
"@next/swc-linux-arm64-gnu": "16.1.6",
"@next/swc-linux-arm64-musl": "16.1.6",
"@next/swc-linux-x64-gnu": "16.1.6",
"@next/swc-linux-x64-musl": "16.1.6",
"@next/swc-win32-arm64-msvc": "16.1.6",
"@next/swc-win32-x64-msvc": "16.1.6",
"sharp": "^0.34.4"
},
"peerDependencies": {

View File

@@ -79,7 +79,7 @@
"mdast-util-find-and-replace": "^3.0.1",
"mime": "^4.1.0",
"motion": "^12.29.0",
"next": "16.1.7",
"next": "16.1.6",
"next-themes": "^0.4.4",
"postcss": "^8.5.6",
"posthog-js": "^1.176.0",

View File

@@ -12,7 +12,7 @@ import { localizeAndPrettify } from "@/lib/time";
import Button from "@/refresh-components/buttons/Button";
import Text from "@/refresh-components/texts/Text";
import { PageSelector } from "@/components/PageSelector";
import { useCallback, useEffect, useRef, useState, useMemo } from "react";
import { useEffect, useState, useMemo } from "react";
import { SvgAlertTriangle } from "@opal/icons";
export interface IndexAttemptErrorsModalProps {
errors: {
@@ -22,66 +22,93 @@ export interface IndexAttemptErrorsModalProps {
onClose: () => void;
onResolveAll: () => void;
isResolvingErrors?: boolean;
onPageChange?: (page: number) => void;
currentPage?: number;
pageSize?: number;
}
const ROW_HEIGHT = 65; // 4rem + 1px for border
export default function IndexAttemptErrorsModal({
errors,
onClose,
onResolveAll,
isResolvingErrors = false,
pageSize: propPageSize,
}: IndexAttemptErrorsModalProps) {
const observerRef = useRef<ResizeObserver | null>(null);
const [pageSize, setPageSize] = useState(10);
const [calculatedPageSize, setCalculatedPageSize] = useState(10);
const [currentPage, setCurrentPage] = useState(1);
const tableContainerRef = useCallback((container: HTMLDivElement | null) => {
if (observerRef.current) {
observerRef.current.disconnect();
observerRef.current = null;
}
// Reset to page 1 when the error list actually changes
useEffect(() => {
setCurrentPage(1);
}, [errors.items.length, errors.total_items]);
if (!container) return;
useEffect(() => {
const calculatePageSize = () => {
// Modal height is 75% of viewport height
const modalHeight = window.innerHeight * 0.6;
const observer = new ResizeObserver(() => {
const thead = container.querySelector("thead");
const theadHeight = thead?.getBoundingClientRect().height ?? 0;
const availableHeight = container.clientHeight - theadHeight;
const newPageSize = Math.max(3, Math.floor(availableHeight / ROW_HEIGHT));
setPageSize(newPageSize);
});
// Estimate heights (in pixels):
// - Modal header (title + description): ~120px
// - Table header: ~40px
// - Pagination section: ~80px
// - Modal padding: ~64px (32px top + 32px bottom)
const fixedHeight = 120 + 40 + 80 + 64;
observer.observe(container);
observerRef.current = observer;
// Available height for table rows
const availableHeight = modalHeight - fixedHeight;
// Each table row is approximately 60px (including borders and padding)
const rowHeight = 60;
// Calculate how many rows can fit, with a minimum of 3
const rowsPerPage = Math.max(3, Math.floor(availableHeight / rowHeight));
setCalculatedPageSize((prev) => {
// Only update if the new size is significantly different to prevent flickering
if (Math.abs(prev - rowsPerPage) > 0) {
return rowsPerPage;
}
return prev;
});
};
// Initial calculation
calculatePageSize();
// Debounced resize handler to prevent excessive recalculation
let resizeTimeout: NodeJS.Timeout;
const debouncedCalculatePageSize = () => {
clearTimeout(resizeTimeout);
resizeTimeout = setTimeout(calculatePageSize, 100);
};
window.addEventListener("resize", debouncedCalculatePageSize);
return () => {
window.removeEventListener("resize", debouncedCalculatePageSize);
clearTimeout(resizeTimeout);
};
}, []);
// When data changes, reset to page 1.
// When page size changes (resize), preserve the user's position by
// finding which new page contains the first item they were looking at.
const prevPageSizeRef = useRef(pageSize);
// Separate effect to reset current page when page size changes
useEffect(() => {
if (pageSize !== prevPageSizeRef.current) {
setCurrentPage((prev) => {
const firstVisibleIndex = (prev - 1) * prevPageSizeRef.current;
const newPage = Math.floor(firstVisibleIndex / pageSize) + 1;
const totalPages = Math.ceil(errors.items.length / pageSize);
return Math.min(newPage, totalPages);
});
prevPageSizeRef.current = pageSize;
} else {
setCurrentPage(1);
}
}, [errors.items.length, pageSize]);
setCurrentPage(1);
}, [calculatedPageSize]);
const pageSize = propPageSize || calculatedPageSize;
// Memoize pagination calculations to prevent unnecessary recalculations
const paginationData = useMemo(() => {
const totalPages = Math.ceil(errors.items.length / pageSize);
const startIndex = (currentPage - 1) * pageSize;
const currentPageItems = errors.items.slice(
const endIndex = startIndex + pageSize;
const currentPageItems = errors.items.slice(startIndex, endIndex);
return {
totalPages,
currentPageItems,
startIndex,
startIndex + pageSize
);
return { totalPages, currentPageItems };
endIndex,
};
}, [errors.items, pageSize, currentPage]);
const hasUnresolvedErrors = useMemo(
@@ -110,7 +137,7 @@ export default function IndexAttemptErrorsModal({
onClose={onClose}
height="fit"
/>
<Modal.Body height="full">
<Modal.Body>
{!isResolvingErrors && (
<div className="flex flex-col gap-2 flex-shrink-0">
<Text as="p">
@@ -125,10 +152,7 @@ export default function IndexAttemptErrorsModal({
</div>
)}
<div
ref={tableContainerRef}
className="flex-1 w-full overflow-hidden min-h-0"
>
<div className="flex-1 overflow-hidden min-h-0">
<Table>
<TableHeader>
<TableRow>
@@ -141,11 +165,11 @@ export default function IndexAttemptErrorsModal({
<TableBody>
{paginationData.currentPageItems.length > 0 ? (
paginationData.currentPageItems.map((error) => (
<TableRow key={error.id} className="h-[4rem]">
<TableCell>
<TableRow key={error.id} className="h-[60px] max-h-[60px]">
<TableCell className="h-[60px] align-top">
{localizeAndPrettify(error.time_created)}
</TableCell>
<TableCell>
<TableCell className="h-[60px] align-top">
{error.document_link ? (
<a
href={error.document_link}
@@ -159,12 +183,12 @@ export default function IndexAttemptErrorsModal({
error.document_id || error.entity_id || "Unknown"
)}
</TableCell>
<TableCell>
<div className="flex items-center h-[2rem] overflow-y-auto whitespace-normal">
<TableCell className="h-[60px] align-top p-0">
<div className="h-[60px] overflow-y-auto p-4 whitespace-normal">
{error.failure_message}
</div>
</TableCell>
<TableCell>
<TableCell className="h-[60px] align-top">
<span
className={`px-2 py-1 rounded text-xs ${
error.is_resolved
@@ -178,7 +202,7 @@ export default function IndexAttemptErrorsModal({
</TableRow>
))
) : (
<TableRow className="h-[4rem]">
<TableRow>
<TableCell
colSpan={4}
className="text-center py-8 text-gray-500"
@@ -191,24 +215,32 @@ export default function IndexAttemptErrorsModal({
</Table>
</div>
{paginationData.totalPages > 1 && (
<div className="flex w-full justify-center">
<PageSelector
totalPages={paginationData.totalPages}
currentPage={currentPage}
onPageChange={handlePageChange}
/>
<div className="flex-shrink-0">
{paginationData.totalPages > 1 && (
<div className="flex-1 flex justify-center mb-2">
<PageSelector
totalPages={paginationData.totalPages}
currentPage={currentPage}
onPageChange={handlePageChange}
/>
</div>
)}
<div className="flex w-full">
<div className="flex gap-2 ml-auto">
{hasUnresolvedErrors && !isResolvingErrors && (
// TODO(@raunakab): migrate to opal Button once className/iconClassName is resolved
<Button
onClick={onResolveAll}
className="ml-4 whitespace-nowrap"
>
Resolve All
</Button>
)}
</div>
</div>
)}
</div>
</Modal.Body>
<Modal.Footer>
{hasUnresolvedErrors && !isResolvingErrors && (
// TODO(@raunakab): migrate to opal Button once className/iconClassName is resolved
<Button onClick={onResolveAll} className="ml-4 whitespace-nowrap">
Resolve All
</Button>
)}
</Modal.Footer>
</Modal.Content>
</Modal>
);

View File

@@ -18,7 +18,7 @@ import { PageSelector } from "@/components/PageSelector";
import { localizeAndPrettify } from "@/lib/time";
import { getDocsProcessedPerMinute } from "@/lib/indexAttempt";
import { InfoIcon } from "@/components/icons/icons";
import ExceptionTraceModal from "@/sections/modals/PreviewModal/ExceptionTraceModal";
import ExceptionTraceModal from "@/components/modals/ExceptionTraceModal";
import SimpleTooltip from "@/refresh-components/SimpleTooltip";
import { SvgClock } from "@opal/icons";
export interface IndexingAttemptsTableProps {

View File

@@ -21,13 +21,10 @@ export const submitGoogleSite = async (
formData.append("files", file);
});
const response = await fetch(
"/api/manage/admin/connector/file/upload?unzip=false",
{
method: "POST",
body: formData,
}
);
const response = await fetch("/api/manage/admin/connector/file/upload", {
method: "POST",
body: formData,
});
const responseJson = await response.json();
if (!response.ok) {
toast.error(`Unable to upload files - ${responseJson.detail}`);

View File

@@ -19,7 +19,6 @@ import {
} from "@/lib/types";
import type { Route } from "next";
import { useRouter } from "next/navigation";
import Truncated from "@/refresh-components/texts/Truncated";
import {
FiChevronDown,
FiChevronRight,
@@ -166,7 +165,9 @@ function ConnectorRow({
onClick={handleRowClick}
>
<TableCell className="">
<Truncated>{ccPairsIndexingStatus.name}</Truncated>
<p className="max-w-[200px] xl:max-w-[400px] inline-block ellipsis truncate">
{ccPairsIndexingStatus.name}
</p>
</TableCell>
<TableCell>
{timeAgo(ccPairsIndexingStatus?.last_success) || "-"}
@@ -245,7 +246,9 @@ function FederatedConnectorRow({
onClick={handleRowClick}
>
<TableCell className="">
<Truncated>{federatedConnector.name}</Truncated>
<p className="max-w-[200px] xl:max-w-[400px] inline-block ellipsis truncate">
{federatedConnector.name}
</p>
</TableCell>
<TableCell>N/A</TableCell>
<TableCell>

View File

@@ -0,0 +1,53 @@
import { useEffect, useRef, useState } from "react";
import Modal from "@/refresh-components/Modal";
import Text from "@/refresh-components/texts/Text";
import { SvgAlertTriangle, SvgCheck, SvgCopy } from "@opal/icons";

interface ExceptionTraceModalProps {
  /** Invoked when the user dismisses the modal (outside click / close). */
  onOutsideClick: () => void;
  /** Raw exception trace text to display and copy. */
  exceptionTrace: string;
}

/**
 * Modal showing a full exception trace with a copy-to-clipboard button.
 * After a copy the button briefly flips to a "Copied to clipboard" state.
 */
export default function ExceptionTraceModal({
  onOutsideClick,
  exceptionTrace,
}: ExceptionTraceModalProps) {
  const [copyClicked, setCopyClicked] = useState(false);
  // Handle of the pending feedback-reset timer so it can be cancelled on
  // unmount (avoids setState on an unmounted component) and on rapid
  // repeated clicks (avoids stacking several timers).
  const resetTimerRef = useRef<ReturnType<typeof setTimeout> | null>(null);

  useEffect(() => {
    return () => {
      if (resetTimerRef.current !== null) {
        clearTimeout(resetTimerRef.current);
      }
    };
  }, []);

  const handleCopy = () => {
    // exceptionTrace is a required string prop — no non-null assertion needed.
    navigator.clipboard.writeText(exceptionTrace);
    setCopyClicked(true);
    if (resetTimerRef.current !== null) {
      clearTimeout(resetTimerRef.current);
    }
    resetTimerRef.current = setTimeout(() => setCopyClicked(false), 2000);
  };

  return (
    <Modal open onOpenChange={onOutsideClick}>
      <Modal.Content width="lg" height="full">
        <Modal.Header
          icon={SvgAlertTriangle}
          title="Full Exception Trace"
          onClose={onOutsideClick}
          height="fit"
        />
        <Modal.Body>
          <div className="mb-6">
            {!copyClicked ? (
              <button
                type="button"
                onClick={handleCopy}
                className="flex w-fit items-center hover:bg-accent-background p-2 border-border border rounded"
              >
                <Text>Copy full trace</Text>
                <SvgCopy className="stroke-text-04 ml-2 h-4 w-4 flex flex-shrink-0" />
              </button>
            ) : (
              <div className="flex w-fit items-center hover:bg-accent-background p-2 border-border border rounded cursor-default">
                <Text>Copied to clipboard</Text>
                <SvgCheck className="stroke-text-04 my-auto ml-2 h-4 w-4 flex flex-shrink-0" />
              </div>
            )}
          </div>
          <div className="whitespace-pre-wrap">{exceptionTrace}</div>
        </Modal.Body>
      </Modal.Content>
    </Modal>
  );
}

View File

@@ -1,39 +0,0 @@
import Modal from "@/refresh-components/Modal";
import { SvgAlertTriangle } from "@opal/icons";
import { CodePreview } from "@/sections/modals/PreviewModal/variants/CodePreview";
import { CopyButton } from "@/sections/modals/PreviewModal/variants/shared";
import FloatingFooter from "@/sections/modals/PreviewModal/FloatingFooter";

interface ExceptionTraceModalProps {
  /** Invoked when the user dismisses the modal (outside click / close). */
  onOutsideClick: () => void;
  /** Raw exception trace text rendered in the code preview. */
  exceptionTrace: string;
  /** Syntax-highlight language passed to CodePreview; defaults to "python". */
  language?: string;
}

/**
 * Modal displaying a full exception trace via the PreviewModal building
 * blocks: the trace is rendered through CodePreview (with highlighting and
 * normalization) and a FloatingFooter hosts a copy-to-clipboard button on
 * the right slot, using the code-surface gradient background.
 */
export default function ExceptionTraceModal({
  onOutsideClick,
  exceptionTrace,
  language = "python",
}: ExceptionTraceModalProps) {
  return (
    <Modal open onOpenChange={onOutsideClick}>
      <Modal.Content width="lg" height="full">
        <Modal.Header
          icon={SvgAlertTriangle}
          title="Full Exception Trace"
          onClose={onOutsideClick}
          height="fit"
        />
        {/* min-h-0 + overflow-hidden let the preview scroll inside the
            fixed-height modal instead of stretching it. */}
        <div className="flex flex-col flex-1 min-h-0 overflow-hidden w-full bg-background-tint-01">
          <CodePreview content={exceptionTrace} language={language} normalize />
        </div>
        <FloatingFooter
          right={<CopyButton getText={() => exceptionTrace} />}
          codeBackground
        />
      </Modal.Content>
    </Modal>
  );
}

View File

@@ -1,39 +0,0 @@
import { cn } from "@/lib/utils";
import { ReactNode } from "react";

interface FloatingFooterProps {
  /** Optional content for the left-hand slot. */
  left?: ReactNode;
  /** Optional content for the right-hand slot (gets a card background). */
  right?: ReactNode;
  /** Use the code-surface gradient instead of the default tint surface. */
  codeBackground?: boolean;
}

/**
 * Footer overlay pinned to the bottom of a modal, fading into the content
 * above through a vertical gradient. The wrapper itself ignores pointer
 * events so the content underneath stays interactive; each populated slot
 * re-enables them.
 */
export default function FloatingFooter({
  left,
  right,
  codeBackground,
}: FloatingFooterProps) {
  // The gradient must match the surface behind the footer: code surfaces
  // use --background-code-01, regular surfaces use --background-tint-01.
  const surfaceToken = codeBackground ? "code-01" : "tint-01";
  const overlayStyle = {
    background: `linear-gradient(to top, var(--background-${surfaceToken}) 40%, transparent)`,
  };

  const rightSlot = right ? (
    <div className="pointer-events-auto rounded-12 bg-background-tint-00 p-1 shadow-lg">
      {right}
    </div>
  ) : null;

  return (
    <div
      className={cn(
        "absolute bottom-0 left-0 right-0",
        "flex items-center justify-between",
        "p-4 pointer-events-none w-full"
      )}
      style={overlayStyle}
    >
      {/* Left slot */}
      <div className="pointer-events-auto">{left}</div>
      {/* Right slot */}
      {rightSlot}
    </div>
  );
}

View File

@@ -5,8 +5,8 @@ import { MinimalOnyxDocument } from "@/lib/search/interfaces";
import Modal from "@/refresh-components/Modal";
import Text from "@/refresh-components/texts/Text";
import SimpleLoader from "@/refresh-components/loaders/SimpleLoader";
import { cn } from "@/lib/utils";
import { Section } from "@/layouts/general-layouts";
import FloatingFooter from "@/sections/modals/PreviewModal/FloatingFooter";
import mime from "mime";
import {
getCodeLanguage,
@@ -189,12 +189,30 @@ export default function PreviewModal({
)}
</div>
{/* Floating footer */}
{!isLoading && !loadError && (
<FloatingFooter
left={variant.renderFooterLeft(ctx)}
right={variant.renderFooterRight(ctx)}
codeBackground={variant.codeBackground}
/>
<div
className={cn(
"absolute bottom-0 left-0 right-0",
"flex items-center justify-between",
"p-4 pointer-events-none w-full"
)}
style={{
background: `linear-gradient(to top, var(--background-${
variant.codeBackground ? "code-01" : "tint-01"
}) 40%, transparent)`,
}}
>
{/* Left slot */}
<div className="pointer-events-auto">
{variant.renderFooterLeft(ctx)}
</div>
{/* Right slot */}
<div className="pointer-events-auto rounded-12 bg-background-tint-00 p-1 shadow-lg">
{variant.renderFooterRight(ctx)}
</div>
</div>
)}
</Modal.Content>
</Modal>

View File

@@ -12,7 +12,6 @@ import { cn, noProp } from "@/lib/utils";
import { DRAG_TYPES } from "./constants";
import SidebarTab from "@/refresh-components/buttons/SidebarTab";
import IconButton from "@/refresh-components/buttons/IconButton";
import Truncated from "@/refresh-components/texts/Truncated";
import { Button } from "@opal/components";
import ButtonRenaming from "@/refresh-components/buttons/ButtonRenaming";
import type { IconProps } from "@opal/types";
@@ -182,7 +181,7 @@ const ProjectFolderButton = memo(({ project }: ProjectFolderButtonProps) => {
onClose={() => setIsEditing(false)}
/>
) : (
<Truncated>{project.name}</Truncated>
project.name
)}
</SidebarTab>
</Popover.Anchor>

View File

@@ -169,9 +169,7 @@ test.describe("Project Files visual regression", () => {
.first();
await expect(iconWrapper).toBeVisible();
const container = page.locator("[data-main-container]");
await expect(container).toBeVisible();
await expectElementScreenshot(container, {
await expectElementScreenshot(filesSection, {
name: "project-files-long-underscore-filename",
});

View File

@@ -0,0 +1,875 @@
# Index Attempt Errors Modal - Comprehensive Test Plan
## Application Overview
The Index Attempt Errors Modal is an admin-facing feature found on the connector detail page at
`/admin/connector/[ccPairId]`. It surfaces document-level indexing failures for a given
Connector-Credential Pair (CC Pair) and allows an admin to review them and trigger a full re-index
to attempt resolution.
### Feature Summary
- **Entry point**: A yellow `Alert` banner on the connector detail page that reads "Some documents
failed to index" with a "View details." bold link. The banner appears only when
`indexAttemptErrors.total_items > 0`.
- **Data fetching**: The parent page (`page.tsx`) fetches errors via `usePaginatedFetch` with
`itemsPerPage: 10, pagesPerBatch: 1`, polling every 5 seconds. Only the first 10 errors are
loaded into the modal. The modal receives these as `errors.items` and performs client-side
pagination over them.
- **Modal title**: "Indexing Errors" with an `SvgAlertTriangle` icon.
- **Table columns**: Time, Document ID (optionally hyperlinked), Error Message (scrollable cell at
60px height), Status (badge).
- **Pagination**: Client-side within the modal. Page size is computed dynamically from the
container height via a `ResizeObserver` (minimum 3 rows per page). A `PageSelector` renders only
when `totalPages > 1`.
- **Resolve All button**: In the modal footer. Rendered only when `hasUnresolvedErrors === true`
and `isResolvingErrors === false`. Clicking it: closes the modal, sets
`showIsResolvingKickoffLoader` to true, and awaits `triggerReIndex(fromBeginning = true)`.
- **Spinner**: The full-screen `Spinner` is shown when `showIsResolvingKickoffLoader &&
!isResolvingErrors`. Once the backend index attempt transitions to `in_progress` / `not_started`
with `from_beginning = true`, `isResolvingErrors` becomes true and the spinner is hidden
regardless of `showIsResolvingKickoffLoader`.
- **Resolving state**: While a full re-index initiated from the modal is running (latest index
attempt is `in_progress` or `not_started`, `from_beginning = true`, and none of the currently
loaded errors belong to that attempt), the banner switches to an animated "Resolving failures"
pulse and the modal header description changes.
- **Access control**: The `/api/manage/admin/cc-pair/{id}/errors` endpoint requires
`current_curator_or_admin_user`.
---
## Important Implementation Details (Affecting Test Design)
1. **10-error fetch limit**: The parent page only fetches up to 10 errors per poll cycle
(`itemsPerPage: 10`). The modal's client-side pagination operates on these 10 items, not on the
full database count. Testing large error counts via the UI requires either adjusting this limit
or using direct API calls.
2. **Double-spinner invocation**: The `onResolveAll` handler in `page.tsx` sets
`showIsResolvingKickoffLoader(true)` before calling `triggerReIndex`, which itself also sets it
to `true`. The spinner correctly disappears when `triggerReIndex` resolves (via `finally`). This
is benign but worth noting for timing-sensitive tests.
3. **isResolvingErrors logic**: `isResolvingErrors` is derived from `indexAttemptErrors.items`
(the 10-item fetch) and `latestIndexAttempt`. If any of the currently loaded errors have the
same `index_attempt_id` as the latest in-progress attempt, `isResolvingErrors` is `false` even
though a re-index is running.
4. **PageSelector "unclickable" style**: The "‹" (previous) and "›" (next) buttons use a `div` with
`unclickable` prop that adds `text-text-200` class and removes `cursor-pointer`. They are not
`<button disabled>` elements — they remain clickable in the DOM but navigation is guarded by
`Math.max`/`Math.min` clamps.
5. **Alert uses dark: modifiers**: The banner component uses `dark:` Tailwind classes, which
contradicts the project's `colors.css` theming convention. This is an existing code issue and
not a test failure.
---
## Assumptions
- All test scenarios begin from a fresh, clean state unless explicitly stated otherwise.
- The test user is logged in as an admin (`a@example.com` / `a`).
- A file connector is created via `OnyxApiClient.createFileConnector()` before each scenario that
needs one, and cleaned up in `afterEach` via `apiClient.deleteCCPair(testCcPairId)`.
- Indexing errors are seeded directly via `psql` or a dedicated test API endpoint because they are
produced by the background indexing pipeline in production.
- The connector detail page polls CC Pair data and errors every 5 seconds; tests that check
dynamic state must account for this polling interval (allow up to 10 seconds).
---
## Test Scenarios
### 1. Alert Banner Visibility
#### 1.1 No Errors - Banner Is Hidden
**Seed:** Create a file connector with zero `IndexAttemptError` records.
**Steps:**
1. Navigate to `/admin/connector/{ccPairId}`.
2. Wait for the page to finish loading (`networkidle`).
3. Observe the area between the connector header and the "Indexing" section title.
**Expected Results:**
- The yellow alert banner ("Some documents failed to index") is not present in the DOM.
- The "Indexing" section and its status card are visible.
---
#### 1.2 One or More Unresolved Errors - Banner Appears
**Seed:** Create a file connector, then insert at least one `IndexAttemptError` row with
`is_resolved = false` for that CC Pair.
**Steps:**
1. Navigate to `/admin/connector/{ccPairId}`.
2. Wait for the page to finish loading.
3. Observe the area above the "Indexing" section title.
**Expected Results:**
- The yellow alert banner is visible.
- The banner heading reads "Some documents failed to index".
- The banner body contains the text "We ran into some issues while processing some documents."
- The text "View details." is rendered as a bold, clickable element within the banner body.
---
#### 1.3 All Errors Resolved - Banner Disappears Automatically
**Seed:** Create a file connector with one `IndexAttemptError` where `is_resolved = false`.
**Steps:**
1. Navigate to `/admin/connector/{ccPairId}` and confirm the banner is visible.
2. Using `psql` or a direct DB update, set `is_resolved = true` on that error record.
3. Wait up to 10 seconds for the 5-second polling cycle to refresh the errors fetch.
4. Observe the banner area.
**Expected Results:**
- The yellow alert banner disappears without a manual page reload.
- No navigation or error occurs.
---
#### 1.4 Banner Absent for Invalid Connector With No Errors
**Seed:** Create a file connector with zero errors. Manually put it into `INVALID` status via the
DB.
**Steps:**
1. Navigate to `/admin/connector/{ccPairId}`.
2. Observe both the "Invalid Connector State" callout and the banner area.
**Expected Results:**
- The "Invalid Connector State" warning callout is visible.
- The yellow "Some documents failed to index" banner is absent.
- The two alerts do not overlap.
---
### 2. Opening and Closing the Modal
#### 2.1 Open Modal via "View Details" Link
**Seed:** Create a file connector with one unresolved `IndexAttemptError`.
**Steps:**
1. Navigate to `/admin/connector/{ccPairId}`.
2. Wait for the yellow alert banner to appear.
3. Click the bold "View details." text within the banner.
**Expected Results:**
- A modal dialog appears with the title "Indexing Errors" and an alert-triangle icon.
- The modal header has no description text (description is only shown in the resolving state).
- The modal body shows the paragraph starting with "Below are the errors encountered during
indexing."
- A second paragraph reads "Click the button below to kick off a full re-index..."
- The table is visible with four column headers: Time, Document ID, Error Message, Status.
- The one seeded error is displayed as a row.
- The modal footer contains a "Resolve All" button.
---
#### 2.2 Close Modal via the X Button
**Seed:** One unresolved `IndexAttemptError`.
**Steps:**
1. Open the Indexing Errors modal (scenario 2.1).
2. Click the close (X) button in the modal header.
**Expected Results:**
- The modal closes and is no longer visible.
- The connector detail page remains with the yellow alert banner still present.
- No navigation occurs.
---
#### 2.3 Close Modal via Escape Key
**Seed:** One unresolved `IndexAttemptError`.
**Steps:**
1. Open the Indexing Errors modal.
2. Press the Escape key.
**Expected Results:**
- The modal closes.
- The connector detail page remains intact with the banner still displayed.
---
#### 2.4 Close Modal via Backdrop Click
**Seed:** One unresolved `IndexAttemptError`.
**Steps:**
1. Open the Indexing Errors modal.
2. Click outside the modal content area on the dimmed backdrop.
**Expected Results:**
- The modal closes.
- The connector detail page remains intact.
---
#### 2.5 Modal Cannot Be Opened When Errors Are Resolving
**Seed:** Simulate `isResolvingErrors = true` by ensuring the latest index attempt has
`status = in_progress`, `from_beginning = true`, and no currently loaded errors share its
`index_attempt_id`.
**Steps:**
1. Navigate to `/admin/connector/{ccPairId}`.
2. Observe the yellow banner in the resolving state.
3. Confirm there is no "View details." link in the banner.
**Expected Results:**
- The banner body shows only the animated "Resolving failures" text (no "View details." link).
- There is no interactive element in the banner to open the modal.
---
### 3. Table Content and Rendering
#### 3.1 Single Error Row - All Fields Present
**Seed:** Insert one `IndexAttemptError` with:
- `document_id = "doc-123"`
- `document_link = "https://example.com/doc-123"`
- `failure_message = "Timeout while fetching document content"`
- `is_resolved = false`
- `time_created = <known timestamp>`
**Steps:**
1. Open the Indexing Errors modal.
2. Inspect the single data row in the table.
**Expected Results:**
- The "Time" cell displays a human-readable, localized version of `time_created`.
- The "Document ID" cell renders "doc-123" as an `<a>` element pointing to
`https://example.com/doc-123` with `target="_blank"` and `rel="noopener noreferrer"`.
- The "Error Message" cell shows "Timeout while fetching document content" in a 60px-height
scrollable div.
- The "Status" cell shows a badge with text "Unresolved" styled with red background.
---
#### 3.2 Error Without a Document Link - Plain Text ID
**Seed:** Insert one `IndexAttemptError` with `document_id = "doc-no-link"` and
`document_link = null`.
**Steps:**
1. Open the Indexing Errors modal.
2. Inspect the Document ID cell.
**Expected Results:**
- The Document ID cell displays "doc-no-link" as plain text with no `<a>` element or underline.
---
#### 3.3 Error With Entity ID Instead of Document ID
**Seed:** Insert one `IndexAttemptError` with `document_id = null`, `entity_id = "entity-abc"`,
and `document_link = null`.
**Steps:**
1. Open the Indexing Errors modal.
2. Inspect the Document ID cell.
**Expected Results:**
- The Document ID cell displays "entity-abc" as plain text (fallback to `entity_id` when
`document_id` is null and no link exists).
---
#### 3.4 Error With Document Link But No Document ID - Uses Entity ID in Link
**Seed:** Insert one `IndexAttemptError` with `document_id = null`,
`entity_id = "entity-link-test"`, and `document_link = "https://example.com/entity"`.
**Steps:**
1. Open the Indexing Errors modal.
2. Inspect the Document ID cell.
**Expected Results:**
- The Document ID cell renders "entity-link-test" as a hyperlink pointing to
`https://example.com/entity`.
- The link text is `entity_id` because `document_id` is null (code:
`error.document_id || error.entity_id || "Unknown"`).
---
#### 3.5 Error With Neither Document ID Nor Entity ID
**Seed:** Insert one `IndexAttemptError` with `document_id = null` and `entity_id = null`.
**Steps:**
1. Open the Indexing Errors modal.
2. Inspect the Document ID cell.
**Expected Results:**
- The Document ID cell displays the text "Unknown".
---
#### 3.6 Long Error Message Is Scrollable
**Seed:** Insert one `IndexAttemptError` with a `failure_message` of at least 500 characters.
**Steps:**
1. Open the Indexing Errors modal.
2. Locate the Error Message cell for that row.
3. Attempt to scroll within the cell.
**Expected Results:**
- The Error Message cell's inner `div` is capped at 60px height with `overflow-y-auto`.
- The cell content is scrollable, allowing the full message to be read.
- The table row height does not expand beyond 60px.
---
#### 3.7 Error Message With Special HTML Characters Is Escaped
**Seed:** Insert one `IndexAttemptError` with
`failure_message = "<script>alert('xss')</script>"`.
**Steps:**
1. Open the Indexing Errors modal.
2. Inspect the Error Message cell.
**Expected Results:**
- The text is rendered as a literal string, not interpreted as HTML.
- No JavaScript alert dialog appears.
- The exact text `<script>alert('xss')</script>` is visible as escaped content.
---
#### 3.8 Single-Character Error Message Does Not Break Layout
**Seed:** Insert one `IndexAttemptError` with `failure_message = "X"`.
**Steps:**
1. Open the Indexing Errors modal.
2. Inspect the Error Message cell and table row height.
**Expected Results:**
- The cell renders "X" without layout breakage.
- The row height remains at 60px.
---
#### 3.9 Resolved Errors Are Filtered Out of Modal by Default
**Seed:** Insert one `IndexAttemptError` with `is_resolved = true` and no unresolved errors.
**Steps:**
1. Make a direct API call: `GET /api/manage/admin/cc-pair/{ccPairId}/errors` (without
`include_resolved`).
2. Make a second call: `GET /api/manage/admin/cc-pair/{ccPairId}/errors?include_resolved=true`.
3. Navigate to `/admin/connector/{ccPairId}`.
**Expected Results:**
- The first API call returns zero items (`total_items = 0`).
- The second API call returns the resolved error with `is_resolved: true`.
- The yellow alert banner is absent from the connector page.
- The modal cannot be opened through normal UI (no "View details." link).
---
### 4. Pagination
#### 4.1 Single Page - No Pagination Controls
**Seed:** Insert 3 unresolved errors (the minimum page size).
**Steps:**
1. Open the Indexing Errors modal.
2. Observe whether a `PageSelector` is rendered below the table.
**Expected Results:**
- No `PageSelector` is rendered (`totalPages === 1`).
- All 3 errors are visible simultaneously in the table.
---
#### 4.2 Multiple Pages - Pagination Controls Appear
**Seed:** Insert 10 unresolved errors (matches the parent's `itemsPerPage: 10` fetch limit).
The modal's dynamic page size is typically larger than 3 but can be forced small by using a
narrow viewport.
**Steps:**
1. Set the browser viewport to a height that results in a page size smaller than 10 (e.g., a
height where the modal table container fits only 3 rows).
2. Open the Indexing Errors modal.
3. Observe the area below the table.
**Expected Results:**
- A `PageSelector` is rendered with "‹" and "›" navigation controls.
- The current page indicator (page 1) is visually highlighted (active styling).
- Only the rows for page 1 are shown in the table body.
---
#### 4.3 Navigate to Next Page
**Seed:** Insert 10 unresolved errors and use a viewport that yields page size < 10.
**Steps:**
1. Open the Indexing Errors modal and confirm `PageSelector` is visible on page 1.
2. Note the Document IDs visible on page 1.
3. Click the "›" (next page) button.
**Expected Results:**
- The table updates to show errors for page 2.
- The Document IDs on page 2 differ from those on page 1.
- The page 2 indicator becomes highlighted.
- The "‹" (previous page) button becomes clickable (no longer has `unclickable` styling).
---
#### 4.4 Navigate Back to Previous Page
**Seed:** Same as 4.3.
**Steps:**
1. Open the modal and navigate to page 2 (scenario 4.3).
2. Click the "‹" (previous page) button.
**Expected Results:**
- The table returns to showing page 1 errors.
- The page 1 indicator is highlighted.
- The "‹" button gains `unclickable` styling (lighter text, no pointer cursor).
---
#### 4.5 Previous Button Does Not Navigate Below Page 1
**Seed:** Insert enough errors to produce at least 2 pages.
**Steps:**
1. Open the Indexing Errors modal. Confirm the current page is 1.
2. Observe the "‹" button styling.
3. Click the "‹" button.
**Expected Results:**
- The "‹" button has `text-text-200` styling (from `PageLink unclickable` prop) and no
`cursor-pointer`.
- Note: The button is a `div`, not a `<button disabled>`. It remains clickable in the DOM, but the
handler clamps navigation to `Math.max(currentPage - 1, 1)`, so clicking it on page 1 has no
effect on the displayed rows.
- The current page remains page 1.
---
#### 4.6 Next Button Does Not Navigate Beyond Last Page
**Seed:** Insert exactly enough errors to produce 2 pages. Navigate to page 2.
**Steps:**
1. Open the modal and navigate to the last page.
2. Observe the "›" button styling.
3. Click the "›" button.
**Expected Results:**
- The "›" button has `unclickable` styling on the last page.
- Clicking it does not navigate beyond the last page (clamped by `Math.min`).
---
#### 4.7 Page Resets to 1 When Error Count Changes
**Seed:** Insert 10 errors. Use a small viewport so multiple pages exist. Navigate to page 2.
**Steps:**
1. Open the modal and navigate to page 2.
2. While the modal is open, delete all error rows from the DB.
3. Wait up to 10 seconds for the polling cycle to reload errors.
**Expected Results:**
- The modal's error table shows the empty-state message.
- The `currentPage` state resets to 1 (triggered by the `useEffect` watching
`errors.items.length` and `errors.total_items`).
- The `PageSelector` disappears if only one (empty) page remains.
---
#### 4.8 API-Level Pagination: page_size Parameter Maximum
**Seed:** Insert 101 errors for the CC Pair.
**Steps:**
1. Make `GET /api/manage/admin/cc-pair/{ccPairId}/errors?page_size=100` as an authenticated admin.
2. Make `GET /api/manage/admin/cc-pair/{ccPairId}/errors?page_size=101`.
**Expected Results:**
- The `page_size=100` request returns 100 items and `total_items = 101`.
- The `page_size=101` request returns a 422 Unprocessable Entity error (backend enforces
`le=100`).
---
### 5. Resolve All Functionality
#### 5.1 Resolve All Button Triggers Full Re-Index and Shows Spinner
**Seed:** Create a file connector in ACTIVE status (not paused, not indexing, not invalid) with at
least one unresolved `IndexAttemptError`.
**Steps:**
1. Open the Indexing Errors modal.
2. Confirm the "Resolve All" button is visible in the modal footer.
3. Click "Resolve All".
**Expected Results:**
- The modal closes immediately.
- A full-screen `Spinner` component appears while the re-index request is in flight
(`showIsResolvingKickoffLoader = true` and `isResolvingErrors = false`).
- A success toast notification appears: "Complete re-indexing started successfully".
- The `Spinner` disappears after `triggerIndexing` resolves.
- The connector detail page is still visible.
- The yellow alert banner remains visible (errors are not immediately marked resolved; they resolve
as the re-index runs).
---
#### 5.2 Spinner Disappears Once Re-Index Is Picked Up
**Seed:** Same as 5.1. The re-index task must be picked up by a running Celery worker.
**Steps:**
1. Click "Resolve All" (scenario 5.1).
2. Wait for the Celery worker to start the index attempt (it will transition to `not_started` /
`in_progress` with `from_beginning = true`).
3. Observe the spinner and the banner.
**Expected Results:**
- Once `isResolvingErrors` becomes `true` (the latest attempt is in-progress, from-beginning, and
none of the currently loaded errors belong to it), the spinner condition
`showIsResolvingKickoffLoader && !isResolvingErrors` becomes false, hiding the spinner.
- The banner transitions to the "Resolving failures" pulse state.
---
#### 5.3 Resolve All Button Is Hidden When All Loaded Errors Are Resolved
**Note:** Since `usePaginatedFetch` fetches only unresolved errors by default (no
`include_resolved` param on the errors endpoint), zero unresolved errors means the banner is
absent and the modal cannot be opened via the normal UI. This scenario therefore validates the
banner-suppression mechanism:
**Steps:**
1. Ensure the CC Pair has zero unresolved `IndexAttemptError` records.
2. Navigate to `/admin/connector/{ccPairId}`.
3. Confirm the yellow alert banner is not present.
**Expected Results:**
- The yellow banner is absent.
- The modal cannot be opened via the banner (no "View details." link).
- The "Resolve All" button is therefore unreachable via the normal UI.
---
#### 5.4 Resolve All Button Is Hidden While Re-Index Is In Progress
**Seed:** Trigger a full re-index (scenario 5.1) and re-open the modal while it is running.
**Steps:**
1. Trigger "Resolve All" (scenario 5.1).
2. Immediately click "View details." in the banner to re-open the modal.
3. Observe the modal while the re-index is `in_progress`.
**Expected Results:**
- The modal header description reads: "Currently attempting to resolve all errors by performing a
full re-index. This may take some time to complete."
- The two explanatory body paragraphs ("Below are the errors..." and "Click the button below...") are
not visible.
- The "Resolve All" button is not present in the footer.
- The error rows are still displayed in the table.
---
#### 5.5 Resolve All Fails Gracefully When Connector Is Paused
**Seed:** Create a file connector that is in PAUSED status with unresolved errors.
**Steps:**
1. Open the Indexing Errors modal.
2. Confirm the "Resolve All" button is visible (it renders based on `hasUnresolvedErrors`, not on
connector status).
3. Click "Resolve All".
**Expected Results:**
- The modal closes and the spinner appears briefly.
- An error toast appears (because `triggerIndexing` returns an error message for paused
connectors).
- The spinner disappears after the failed request.
- The banner is still visible with the "View details." link.
---
### 6. Banner Resolving State
#### 6.1 Banner Shows "Resolving Failures" While Re-Index Is In Progress
**Seed:** Trigger a full re-index from the modal.
**Steps:**
1. Trigger "Resolve All".
2. Return to the connector detail page without re-opening the modal.
3. Observe the yellow alert banner body.
**Expected Results:**
- The banner body no longer shows "We ran into some issues..." or the "View details." link.
- Instead, the banner body shows a pulsing "Resolving failures" span
(`animate-pulse` CSS class).
- The banner heading still reads "Some documents failed to index".
- The banner remains visible until the re-index completes and errors are resolved.
---
#### 6.2 Banner Reverts to "View Details" if Resolving Attempt Gains New Errors
**Seed:** A re-index is in progress (`isResolvingErrors = true`). Force a new
`IndexAttemptError` with `index_attempt_id` matching the running attempt.
**Steps:**
1. Observe the banner in "Resolving failures" state.
2. Insert a new `IndexAttemptError` with `index_attempt_id = <running attempt id>` into the DB.
3. Wait for the 5-second polling cycle.
4. Observe the banner.
**Expected Results:**
- `isResolvingErrors` transitions back to `false` (the loaded errors now include one belonging to
the latest attempt).
- The banner reverts to showing "We ran into some issues..." with the "View details." link.
---
### 7. Empty State
#### 7.1 Empty Table When No Items on Current Page
**Seed:** Construct a state where the `errors.items` array is empty (zero errors loaded for the
CC Pair — this requires that `indexAttemptErrors.total_items > 0` to keep the banner visible, but
a data race occurs where errors are deleted while the modal is open).
**Steps:**
1. Open the modal with some errors visible.
2. Delete all error rows from the DB while the modal is open.
3. Wait for the 5-second polling cycle.
4. Observe the table body.
**Expected Results:**
- The table body shows a single row spanning all four columns with the text "No errors found on
this page" (centered, grayed-out `text-gray-500` style).
- The `PageSelector` may disappear (if `totalPages` drops to 1).
---
### 8. Access Control
#### 8.1 Non-Admin User Cannot Access the Errors API Endpoint
**Seed:** Ensure a basic (non-admin, non-curator) user account exists.
**Steps:**
1. Log in as the basic user.
2. Make a `GET /api/manage/admin/cc-pair/{ccPairId}/errors` request.
**Expected Results:**
- The API returns a 401 or 403 HTTP status code.
- The basic user cannot access the connector detail admin page via the UI.
---
#### 8.2 Curator User Can Access the Errors Endpoint and Open Modal
**Seed:** Ensure a curator user account exists and is assigned to the CC Pair's access group.
**Steps:**
1. Log in as the curator user.
2. Navigate to `/admin/connector/{ccPairId}` for a connector the curator can edit.
3. If unresolved errors exist, click "View details." to open the modal.
**Expected Results:**
- The modal opens successfully.
- The errors table is populated.
- The "Resolve All" button is visible (if unresolved errors exist and no re-index is running).
---
### 9. Data Freshness and Auto-Refresh
#### 9.1 New Error Appears Without Page Reload
**Seed:** Create a file connector with zero errors. Keep the connector detail page open.
**Steps:**
1. Navigate to `/admin/connector/{ccPairId}` and confirm no yellow banner.
2. Insert a new `IndexAttemptError` with `is_resolved = false` via the DB.
3. Wait up to 10 seconds (two 5-second polling cycles).
4. Observe the page.
**Expected Results:**
- The yellow alert banner appears automatically without a manual page refresh.
- No full page reload occurs.
---
#### 9.2 Errors List Refreshes While Modal Is Open
**Seed:** Create a file connector with 2 unresolved errors.
**Steps:**
1. Open the Indexing Errors modal.
2. Confirm 2 rows are visible.
3. Insert a third `IndexAttemptError` via the DB.
4. Wait up to 10 seconds for the polling cycle.
5. Observe the modal table.
**Expected Results:**
- The third error row appears in the table without closing and reopening the modal.
- If the total errors now exceed the current page size, the `PageSelector` appears if it was not
already present.
---
### 10. Page-Level Integration
#### 10.1 Connector Detail Page Continues to Function With Modal Open
**Seed:** Create a file connector with unresolved errors.
**Steps:**
1. Navigate to `/admin/connector/{ccPairId}`.
2. Open the Indexing Errors modal.
3. With the modal open, attempt to scroll the page behind the modal.
4. Close the modal.
5. Verify the page remains functional: the Indexing section status card is visible, and the
"Manage" dropdown button is present.
**Expected Results:**
- The modal renders over page content with a dimmed backdrop.
- Scrolling behind the modal does not cause layout issues.
- After closing the modal, all page elements are interactive and properly displayed.
- No state corruption occurs.
---
#### 10.2 Other Manage Dropdown Actions Are Unaffected
**Seed:** Create a file connector with unresolved errors.
**Steps:**
1. Navigate to `/admin/connector/{ccPairId}`.
2. Open the Indexing Errors modal and close it without clicking "Resolve All".
3. Open the "Manage" dropdown.
4. Confirm the "Re-Index", "Pause", and "Delete" items are visible and have the correct enabled
state.
**Expected Results:**
- The Manage dropdown functions normally after interacting with the errors modal.
- No state from the modal (e.g., lingering `showIsResolvingKickoffLoader`) affects the dropdown.
---
### 11. Boundary Conditions
#### 11.1 Minimum Page Size of 3 Rows
**Seed:** Insert enough errors to exceed page size.
**Steps:**
1. Set the browser viewport to a very small height (e.g., 300px total).
2. Open the Indexing Errors modal.
3. Observe the number of rows rendered per page.
**Expected Results:**
- The page size does not drop below 3 rows (enforced by `Math.max(3, ...)` in the `ResizeObserver`
callback).
- At least 3 rows are displayed per page regardless of available container height.
---
#### 11.2 Exactly the API Page Limit (10 Items) Displayed
**Seed:** Insert exactly 10 unresolved errors for the CC Pair.
**Steps:**
1. Open the Indexing Errors modal with a sufficiently large viewport.
2. Observe that all 10 errors are visible (assuming the dynamic page size is >= 10).
**Expected Results:**
- All 10 errors are accessible in the modal.
- No pagination is needed if the computed page size is >= 10.
- Note: If 11+ errors exist in the DB, only the first 10 (from `usePaginatedFetch`) are surfaced
in the modal. The 11th error would require a separate API call or a larger `itemsPerPage` config
to verify.
---
#### 11.3 Modal Opens Only When indexAttemptErrors Is Non-Null
**Steps:**
1. Observe the condition in `page.tsx`: `{showIndexAttemptErrors && indexAttemptErrors && ...}`.
2. During the initial page load (before the first poll completes), `indexAttemptErrors` is `null`.
**Expected Results:**
- Clicking "View details." while `indexAttemptErrors` is still null has no effect
(`setShowIndexAttemptErrors(true)` is called but the modal renders only when both
`showIndexAttemptErrors` and `indexAttemptErrors` are truthy).
- Once the first poll completes and errors are available, the modal renders normally.
---
## Test File Location
These tests should be implemented as Playwright E2E specs at:
```
web/tests/e2e/connectors/index_attempt_errors_modal.spec.ts
```
### Recommended OnyxApiClient Additions
The following methods should be added to `web/tests/e2e/utils/onyxApiClient.ts` to support
seeding and cleanup:
- `createIndexAttemptError(ccPairId, options)` - inserts an error record via `psql` or a dedicated
test endpoint; options include `documentId`, `documentLink`, `entityId`, `failureMessage`,
`isResolved`, `indexAttemptId`.
- `resolveAllIndexAttemptErrors(ccPairId)` - marks all errors for a CC Pair as resolved.
- `getIndexAttemptErrors(ccPairId, includeResolved?)` - calls the errors API and returns the
parsed response.
### Cleanup Strategy
Each test must clean up its CC Pair in an `afterEach` hook:
```typescript
test.afterEach(async ({ page }) => {
const apiClient = new OnyxApiClient(page.request);
if (testCcPairId !== null) {
await apiClient.deleteCCPair(testCcPairId);
testCcPairId = null;
}
});
```
Cascade deletes on the CC Pair will remove associated `IndexAttemptError` rows automatically.
### Polling Guidance
For scenarios that require waiting for auto-refresh to propagate state changes, use
`expect.poll()` with a 10-second timeout to avoid flaky tests:
```typescript
await expect.poll(
async () => page.locator('[data-testid="error-banner"]').isVisible(),
{ timeout: 10000 }
).toBe(true);
```

View File

@@ -0,0 +1,705 @@
import { test, expect } from "@playwright/test";
import { loginAs } from "@tests/e2e/utils/auth";
import { OnyxApiClient } from "@tests/e2e/utils/onyxApiClient";
import { execSync } from "child_process";
// ─── Database Helpers ─────────────────────────────────────────────────────────
// IndexAttemptError rows are produced by background workers in production.
// In tests we seed them directly via psql since there is no public API for it.
// Name of the Postgres docker container; overridable via the DB_CONTAINER env var.
const DB_CONTAINER = process.env.DB_CONTAINER || "onyx-relational_db-1";
// Execute a SQL statement inside the Postgres container and return trimmed stdout.
// Uses -t -A so results come back as bare, unaligned tuples (easy to parse).
function psql(sql: string): string {
  const command = `docker exec -i ${DB_CONTAINER} psql -U postgres -t -A`;
  const stdout = execSync(command, {
    input: sql,
    encoding: "utf-8",
  });
  return stdout.trim();
}
// Fetch the id of the active ("PRESENT") search_settings row, which
// index_attempt rows must reference. Throws if none exists.
function getSearchSettingsId(): number {
  const raw = psql(
    "SELECT id FROM search_settings WHERE status = 'PRESENT' ORDER BY id DESC LIMIT 1;"
  );
  const parsed = parseInt(raw, 10);
  if (Number.isNaN(parsed)) {
    throw new Error(
      `No search_settings with status PRESENT found: "${raw}"`
    );
  }
  return parsed;
}
function createIndexAttempt(
ccPairId: number,
options: { fromBeginning?: boolean; status?: string } = {}
): number {
const { fromBeginning = false, status = "success" } = options;
const searchSettingsId = getSearchSettingsId();
const result = psql(
`INSERT INTO index_attempt (
connector_credential_pair_id, from_beginning, status, search_settings_id,
time_created, time_started, time_updated
) VALUES (
${ccPairId}, ${fromBeginning}, '${status}', ${searchSettingsId},
NOW(), NOW(), NOW()
) RETURNING id;`
);
const id = parseInt(result, 10);
if (isNaN(id)) {
throw new Error(`Failed to create index attempt: "${result}"`);
}
return id;
}
// Renders a nullable string as a SQL literal: NULL, or single-quoted with
// embedded single quotes doubled per SQL escaping rules.
function sqlVal(v: string | null): string {
  if (v === null) {
    return "NULL";
  }
  const escaped = v.replace(/'/g, "''");
  return `'${escaped}'`;
}
// Options for createError; mirrors the nullable columns of index_attempt_errors.
interface CreateErrorOptions {
  indexAttemptId: number; // FK to index_attempt
  ccPairId: number; // FK to connector_credential_pair
  documentId?: string | null; // shown in the modal's Document ID cell
  documentLink?: string | null; // when set, the ID/entity renders as a hyperlink
  entityId?: string | null; // fallback identifier when documentId is null
  failureMessage?: string; // text for the Error Message column
  isResolved?: boolean; // resolved errors are filtered out by default
}
// Inserts one index_attempt_errors row and returns its id.
// Unspecified optional fields fall back to NULL / the defaults below.
function createError(options: CreateErrorOptions): number {
  const documentId = options.documentId ?? null;
  const documentLink = options.documentLink ?? null;
  const entityId = options.entityId ?? null;
  const failureMessage = options.failureMessage ?? "Test indexing error";
  const isResolved = options.isResolved ?? false;
  const raw = psql(
    `INSERT INTO index_attempt_errors (
      index_attempt_id, connector_credential_pair_id,
      document_id, document_link, entity_id,
      failure_message, is_resolved, time_created
    ) VALUES (
      ${options.indexAttemptId}, ${options.ccPairId},
      ${sqlVal(documentId)}, ${sqlVal(documentLink)}, ${sqlVal(entityId)},
      ${sqlVal(failureMessage)}, ${isResolved}, NOW()
    ) RETURNING id;`
  );
  const errorId = parseInt(raw, 10);
  if (Number.isNaN(errorId)) {
    throw new Error(`Failed to create index attempt error: "${raw}"`);
  }
  return errorId;
}
// Seeds `count` unresolved errors (doc-1..doc-N) against one index attempt
// and returns the inserted row ids in creation order.
function createMultipleErrors(
  indexAttemptId: number,
  ccPairId: number,
  count: number
): number[] {
  return Array.from({ length: count }, (_, idx) =>
    createError({
      indexAttemptId,
      ccPairId,
      documentId: `doc-${idx + 1}`,
      failureMessage: `Error #${idx + 1}: Failed to index document`,
    })
  );
}
// Flips is_resolved on every error row for the CC pair directly in the DB,
// simulating the backend resolving failures (used to test banner dismissal).
function resolveAllErrors(ccPairId: number): void {
  psql(
    `UPDATE index_attempt_errors SET is_resolved = true
     WHERE connector_credential_pair_id = ${ccPairId};`
  );
}
// ─── Shared UI Helpers ────────────────────────────────────────────────────────
// Waits (up to 15s) for the failed-documents banner; the page discovers new
// errors on a polling cycle, so a generous timeout is needed.
async function waitForBanner(page: import("@playwright/test").Page) {
  const banner = page.getByText("Some documents failed to index");
  await expect(banner).toBeVisible({ timeout: 15000 });
}
// Waits for the banner, clicks through to the errors modal, and confirms the
// modal title is rendered before returning.
async function openErrorsModal(page: import("@playwright/test").Page) {
  await waitForBanner(page);
  const detailsLink = page.getByText("View details.");
  await detailsLink.click();
  const modalTitle = page.getByText("Indexing Errors");
  await expect(modalTitle).toBeVisible();
}
// ─── Tests ────────────────────────────────────────────────────────────────────
// End-to-end coverage for the Index Attempt Errors modal on the admin
// connector page: banner visibility, modal open/close, table rendering,
// pagination, Resolve All, auto-refresh, access control, and API boundaries.
// Each test seeds IndexAttemptError rows directly in Postgres via the helpers
// above and drives the UI with Playwright.
test.describe("Index Attempt Errors Modal", () => {
  // DB seeding + UI polling makes these tests timing-sensitive; allow retries.
  test.describe.configure({ retries: 2 });
  // Per-test fixtures created in beforeEach and torn down in afterEach.
  let testCcPairId: number | null = null;
  let testIndexAttemptId: number | null = null;
  test.beforeEach(async ({ page }) => {
    await page.context().clearCookies();
    await loginAs(page, "admin");
    const apiClient = new OnyxApiClient(page.request);
    // Unique name avoids collisions across parallel/retried runs.
    testCcPairId = await apiClient.createFileConnector(
      `Error Modal Test ${Date.now()}`
    );
    // Seeded errors must hang off a real index attempt row.
    testIndexAttemptId = createIndexAttempt(testCcPairId);
  });
  test.afterEach(async ({ page }) => {
    if (testCcPairId !== null) {
      const apiClient = new OnyxApiClient(page.request);
      try {
        await apiClient.pauseConnector(testCcPairId);
      } catch {
        // May already be paused
      }
      try {
        await apiClient.deleteCCPair(testCcPairId);
      } catch (error) {
        console.warn(`Cleanup failed for CC pair ${testCcPairId}: ${error}`);
      }
      testCcPairId = null;
      testIndexAttemptId = null;
    }
  });
  // ── 1. Alert Banner Visibility ────────────────────────────────────────────
  test("1.1 banner is hidden when no errors exist", async ({ page }) => {
    await page.goto(`/admin/connector/${testCcPairId}`);
    await page.waitForLoadState("networkidle");
    await expect(
      page.getByText("Some documents failed to index")
    ).not.toBeVisible();
  });
  test("1.2 banner appears when unresolved errors exist", async ({ page }) => {
    createError({
      indexAttemptId: testIndexAttemptId!,
      ccPairId: testCcPairId!,
      documentId: "doc-banner-test",
      failureMessage: "Test error for banner visibility",
    });
    await page.goto(`/admin/connector/${testCcPairId}`);
    await page.waitForLoadState("networkidle");
    await waitForBanner(page);
    await expect(
      page.getByText("We ran into some issues while processing some documents.")
    ).toBeVisible();
    await expect(page.getByText("View details.")).toBeVisible();
  });
  test("1.3 banner disappears when all errors are resolved", async ({
    page,
  }) => {
    createError({
      indexAttemptId: testIndexAttemptId!,
      ccPairId: testCcPairId!,
      failureMessage: "Error to be resolved",
    });
    await page.goto(`/admin/connector/${testCcPairId}`);
    await page.waitForLoadState("networkidle");
    await waitForBanner(page);
    // Resolve all errors via DB
    resolveAllErrors(testCcPairId!);
    // Wait for the 5-second polling cycle to pick up the change
    await expect(
      page.getByText("Some documents failed to index")
    ).not.toBeVisible({ timeout: 15000 });
  });
  // ── 2. Opening and Closing the Modal ──────────────────────────────────────
  test("2.1 modal opens via View details link with correct content", async ({
    page,
  }) => {
    createError({
      indexAttemptId: testIndexAttemptId!,
      ccPairId: testCcPairId!,
      documentId: "doc-modal-open",
      failureMessage: "Error for modal open test",
    });
    await page.goto(`/admin/connector/${testCcPairId}`);
    await page.waitForLoadState("networkidle");
    await openErrorsModal(page);
    // Explanatory text
    await expect(
      page.getByText("Below are the errors encountered during indexing.")
    ).toBeVisible();
    await expect(
      page.getByText(
        "Click the button below to kick off a full re-index to try and resolve these errors."
      )
    ).toBeVisible();
    // Table headers
    await expect(
      page.getByRole("columnheader", { name: "Time" })
    ).toBeVisible();
    await expect(
      page.getByRole("columnheader", { name: "Document ID" })
    ).toBeVisible();
    await expect(
      page.getByRole("columnheader", { name: "Error Message" })
    ).toBeVisible();
    await expect(
      page.getByRole("columnheader", { name: "Status" })
    ).toBeVisible();
    // Error row content
    await expect(page.getByText("doc-modal-open")).toBeVisible();
    await expect(page.getByText("Error for modal open test")).toBeVisible();
    await expect(page.getByText("Unresolved")).toBeVisible();
    // Resolve All button
    await expect(
      page.getByRole("button", { name: "Resolve All" })
    ).toBeVisible();
  });
  test("2.2 modal closes via X button", async ({ page }) => {
    createError({
      indexAttemptId: testIndexAttemptId!,
      ccPairId: testCcPairId!,
      failureMessage: "Error for close-X test",
    });
    await page.goto(`/admin/connector/${testCcPairId}`);
    await page.waitForLoadState("networkidle");
    await openErrorsModal(page);
    // The close button is the first <button> in the dialog (rendered in Modal.Header)
    // "Resolve All" is in the footer, so .first() gets the X button
    const dialog = page.getByRole("dialog");
    await dialog
      .locator("button")
      .filter({ hasNotText: /Resolve All/ })
      .first()
      .click();
    await expect(page.getByText("Indexing Errors")).not.toBeVisible();
    // Banner should still be present
    await expect(
      page.getByText("Some documents failed to index")
    ).toBeVisible();
  });
  test("2.3 modal closes via Escape key", async ({ page }) => {
    createError({
      indexAttemptId: testIndexAttemptId!,
      ccPairId: testCcPairId!,
      failureMessage: "Error for close-escape test",
    });
    await page.goto(`/admin/connector/${testCcPairId}`);
    await page.waitForLoadState("networkidle");
    await openErrorsModal(page);
    await page.keyboard.press("Escape");
    await expect(page.getByText("Indexing Errors")).not.toBeVisible();
    await expect(
      page.getByText("Some documents failed to index")
    ).toBeVisible();
  });
  // ── 3. Table Content and Rendering ────────────────────────────────────────
  test("3.1 error row with document link renders as hyperlink", async ({
    page,
  }) => {
    createError({
      indexAttemptId: testIndexAttemptId!,
      ccPairId: testCcPairId!,
      documentId: "doc-linked",
      documentLink: "https://example.com/doc-linked",
      failureMessage: "Timeout while fetching document content",
    });
    await page.goto(`/admin/connector/${testCcPairId}`);
    await page.waitForLoadState("networkidle");
    await openErrorsModal(page);
    const docLink = page.getByRole("link", { name: "doc-linked" });
    await expect(docLink).toBeVisible();
    await expect(docLink).toHaveAttribute(
      "href",
      "https://example.com/doc-linked"
    );
    await expect(docLink).toHaveAttribute("target", "_blank");
    await expect(docLink).toHaveAttribute("rel", "noopener noreferrer");
    await expect(
      page.getByText("Timeout while fetching document content")
    ).toBeVisible();
    await expect(page.getByText("Unresolved")).toBeVisible();
  });
  test("3.2 error without document link shows plain text ID", async ({
    page,
  }) => {
    createError({
      indexAttemptId: testIndexAttemptId!,
      ccPairId: testCcPairId!,
      documentId: "doc-no-link",
      documentLink: null,
      failureMessage: "Error without link",
    });
    await page.goto(`/admin/connector/${testCcPairId}`);
    await page.waitForLoadState("networkidle");
    await openErrorsModal(page);
    await expect(page.getByText("doc-no-link")).toBeVisible();
    // Should NOT be a link
    await expect(page.getByRole("link", { name: "doc-no-link" })).toHaveCount(
      0
    );
  });
  test("3.3 error with entity ID fallback when no document ID", async ({
    page,
  }) => {
    createError({
      indexAttemptId: testIndexAttemptId!,
      ccPairId: testCcPairId!,
      documentId: null,
      entityId: "entity-abc",
      failureMessage: "Error with entity ID only",
    });
    await page.goto(`/admin/connector/${testCcPairId}`);
    await page.waitForLoadState("networkidle");
    await openErrorsModal(page);
    await expect(page.getByText("entity-abc")).toBeVisible();
  });
  test("3.4 error with no document ID or entity ID shows Unknown", async ({
    page,
  }) => {
    createError({
      indexAttemptId: testIndexAttemptId!,
      ccPairId: testCcPairId!,
      documentId: null,
      entityId: null,
      failureMessage: "Error with no identifiers",
    });
    await page.goto(`/admin/connector/${testCcPairId}`);
    await page.waitForLoadState("networkidle");
    await openErrorsModal(page);
    // The table cell should display "Unknown" as fallback
    const dialog = page.getByRole("dialog");
    await expect(dialog.getByText("Unknown")).toBeVisible();
  });
  test("3.5 entity ID used as link text when document link exists but no document ID", async ({
    page,
  }) => {
    createError({
      indexAttemptId: testIndexAttemptId!,
      ccPairId: testCcPairId!,
      documentId: null,
      entityId: "entity-link-test",
      documentLink: "https://example.com/entity",
      failureMessage: "Error with entity link",
    });
    await page.goto(`/admin/connector/${testCcPairId}`);
    await page.waitForLoadState("networkidle");
    await openErrorsModal(page);
    const link = page.getByRole("link", { name: "entity-link-test" });
    await expect(link).toBeVisible();
    await expect(link).toHaveAttribute("href", "https://example.com/entity");
  });
  test("3.6 HTML in error message is escaped (XSS safe)", async ({ page }) => {
    createError({
      indexAttemptId: testIndexAttemptId!,
      ccPairId: testCcPairId!,
      documentId: "doc-xss",
      failureMessage: "<script>alert('xss')</script>",
    });
    await page.goto(`/admin/connector/${testCcPairId}`);
    await page.waitForLoadState("networkidle");
    await openErrorsModal(page);
    // Text should be rendered literally, not executed as HTML
    await expect(page.getByText("<script>alert('xss')</script>")).toBeVisible();
  });
  // ── 4. Pagination ─────────────────────────────────────────────────────────
  test("4.1 no pagination controls when errors fit on one page", async ({
    page,
  }) => {
    createMultipleErrors(testIndexAttemptId!, testCcPairId!, 2);
    await page.goto(`/admin/connector/${testCcPairId}`);
    await page.waitForLoadState("networkidle");
    await openErrorsModal(page);
    // Both errors should be visible
    await expect(page.getByText("doc-1")).toBeVisible();
    await expect(page.getByText("doc-2")).toBeVisible();
    // PageSelector should not appear (only renders when totalPages > 1)
    // The "" next-page button only exists when pagination is shown
    const dialog = page.getByRole("dialog");
    // NOTE(review): the selector below targets literal empty text — the
    // pagination-arrow glyph appears to have been lost in an encoding pass;
    // confirm against the PageSelector component's actual button text.
    await expect(dialog.locator('text=""')).not.toBeVisible();
  });
  test("4.2 pagination appears and navigation works with many errors", async ({
    page,
  }) => {
    // 10 errors should span multiple pages given the modal's dynamic page size
    // (viewport 1280x720 typically yields ~5 rows per page in the modal)
    createMultipleErrors(testIndexAttemptId!, testCcPairId!, 10);
    await page.goto(`/admin/connector/${testCcPairId}`);
    await page.waitForLoadState("networkidle");
    await openErrorsModal(page);
    const dialog = page.getByRole("dialog");
    // NOTE(review): next/prev selectors below target literal empty text —
    // the arrow glyphs appear garbled by encoding; verify the real glyphs.
    const nextBtn = dialog.locator('text=""');
    const prevBtn = dialog.locator('text=""');
    // If the viewport produces a page size >= 10, pagination won't appear
    // Skip the navigation part in that case
    if (await nextBtn.isVisible()) {
      // Record page 1 content
      const page1Content = await dialog.locator("table tbody").textContent();
      // Navigate to page 2
      await nextBtn.click();
      const page2Content = await dialog.locator("table tbody").textContent();
      expect(page2Content).not.toBe(page1Content);
      // Navigate back to page 1
      await prevBtn.click();
      const backToPage1 = await dialog.locator("table tbody").textContent();
      expect(backToPage1).toBe(page1Content);
    }
  });
  // ── 5. Resolve All Functionality ──────────────────────────────────────────
  test("5.1 Resolve All triggers re-index and shows success toast", async ({
    page,
  }) => {
    createError({
      indexAttemptId: testIndexAttemptId!,
      ccPairId: testCcPairId!,
      documentId: "doc-resolve",
      failureMessage: "Error to resolve via re-index",
    });
    // Activate the connector so the re-index request can succeed
    // (createFileConnector pauses the connector by default)
    await page.request.put(`/api/manage/admin/cc-pair/${testCcPairId}/status`, {
      data: { status: "ACTIVE" },
    });
    await page.goto(`/admin/connector/${testCcPairId}`);
    await page.waitForLoadState("networkidle");
    await openErrorsModal(page);
    await page.getByRole("button", { name: "Resolve All" }).click();
    // Modal should close
    await expect(page.getByText("Indexing Errors")).not.toBeVisible();
    // Success toast should appear
    await expect(
      page.getByText("Complete re-indexing started successfully")
    ).toBeVisible({ timeout: 15000 });
  });
  test("5.2 Resolve All button is absent when isResolvingErrors is true", async ({
    page,
  }) => {
    createError({
      indexAttemptId: testIndexAttemptId!,
      ccPairId: testCcPairId!,
      documentId: "doc-resolving",
      failureMessage: "Error during resolving state",
    });
    // Create a separate index attempt that simulates a from-beginning re-index
    // in progress, with no errors belonging to it
    createIndexAttempt(testCcPairId!, {
      fromBeginning: true,
      status: "in_progress",
    });
    await page.goto(`/admin/connector/${testCcPairId}`);
    await page.waitForLoadState("networkidle");
    // The banner should show "Resolving failures" instead of "View details."
    await expect(page.getByText("Some documents failed to index")).toBeVisible({
      timeout: 15000,
    });
    await expect(page.getByText("Resolving failures")).toBeVisible();
    await expect(page.getByText("View details.")).not.toBeVisible();
  });
  // ── 6. Data Freshness and Auto-Refresh ────────────────────────────────────
  test("6.1 new error appears on page without manual reload", async ({
    page,
  }) => {
    await page.goto(`/admin/connector/${testCcPairId}`);
    await page.waitForLoadState("networkidle");
    // Initially no banner
    await expect(
      page.getByText("Some documents failed to index")
    ).not.toBeVisible();
    // Insert an error via DB while the page is already open
    createError({
      indexAttemptId: testIndexAttemptId!,
      ccPairId: testCcPairId!,
      documentId: "doc-auto-refresh",
      failureMessage: "Error added while page is open",
    });
    // Wait for the 5-second polling cycle to pick it up
    await expect(page.getByText("Some documents failed to index")).toBeVisible({
      timeout: 15000,
    });
  });
  test("6.2 errors list refreshes while modal is open", async ({ page }) => {
    createError({
      indexAttemptId: testIndexAttemptId!,
      ccPairId: testCcPairId!,
      documentId: "doc-existing",
      failureMessage: "Pre-existing error",
    });
    await page.goto(`/admin/connector/${testCcPairId}`);
    await page.waitForLoadState("networkidle");
    await openErrorsModal(page);
    await expect(page.getByText("doc-existing")).toBeVisible();
    // Add a second error while the modal is open
    createError({
      indexAttemptId: testIndexAttemptId!,
      ccPairId: testCcPairId!,
      documentId: "doc-new-while-open",
      failureMessage: "Error added while modal is open",
    });
    // The new error should appear after the polling cycle
    await expect(page.getByText("doc-new-while-open")).toBeVisible({
      timeout: 15000,
    });
  });
  // ── 7. Access Control ─────────────────────────────────────────────────────
  test("7.1 non-admin user cannot access the errors API endpoint", async ({
    page,
  }) => {
    // Register a basic (non-admin) user
    const email = `basic_${Date.now()}@example.com`;
    const password = "TestPassword123!";
    await page.request.post("/api/auth/register", {
      data: { email, username: email, password },
    });
    // Login as the basic user
    await page.context().clearCookies();
    await page.request.post("/api/auth/login", {
      form: { username: email, password },
    });
    // Try to access the errors endpoint
    const errorsRes = await page.request.get(
      `/api/manage/admin/cc-pair/${testCcPairId}/errors`
    );
    expect([401, 403]).toContain(errorsRes.status());
    // Re-login as admin for afterEach cleanup
    await page.context().clearCookies();
    await loginAs(page, "admin");
    // Clean up the basic user
    const apiClient = new OnyxApiClient(page.request);
    try {
      await apiClient.deleteUser(email);
    } catch {
      // Ignore cleanup failures
    }
  });
  // ── 8. Resolved Errors Filtered by Default ────────────────────────────────
  test("8.1 resolved errors are not shown in the modal and banner is absent", async ({
    page,
  }) => {
    createError({
      indexAttemptId: testIndexAttemptId!,
      ccPairId: testCcPairId!,
      documentId: "doc-resolved",
      failureMessage: "Already resolved error",
      isResolved: true,
    });
    // API without include_resolved should return 0 items
    const defaultRes = await page.request.get(
      `/api/manage/admin/cc-pair/${testCcPairId}/errors`
    );
    expect(defaultRes.ok()).toBe(true);
    const defaultData = await defaultRes.json();
    expect(defaultData.total_items).toBe(0);
    // API with include_resolved=true should return the error
    const resolvedRes = await page.request.get(
      `/api/manage/admin/cc-pair/${testCcPairId}/errors?include_resolved=true`
    );
    expect(resolvedRes.ok()).toBe(true);
    const resolvedData = await resolvedRes.json();
    expect(resolvedData.total_items).toBe(1);
    expect(resolvedData.items[0].is_resolved).toBe(true);
    // Banner should not appear on the page
    await page.goto(`/admin/connector/${testCcPairId}`);
    await page.waitForLoadState("networkidle");
    await expect(
      page.getByText("Some documents failed to index")
    ).not.toBeVisible();
  });
  // ── 9. API Pagination Boundary ────────────────────────────────────────────
  test("9.1 API rejects page_size over 100", async ({ page }) => {
    const res = await page.request.get(
      `/api/manage/admin/cc-pair/${testCcPairId}/errors?page_size=101`
    );
    expect(res.status()).toBe(422);
  });
});

View File

@@ -0,0 +1,210 @@
#!/usr/bin/env bash
#
# Seed IndexAttemptError records for local testing of the Index Attempt Errors Modal.
#
# Usage:
#   ./seed_index_attempt_errors.sh [--cc-pair-id <ID>] [--count <N>] [--clean]
#
# Options:
#   --cc-pair-id <ID>  Use an existing CC pair (skips connector creation)
#   --count <N>        Number of unresolved errors to insert (default: 7)
#   --clean            Remove ALL test-seeded errors (those with failure_message LIKE 'SEED:%') and exit
#
# Without --cc-pair-id, the script creates a file connector via the API
# and prints its CC pair ID so you can navigate to /admin/connector/<ID>.
#
# Prerequisites:
#   - Onyx services running (docker compose up)
#   - curl and jq installed
set -euo pipefail
# All of these can be overridden from the environment.
BASE_URL="${BASE_URL:-http://localhost:3000}"
ADMIN_EMAIL="${ADMIN_EMAIL:-admin_user@example.com}"
ADMIN_PASSWORD="${ADMIN_PASSWORD:-TestPassword123!}"
DB_CONTAINER="${DB_CONTAINER:-onyx-relational_db-1}"
CC_PAIR_ID=""
ERROR_COUNT=7
CLEAN=false
# --- Parse args ---
while [[ $# -gt 0 ]]; do
  case "$1" in
    --cc-pair-id) CC_PAIR_ID="$2"; shift 2 ;;
    --count) ERROR_COUNT="$2"; shift 2 ;;
    --clean) CLEAN=true; shift ;;
    *) echo "Unknown option: $1" >&2; exit 1 ;;
  esac
done
# --- Helper: run psql ---
# Flags: -q quiet, -t tuples only, -A unaligned, -X skip ~/.psqlrc.
psql_exec() {
  docker exec "$DB_CONTAINER" psql -U postgres -qtAX -c "$1"
}
# --- Clean mode ---
if $CLEAN; then
  # wc -l counts the RETURNING rows; yields 0 when nothing matched.
  deleted=$(psql_exec "DELETE FROM index_attempt_errors WHERE failure_message LIKE 'SEED:%' RETURNING id;" | wc -l)
  echo "Deleted $deleted seeded error(s)."
  exit 0
fi
# --- Authenticate and get session cookie ---
# Cookie jar lives in a temp file and is removed on any exit path.
COOKIE_JAR=$(mktemp)
trap 'rm -f "$COOKIE_JAR"' EXIT
echo "Authenticating as $ADMIN_EMAIL..."
login_resp=$(curl -s -o /dev/null -w "%{http_code}" \
  -c "$COOKIE_JAR" \
  -X POST "$BASE_URL/api/auth/login" \
  -H "Content-Type: application/x-www-form-urlencoded" \
  -d "username=${ADMIN_EMAIL}&password=${ADMIN_PASSWORD}")
if [[ "$login_resp" != "200" && "$login_resp" != "204" && "$login_resp" != "302" ]]; then
  echo "Login failed (HTTP $login_resp). Check credentials." >&2
  # Try the simpler a@example.com / a creds as fallback
  echo "Retrying with a@example.com / a..."
  ADMIN_EMAIL="a@example.com"
  ADMIN_PASSWORD="a"
  login_resp=$(curl -s -o /dev/null -w "%{http_code}" \
    -c "$COOKIE_JAR" \
    -X POST "$BASE_URL/api/auth/login" \
    -H "Content-Type: application/x-www-form-urlencoded" \
    -d "username=${ADMIN_EMAIL}&password=${ADMIN_PASSWORD}")
  if [[ "$login_resp" != "200" && "$login_resp" != "204" && "$login_resp" != "302" ]]; then
    echo "Login failed again (HTTP $login_resp)." >&2
    exit 1
  fi
fi
echo "Authenticated."
# --- Create a file connector if no CC pair specified ---
if [[ -z "$CC_PAIR_ID" ]]; then
  echo "Creating file connector..."
  create_resp=$(curl -s -b "$COOKIE_JAR" \
    -X POST "$BASE_URL/api/manage/admin/connector-with-mock-credential" \
    -H "Content-Type: application/json" \
    -d '{
      "name": "Seed Errors Test Connector",
      "source": "file",
      "input_type": "load_state",
      "connector_specific_config": {"file_locations": []},
      "refresh_freq": null,
      "prune_freq": null,
      "indexing_start": null,
      "access_type": "public",
      "groups": []
    }')
  # The endpoint returns the CC pair id in the "data" field.
  CC_PAIR_ID=$(echo "$create_resp" | jq -r '.data // empty')
  if [[ -z "$CC_PAIR_ID" ]]; then
    echo "Failed to create connector: $create_resp" >&2
    exit 1
  fi
  echo "Created CC pair ID: $CC_PAIR_ID"
else
  echo "Using existing CC pair ID: $CC_PAIR_ID"
fi
# --- Find or create an index attempt for this CC pair ---
# Errors must reference an index_attempt row; reuse the newest one if present.
ATTEMPT_ID=$(psql_exec "
  SELECT id FROM index_attempt
  WHERE connector_credential_pair_id = $CC_PAIR_ID
  ORDER BY id DESC LIMIT 1;
")
if [[ -z "$ATTEMPT_ID" ]]; then
  echo "No index attempt found. Creating one..."
  SEARCH_SETTINGS_ID=$(psql_exec "SELECT id FROM search_settings ORDER BY id DESC LIMIT 1;")
  if [[ -z "$SEARCH_SETTINGS_ID" ]]; then
    echo "No search_settings found in DB." >&2
    exit 1
  fi
  # Seed a from-beginning attempt in completed_with_errors so the UI state
  # matches an indexing run that produced failures.
  ATTEMPT_ID=$(psql_exec "
    INSERT INTO index_attempt (connector_credential_pair_id, search_settings_id, from_beginning, status, new_docs_indexed, total_docs_indexed, docs_removed_from_index, time_updated, completed_batches, total_chunks)
    VALUES ($CC_PAIR_ID, $SEARCH_SETTINGS_ID, true, 'completed_with_errors', 5, 10, 0, now(), 0, 0)
    RETURNING id;
  ")
  echo "Created index attempt ID: $ATTEMPT_ID"
else
  echo "Using existing index attempt ID: $ATTEMPT_ID"
fi
# --- Insert the curated test errors ---
# Every seeded message is prefixed 'SEED:' so --clean can find and delete them.
echo "Inserting test errors..."
# Error 1: Document with link (hyperlinked doc ID)
psql_exec "
  INSERT INTO index_attempt_errors (index_attempt_id, connector_credential_pair_id, document_id, document_link, entity_id, failure_message, is_resolved)
  VALUES ($ATTEMPT_ID, $CC_PAIR_ID, 'doc-001', 'https://example.com/doc-001', NULL, 'SEED: Timeout while fetching document content from remote server', false);
"
# Error 2: Document without link (plain text doc ID)
psql_exec "
  INSERT INTO index_attempt_errors (index_attempt_id, connector_credential_pair_id, document_id, document_link, entity_id, failure_message, is_resolved)
  VALUES ($ATTEMPT_ID, $CC_PAIR_ID, 'doc-no-link', NULL, NULL, 'SEED: Permission denied accessing resource - authentication token expired', false);
"
# Error 3: Entity ID only (no document_id, no link)
psql_exec "
  INSERT INTO index_attempt_errors (index_attempt_id, connector_credential_pair_id, document_id, document_link, entity_id, failure_message, is_resolved)
  VALUES ($ATTEMPT_ID, $CC_PAIR_ID, NULL, NULL, 'entity-abc', 'SEED: Entity sync failed due to upstream rate limiting', false);
"
# Error 4: Entity ID with link (hyperlinked entity)
psql_exec "
  INSERT INTO index_attempt_errors (index_attempt_id, connector_credential_pair_id, document_id, document_link, entity_id, failure_message, is_resolved)
  VALUES ($ATTEMPT_ID, $CC_PAIR_ID, NULL, 'https://example.com/entity', 'entity-link-test', 'SEED: Connection reset by peer during entity fetch', false);
"
# Error 5: Neither document_id nor entity_id (renders "Unknown")
psql_exec "
  INSERT INTO index_attempt_errors (index_attempt_id, connector_credential_pair_id, document_id, document_link, entity_id, failure_message, is_resolved)
  VALUES ($ATTEMPT_ID, $CC_PAIR_ID, NULL, NULL, NULL, 'SEED: Unknown document failed with a catastrophic internal error that produced a very long error message designed to test the scrollable cell behavior in the modal UI. This message continues for quite a while to ensure the 60px height overflow-y-auto container is properly exercised during manual testing.', false);
"
# Error 6: XSS test (special HTML characters)
psql_exec "
  INSERT INTO index_attempt_errors (index_attempt_id, connector_credential_pair_id, document_id, document_link, entity_id, failure_message, is_resolved)
  VALUES ($ATTEMPT_ID, $CC_PAIR_ID, 'doc-xss', NULL, NULL, 'SEED: <script>alert(''xss'')</script>', false);
"
# Error 7: Single-character error message
psql_exec "
  INSERT INTO index_attempt_errors (index_attempt_id, connector_credential_pair_id, document_id, document_link, entity_id, failure_message, is_resolved)
  VALUES ($ATTEMPT_ID, $CC_PAIR_ID, 'doc-short', NULL, NULL, 'SEED: X', false);
"
# Insert additional generic errors if --count > 7
if (( ERROR_COUNT > 7 )); then
  extra=$(( ERROR_COUNT - 7 ))
  echo "Inserting $extra additional generic errors..."
  for i in $(seq 1 "$extra"); do
    psql_exec "
      INSERT INTO index_attempt_errors (index_attempt_id, connector_credential_pair_id, document_id, document_link, entity_id, failure_message, is_resolved)
      VALUES ($ATTEMPT_ID, $CC_PAIR_ID, 'doc-extra-$i', NULL, NULL, 'SEED: Generic error #$i for pagination testing', false);
    "
  done
fi
# Error: One resolved error (to test filtering)
psql_exec "
  INSERT INTO index_attempt_errors (index_attempt_id, connector_credential_pair_id, document_id, document_link, entity_id, failure_message, is_resolved)
  VALUES ($ATTEMPT_ID, $CC_PAIR_ID, 'doc-resolved', NULL, NULL, 'SEED: This error was already resolved', true);
"
# --- Verify ---
# Counts only rows this script seeded (failure_message LIKE 'SEED:%').
total=$(psql_exec "SELECT count(*) FROM index_attempt_errors WHERE connector_credential_pair_id = $CC_PAIR_ID AND failure_message LIKE 'SEED:%';")
unresolved=$(psql_exec "SELECT count(*) FROM index_attempt_errors WHERE connector_credential_pair_id = $CC_PAIR_ID AND failure_message LIKE 'SEED:%' AND is_resolved = false;")
echo ""
echo "=== Done ==="
echo "CC Pair ID:       $CC_PAIR_ID"
echo "Index Attempt ID: $ATTEMPT_ID"
echo "Seeded errors:    $total ($unresolved unresolved, $(( total - unresolved )) resolved)"
echo ""
echo "View in browser:  $BASE_URL/admin/connector/$CC_PAIR_ID"
echo "API check:        curl -b <cookies> '$BASE_URL/api/manage/admin/cc-pair/$CC_PAIR_ID/errors'"
echo ""
echo "To clean up:        $0 --clean"
echo "To delete connector: curl -b <cookies> -X DELETE '$BASE_URL/api/manage/admin/cc-pair/$CC_PAIR_ID'"