Compare commits

..

1 Commits

Author SHA1 Message Date
Jamison Lahman
4e9f651d64 fix(favicon): include content type in link metadata 2026-03-17 17:13:06 -07:00
52 changed files with 525 additions and 1654 deletions

View File

@@ -317,7 +317,6 @@ celery_app.autodiscover_tasks(
"onyx.background.celery.tasks.docprocessing",
"onyx.background.celery.tasks.evals",
"onyx.background.celery.tasks.hierarchyfetching",
"onyx.background.celery.tasks.hooks",
"onyx.background.celery.tasks.periodic",
"onyx.background.celery.tasks.pruning",
"onyx.background.celery.tasks.shared",

View File

@@ -14,7 +14,6 @@ from onyx.configs.constants import ONYX_CLOUD_CELERY_TASK_PREFIX
from onyx.configs.constants import OnyxCeleryPriority
from onyx.configs.constants import OnyxCeleryQueues
from onyx.configs.constants import OnyxCeleryTask
from onyx.hooks.utils import HOOKS_AVAILABLE
from shared_configs.configs import MULTI_TENANT
# choosing 15 minutes because it roughly gives us enough time to process many tasks
@@ -362,19 +361,6 @@ if not MULTI_TENANT:
tasks_to_schedule.extend(beat_task_templates)
if HOOKS_AVAILABLE:
tasks_to_schedule.append(
{
"name": "hook-execution-log-cleanup",
"task": OnyxCeleryTask.HOOK_EXECUTION_LOG_CLEANUP_TASK,
"schedule": timedelta(days=1),
"options": {
"priority": OnyxCeleryPriority.LOW,
"expires": BEAT_EXPIRES_DEFAULT,
},
}
)
def generate_cloud_tasks(
beat_tasks: list[dict], beat_templates: list[dict], beat_multiplier: float

View File

@@ -1,35 +0,0 @@
from celery import shared_task
from onyx.configs.app_configs import JOB_TIMEOUT
from onyx.configs.constants import OnyxCeleryTask
from onyx.db.engine.sql_engine import get_session_with_current_tenant
from onyx.db.hook import cleanup_old_execution_logs__no_commit
from onyx.utils.logger import setup_logger
logger = setup_logger()
_HOOK_EXECUTION_LOG_RETENTION_DAYS: int = 30
@shared_task(
name=OnyxCeleryTask.HOOK_EXECUTION_LOG_CLEANUP_TASK,
ignore_result=True,
soft_time_limit=JOB_TIMEOUT,
trail=False,
)
def hook_execution_log_cleanup_task(*, tenant_id: str) -> None: # noqa: ARG001
try:
with get_session_with_current_tenant() as db_session:
deleted: int = cleanup_old_execution_logs__no_commit(
db_session=db_session,
max_age_days=_HOOK_EXECUTION_LOG_RETENTION_DAYS,
)
db_session.commit()
if deleted:
logger.info(
f"Deleted {deleted} hook execution log(s) older than "
f"{_HOOK_EXECUTION_LOG_RETENTION_DAYS} days."
)
except Exception:
logger.exception("Failed to clean up hook execution logs")
raise

View File

@@ -297,9 +297,7 @@ class PostgresCacheBackend(CacheBackend):
def _lock_id_for(self, name: str) -> int:
"""Map *name* to a 64-bit signed int for ``pg_advisory_lock``."""
h = hashlib.md5(
f"{self._tenant_id}:{name}".encode(), usedforsecurity=False
).digest()
h = hashlib.md5(f"{self._tenant_id}:{name}".encode()).digest()
return struct.unpack("q", h[:8])[0]

View File

@@ -318,17 +318,6 @@ VERIFY_CREATE_OPENSEARCH_INDEX_ON_INIT_MT = (
OPENSEARCH_MIGRATION_GET_VESPA_CHUNKS_PAGE_SIZE = int(
os.environ.get("OPENSEARCH_MIGRATION_GET_VESPA_CHUNKS_PAGE_SIZE") or 500
)
# If set, will override the default number of shards and replicas for the index.
OPENSEARCH_INDEX_NUM_SHARDS: int | None = (
int(os.environ["OPENSEARCH_INDEX_NUM_SHARDS"])
if os.environ.get("OPENSEARCH_INDEX_NUM_SHARDS", None) is not None
else None
)
OPENSEARCH_INDEX_NUM_REPLICAS: int | None = (
int(os.environ["OPENSEARCH_INDEX_NUM_REPLICAS"])
if os.environ.get("OPENSEARCH_INDEX_NUM_REPLICAS", None) is not None
else None
)
VESPA_HOST = os.environ.get("VESPA_HOST") or "localhost"
# NOTE: this is used if and only if the vespa config server is accessible via a

View File

@@ -597,9 +597,6 @@ class OnyxCeleryTask:
EXPORT_QUERY_HISTORY_TASK = "export_query_history_task"
EXPORT_QUERY_HISTORY_CLEANUP_TASK = "export_query_history_cleanup_task"
# Hook execution log retention
HOOK_EXECUTION_LOG_CLEANUP_TASK = "hook_execution_log_cleanup_task"
# Sandbox cleanup
CLEANUP_IDLE_SANDBOXES = "cleanup_idle_sandboxes"
CLEANUP_OLD_SNAPSHOTS = "cleanup_old_snapshots"

View File

@@ -18,7 +18,6 @@ from onyx.configs.app_configs import OPENSEARCH_HOST
from onyx.configs.app_configs import OPENSEARCH_REST_API_PORT
from onyx.document_index.interfaces_new import TenantState
from onyx.document_index.opensearch.schema import DocumentChunk
from onyx.document_index.opensearch.schema import DocumentChunkWithoutVectors
from onyx.document_index.opensearch.schema import get_opensearch_doc_chunk_id
from onyx.document_index.opensearch.search import DEFAULT_OPENSEARCH_MAX_RESULT_WINDOW
from onyx.utils.logger import setup_logger
@@ -57,8 +56,8 @@ class SearchHit(BaseModel, Generic[SchemaDocumentModel]):
# Maps schema property name to a list of highlighted snippets with match
# terms wrapped in tags (e.g. "something <hi>keyword</hi> other thing").
match_highlights: dict[str, list[str]] = {}
# Score explanation from OpenSearch when "explain": true is set in the
# query. Contains detailed breakdown of how the score was calculated.
# Score explanation from OpenSearch when "explain": true is set in the query.
# Contains detailed breakdown of how the score was calculated.
explanation: dict[str, Any] | None = None
@@ -834,13 +833,9 @@ class OpenSearchIndexClient(OpenSearchClient):
@log_function_time(print_only=True, debug_only=True)
def search(
self, body: dict[str, Any], search_pipeline_id: str | None
) -> list[SearchHit[DocumentChunkWithoutVectors]]:
) -> list[SearchHit[DocumentChunk]]:
"""Searches the index.
NOTE: Does not return vector fields. In order to take advantage of
performance benefits, the search body should exclude the schema's vector
fields.
TODO(andrei): Ideally we could check that every field in the body is
present in the index, to avoid a class of runtime bugs that could easily
be caught during development. Or change the function signature to accept
@@ -888,7 +883,7 @@ class OpenSearchIndexClient(OpenSearchClient):
raise_on_timeout=True,
)
search_hits: list[SearchHit[DocumentChunkWithoutVectors]] = []
search_hits: list[SearchHit[DocumentChunk]] = []
for hit in hits:
document_chunk_source: dict[str, Any] | None = hit.get("_source")
if not document_chunk_source:
@@ -898,10 +893,8 @@ class OpenSearchIndexClient(OpenSearchClient):
document_chunk_score = hit.get("_score", None)
match_highlights: dict[str, list[str]] = hit.get("highlight", {})
explanation: dict[str, Any] | None = hit.get("_explanation", None)
search_hit = SearchHit[DocumentChunkWithoutVectors](
document_chunk=DocumentChunkWithoutVectors.model_validate(
document_chunk_source
),
search_hit = SearchHit[DocumentChunk](
document_chunk=DocumentChunk.model_validate(document_chunk_source),
score=document_chunk_score,
match_highlights=match_highlights,
explanation=explanation,

View File

@@ -47,7 +47,6 @@ from onyx.document_index.opensearch.schema import ACCESS_CONTROL_LIST_FIELD_NAME
from onyx.document_index.opensearch.schema import CONTENT_FIELD_NAME
from onyx.document_index.opensearch.schema import DOCUMENT_SETS_FIELD_NAME
from onyx.document_index.opensearch.schema import DocumentChunk
from onyx.document_index.opensearch.schema import DocumentChunkWithoutVectors
from onyx.document_index.opensearch.schema import DocumentSchema
from onyx.document_index.opensearch.schema import get_opensearch_doc_chunk_id
from onyx.document_index.opensearch.schema import GLOBAL_BOOST_FIELD_NAME
@@ -118,7 +117,7 @@ def set_cluster_state(client: OpenSearchClient) -> None:
def _convert_retrieved_opensearch_chunk_to_inference_chunk_uncleaned(
chunk: DocumentChunkWithoutVectors,
chunk: DocumentChunk,
score: float | None,
highlights: dict[str, list[str]],
) -> InferenceChunkUncleaned:
@@ -881,7 +880,7 @@ class OpenSearchDocumentIndex(DocumentIndex):
)
results: list[InferenceChunk] = []
for chunk_request in chunk_requests:
search_hits: list[SearchHit[DocumentChunkWithoutVectors]] = []
search_hits: list[SearchHit[DocumentChunk]] = []
query_body = DocumentQuery.get_from_document_id_query(
document_id=chunk_request.document_id,
tenant_state=self._tenant_state,
@@ -945,7 +944,7 @@ class OpenSearchDocumentIndex(DocumentIndex):
include_hidden=False,
)
normalization_pipeline_name, _ = get_normalization_pipeline_name_and_config()
search_hits: list[SearchHit[DocumentChunkWithoutVectors]] = self._client.search(
search_hits: list[SearchHit[DocumentChunk]] = self._client.search(
body=query_body,
search_pipeline_id=normalization_pipeline_name,
)
@@ -977,7 +976,7 @@ class OpenSearchDocumentIndex(DocumentIndex):
index_filters=filters,
num_to_retrieve=num_to_retrieve,
)
search_hits: list[SearchHit[DocumentChunkWithoutVectors]] = self._client.search(
search_hits: list[SearchHit[DocumentChunk]] = self._client.search(
body=query_body,
search_pipeline_id=None,
)

View File

@@ -11,8 +11,6 @@ from pydantic import model_serializer
from pydantic import model_validator
from pydantic import SerializerFunctionWrapHandler
from onyx.configs.app_configs import OPENSEARCH_INDEX_NUM_REPLICAS
from onyx.configs.app_configs import OPENSEARCH_INDEX_NUM_SHARDS
from onyx.configs.app_configs import OPENSEARCH_TEXT_ANALYZER
from onyx.configs.app_configs import USING_AWS_MANAGED_OPENSEARCH
from onyx.document_index.interfaces_new import TenantState
@@ -102,9 +100,9 @@ def set_or_convert_timezone_to_utc(value: datetime) -> datetime:
return value
class DocumentChunkWithoutVectors(BaseModel):
class DocumentChunk(BaseModel):
"""
Represents a chunk of a document in the OpenSearch index without vectors.
Represents a chunk of a document in the OpenSearch index.
The names of these fields are based on the OpenSearch schema. Changes to the
schema require changes here. See get_document_schema.
@@ -126,7 +124,9 @@ class DocumentChunkWithoutVectors(BaseModel):
# Either both should be None or both should be non-None.
title: str | None = None
title_vector: list[float] | None = None
content: str
content_vector: list[float]
source_type: str
# A list of key-value pairs separated by INDEX_SEPARATOR. See
@@ -176,9 +176,19 @@ class DocumentChunkWithoutVectors(BaseModel):
def __str__(self) -> str:
return (
f"DocumentChunk(document_id={self.document_id}, chunk_index={self.chunk_index}, "
f"content length={len(self.content)}, tenant_id={self.tenant_id.tenant_id})."
f"content length={len(self.content)}, content vector length={len(self.content_vector)}, "
f"tenant_id={self.tenant_id.tenant_id})"
)
@model_validator(mode="after")
def check_title_and_title_vector_are_consistent(self) -> Self:
# title and title_vector should both either be None or not.
if self.title is not None and self.title_vector is None:
raise ValueError("Bug: Title vector must not be None if title is not None.")
if self.title_vector is not None and self.title is None:
raise ValueError("Bug: Title must not be None if title vector is not None.")
return self
@model_serializer(mode="wrap")
def serialize_model(
self, handler: SerializerFunctionWrapHandler
@@ -295,35 +305,6 @@ class DocumentChunkWithoutVectors(BaseModel):
return TenantState(tenant_id=value, multitenant=MULTI_TENANT)
class DocumentChunk(DocumentChunkWithoutVectors):
"""Represents a chunk of a document in the OpenSearch index.
The names of these fields are based on the OpenSearch schema. Changes to the
schema require changes here. See get_document_schema.
"""
model_config = {"frozen": True}
title_vector: list[float] | None = None
content_vector: list[float]
def __str__(self) -> str:
return (
f"DocumentChunk(document_id={self.document_id}, chunk_index={self.chunk_index}, "
f"content length={len(self.content)}, content vector length={len(self.content_vector)}, "
f"tenant_id={self.tenant_id.tenant_id})"
)
@model_validator(mode="after")
def check_title_and_title_vector_are_consistent(self) -> Self:
# title and title_vector should both either be None or not.
if self.title is not None and self.title_vector is None:
raise ValueError("Bug: Title vector must not be None if title is not None.")
if self.title_vector is not None and self.title is None:
raise ValueError("Bug: Title must not be None if title vector is not None.")
return self
class DocumentSchema:
"""
Represents the schema and indexing strategies of the OpenSearch index.
@@ -536,34 +517,77 @@ class DocumentSchema:
return schema
@staticmethod
def get_index_settings_based_on_environment() -> dict[str, Any]:
def get_index_settings() -> dict[str, Any]:
"""
Returns the index settings based on the environment.
Standard settings for reasonable local index and search performance.
"""
if USING_AWS_MANAGED_OPENSEARCH:
# NOTE: The number of data copies, including the primary (not a
# replica) copy, must be divisible by the number of AZs.
if MULTI_TENANT:
number_of_shards = 324
number_of_replicas = 2
else:
number_of_shards = 3
number_of_replicas = 2
else:
number_of_shards = 1
number_of_replicas = 1
if OPENSEARCH_INDEX_NUM_SHARDS is not None:
number_of_shards = OPENSEARCH_INDEX_NUM_SHARDS
if OPENSEARCH_INDEX_NUM_REPLICAS is not None:
number_of_replicas = OPENSEARCH_INDEX_NUM_REPLICAS
return {
"index": {
"number_of_shards": number_of_shards,
"number_of_replicas": number_of_replicas,
"number_of_shards": 1,
"number_of_replicas": 1,
# Required for vector search.
"knn": True,
"knn.algo_param.ef_search": EF_SEARCH,
}
}
@staticmethod
def get_index_settings_for_aws_managed_opensearch_st_dev() -> dict[str, Any]:
"""
Settings for AWS-managed OpenSearch.
Our AWS-managed OpenSearch cluster has 3 data nodes in 3 availability
zones.
- We use 3 shards to distribute load across all data nodes.
- We use 2 replicas to ensure each shard has a copy in each
availability zone. This is a hard requirement from AWS. The number
of data copies, including the primary (not a replica) copy, must be
divisible by the number of AZs.
"""
return {
"index": {
"number_of_shards": 3,
"number_of_replicas": 2,
# Required for vector search.
"knn": True,
"knn.algo_param.ef_search": EF_SEARCH,
}
}
@staticmethod
def get_index_settings_for_aws_managed_opensearch_mt_cloud() -> dict[str, Any]:
"""
Settings for AWS-managed OpenSearch in multi-tenant cloud.
324 shards very roughly targets a storage load of ~30Gb per shard, which
according to AWS OpenSearch documentation is within a good target range.
As documented above we need 2 replicas for a total of 3 copies of the
data because the cluster is configured with 3-AZ awareness.
"""
return {
"index": {
"number_of_shards": 324,
"number_of_replicas": 2,
# Required for vector search.
"knn": True,
"knn.algo_param.ef_search": EF_SEARCH,
}
}
@staticmethod
def get_index_settings_based_on_environment() -> dict[str, Any]:
"""
Returns the index settings based on the environment.
"""
if USING_AWS_MANAGED_OPENSEARCH:
if MULTI_TENANT:
return (
DocumentSchema.get_index_settings_for_aws_managed_opensearch_mt_cloud()
)
else:
return (
DocumentSchema.get_index_settings_for_aws_managed_opensearch_st_dev()
)
else:
return DocumentSchema.get_index_settings()

View File

@@ -235,17 +235,9 @@ class DocumentQuery:
# returning some number of results less than the index max allowed
# return size.
"size": DEFAULT_OPENSEARCH_MAX_RESULT_WINDOW,
# By default exclude retrieving the vector fields in order to save
# on retrieval cost as we don't need them upstream.
"_source": {
"excludes": [TITLE_VECTOR_FIELD_NAME, CONTENT_VECTOR_FIELD_NAME]
},
"_source": get_full_document,
"timeout": f"{DEFAULT_OPENSEARCH_QUERY_TIMEOUT_S}s",
}
if not get_full_document:
# If we explicitly do not want the underlying document, we will only
# retrieve IDs.
final_get_ids_query["_source"] = False
if not OPENSEARCH_PROFILING_DISABLED:
final_get_ids_query["profile"] = True
@@ -395,11 +387,6 @@ class DocumentQuery:
"size": num_hits,
"highlight": match_highlights_configuration,
"timeout": f"{DEFAULT_OPENSEARCH_QUERY_TIMEOUT_S}s",
# Exclude retrieving the vector fields in order to save on
# retrieval cost as we don't need them upstream.
"_source": {
"excludes": [TITLE_VECTOR_FIELD_NAME, CONTENT_VECTOR_FIELD_NAME]
},
}
# Explain is for scoring breakdowns.
@@ -459,11 +446,6 @@ class DocumentQuery:
},
"size": num_to_retrieve,
"timeout": f"{DEFAULT_OPENSEARCH_QUERY_TIMEOUT_S}s",
# Exclude retrieving the vector fields in order to save on
# retrieval cost as we don't need them upstream.
"_source": {
"excludes": [TITLE_VECTOR_FIELD_NAME, CONTENT_VECTOR_FIELD_NAME]
},
}
if not OPENSEARCH_PROFILING_DISABLED:
final_random_search_query["profile"] = True

View File

@@ -88,7 +88,6 @@ class OnyxErrorCode(Enum):
SERVICE_UNAVAILABLE = ("SERVICE_UNAVAILABLE", 503)
BAD_GATEWAY = ("BAD_GATEWAY", 502)
LLM_PROVIDER_ERROR = ("LLM_PROVIDER_ERROR", 502)
HOOK_EXECUTION_FAILED = ("HOOK_EXECUTION_FAILED", 502)
GATEWAY_TIMEOUT = ("GATEWAY_TIMEOUT", 504)
def __init__(self, code: str, status_code: int) -> None:

View File

@@ -1,177 +0,0 @@
"""Hook executor — calls a customer's external HTTP endpoint for a given hook point.
Usage:
result = await execute_hook(
db_session=db_session,
hook_point=HookPoint.QUERY_PROCESSING,
payload={"query": "...", "user_email": "...", "chat_session_id": "..."},
)
if isinstance(result, HookSkipped):
# no active hook configured — continue with original behavior
...
elif isinstance(result, HookSoftFailed):
# hook failed but fail strategy is SOFT — continue with original behavior
...
else:
# result is the response payload dict from the customer's endpoint
...
DB session design
-----------------
The executor uses two sessions:
1. Caller's session (db_session) — used only for the hook lookup read. All
needed fields are extracted from the Hook object before the HTTP call, so
the caller's session is not held open during the external HTTP request.
2. Persist session — a separate short-lived session opened after the HTTP call
completes to write the HookExecutionLog row and update is_reachable on the
Hook. Using a separate session ensures both writes persist even when the
caller's outer transaction rolls back (e.g. on HARD fail where an OnyxError
is raised).
"""
import time
from typing import Any
import httpx
from sqlalchemy.orm import Session
from onyx.db.engine.sql_engine import get_session_with_current_tenant
from onyx.db.enums import HookFailStrategy
from onyx.db.enums import HookPoint
from onyx.db.hook import create_hook_execution_log__no_commit
from onyx.db.hook import get_non_deleted_hook_by_hook_point
from onyx.db.hook import update_hook__no_commit
from onyx.error_handling.error_codes import OnyxErrorCode
from onyx.error_handling.exceptions import OnyxError
from onyx.hooks.utils import HOOKS_AVAILABLE
from onyx.utils.logger import setup_logger
logger = setup_logger()
class HookSkipped:
"""No active hook configured for this hook point."""
class HookSoftFailed:
"""Hook was called but failed with SOFT fail strategy — continuing."""
async def execute_hook(
*,
db_session: Session,
hook_point: HookPoint,
payload: dict[str, Any],
) -> dict[str, Any] | HookSkipped | HookSoftFailed:
"""Call the active hook for the given hook point with the provided payload.
Returns:
- dict[str, Any] on success — the response payload from the customer's endpoint
- HookSkipped — no active hook configured for this hook point
- HookSoftFailed — hook was called but failed with SOFT fail strategy
Raises OnyxError(HOOK_EXECUTION_FAILED) if the hook failed and
fail_strategy is HARD.
After the HTTP call, a separate DB session persists both the HookExecutionLog
and any is_reachable update on the Hook, ensuring they survive even if the
caller's outer transaction rolls back.
"""
# Early-exit guards — no HTTP call is made and no DB writes are performed
# for any of these paths. There is nothing to log and no reachability
# information to update.
if not HOOKS_AVAILABLE:
return HookSkipped()
hook = get_non_deleted_hook_by_hook_point(
db_session=db_session, hook_point=hook_point
)
if hook is None or not hook.is_active:
return HookSkipped()
endpoint_url = hook.endpoint_url
if not endpoint_url:
return HookSkipped()
api_key: str | None = (
hook.api_key.get_value(apply_mask=False) if hook.api_key else None
)
timeout = hook.timeout_seconds
hook_id = hook.id
fail_strategy = hook.fail_strategy
headers: dict[str, str] = {"Content-Type": "application/json"}
if api_key:
headers["Authorization"] = f"Bearer {api_key}"
start = time.monotonic()
status_code: int | None = None
error_message: str | None = None
response_payload: dict[str, Any] | None = None
is_reachable: bool | None = None # None = no change
try:
async with httpx.AsyncClient(timeout=timeout) as client:
response = await client.post(endpoint_url, json=payload, headers=headers)
status_code = response.status_code
response.raise_for_status()
response_payload = response.json()
is_success = True
is_reachable = True
except httpx.ConnectError as e:
# Endpoint is definitively unreachable (DNS failure, connection refused, etc.)
error_message = f"Hook endpoint unreachable: {e}"
is_success = False
is_reachable = False
except httpx.TimeoutException as e:
error_message = f"Hook timed out after {timeout}s: {e}"
is_success = False
except httpx.HTTPStatusError as e:
error_message = (
f"Hook returned HTTP {e.response.status_code}: {e.response.text}"
)
is_success = False
except Exception as e:
error_message = f"Hook call failed: {e}"
is_success = False
duration_ms = int((time.monotonic() - start) * 1000)
# Update is_reachable and write the execution log via separate sessions so
# both persist even if the caller's outer transaction rolls back (e.g. on HARD fail).
try:
with get_session_with_current_tenant() as persist_session:
if is_reachable is not None:
update_hook__no_commit(
db_session=persist_session,
hook_id=hook_id,
is_reachable=is_reachable,
)
create_hook_execution_log__no_commit(
db_session=persist_session,
hook_id=hook_id,
is_success=is_success,
error_message=error_message,
status_code=status_code,
duration_ms=duration_ms,
)
persist_session.commit()
except Exception:
logger.exception(
f"Failed to persist hook execution result for hook_id={hook_id}"
)
if not is_success:
if fail_strategy == HookFailStrategy.HARD:
raise OnyxError(
OnyxErrorCode.HOOK_EXECUTION_FAILED,
error_message or "Hook execution failed.",
)
logger.warning(
f"Hook execution failed (soft fail) for hook_id={hook_id}: {error_message}"
)
return HookSoftFailed()
return response_payload # type: ignore[return-value]

View File

@@ -1,5 +0,0 @@
from onyx.configs.app_configs import HOOK_ENABLED
from shared_configs.configs import MULTI_TENANT
# True only when hooks are available: single-tenant deployment with HOOK_ENABLED=true.
HOOKS_AVAILABLE: bool = HOOK_ENABLED and not MULTI_TENANT

View File

@@ -479,9 +479,7 @@ def is_zip_file(file: UploadFile) -> bool:
def upload_files(
files: list[UploadFile],
file_origin: FileOrigin = FileOrigin.CONNECTOR,
unzip: bool = True,
files: list[UploadFile], file_origin: FileOrigin = FileOrigin.CONNECTOR
) -> FileUploadResponse:
# Skip directories and known macOS metadata entries
@@ -504,46 +502,31 @@ def upload_files(
if seen_zip:
raise HTTPException(status_code=400, detail=SEEN_ZIP_DETAIL)
seen_zip = True
# Validate the zip by opening it (catches corrupt/non-zip files)
with zipfile.ZipFile(file.file, "r") as zf:
if unzip:
zip_metadata_file_id = save_zip_metadata_to_file_store(
zf, file_store
zip_metadata_file_id = save_zip_metadata_to_file_store(
zf, file_store
)
for file_info in zf.namelist():
if zf.getinfo(file_info).is_dir():
continue
if not should_process_file(file_info):
continue
sub_file_bytes = zf.read(file_info)
mime_type, __ = mimetypes.guess_type(file_info)
if mime_type is None:
mime_type = "application/octet-stream"
file_id = file_store.save_file(
content=BytesIO(sub_file_bytes),
display_name=os.path.basename(file_info),
file_origin=file_origin,
file_type=mime_type,
)
for file_info in zf.namelist():
if zf.getinfo(file_info).is_dir():
continue
if not should_process_file(file_info):
continue
sub_file_bytes = zf.read(file_info)
mime_type, __ = mimetypes.guess_type(file_info)
if mime_type is None:
mime_type = "application/octet-stream"
file_id = file_store.save_file(
content=BytesIO(sub_file_bytes),
display_name=os.path.basename(file_info),
file_origin=file_origin,
file_type=mime_type,
)
deduped_file_paths.append(file_id)
deduped_file_names.append(os.path.basename(file_info))
continue
# Store the zip as-is (unzip=False)
file.file.seek(0)
file_id = file_store.save_file(
content=file.file,
display_name=file.filename,
file_origin=file_origin,
file_type=file.content_type or "application/zip",
)
deduped_file_paths.append(file_id)
deduped_file_names.append(file.filename)
deduped_file_paths.append(file_id)
deduped_file_names.append(os.path.basename(file_info))
continue
# Since we can't render docx files in the UI,
@@ -630,10 +613,9 @@ def _fetch_and_check_file_connector_cc_pair_permissions(
@router.post("/admin/connector/file/upload", tags=PUBLIC_API_TAGS)
def upload_files_api(
files: list[UploadFile],
unzip: bool = True,
_: User = Depends(current_curator_or_admin_user),
) -> FileUploadResponse:
return upload_files(files, FileOrigin.OTHER, unzip=unzip)
return upload_files(files, FileOrigin.OTHER)
@router.get("/admin/connector/{connector_id}/files", tags=PUBLIC_API_TAGS)

View File

@@ -74,7 +74,7 @@ def make_structured_onyx_request_id(prefix: str, request_url: str) -> str:
def _make_onyx_request_id(prefix: str, hash_input: str) -> str:
"""helper function to return an id given a string input"""
hash_obj = hashlib.md5(hash_input.encode("utf-8"), usedforsecurity=False)
hash_obj = hashlib.md5(hash_input.encode("utf-8"))
hash_bytes = hash_obj.digest()[:6] # Truncate to 6 bytes
# 6 bytes becomes 8 bytes. we shouldn't need to strip but just in case

View File

@@ -752,7 +752,7 @@ pypandoc-binary==1.16.2
# via onyx
pyparsing==3.2.5
# via httplib2
pypdf==6.9.1
pypdf==6.8.0
# via
# onyx
# unstructured-client

View File

@@ -297,10 +297,6 @@ def index_batch_params(
class TestDocumentIndexOld:
"""Tests the old DocumentIndex interface."""
# TODO(ENG-3864)(andrei): Re-enable this test.
@pytest.mark.xfail(
reason="Flaky test: Retrieved chunks vary non-deterministically before and after changing user projects and personas. Likely a timing issue with the index being updated."
)
def test_update_single_can_clear_user_projects_and_personas(
self,
document_indices: list[DocumentIndex],

View File

@@ -29,7 +29,6 @@ from onyx.document_index.opensearch.opensearch_document_index import (
)
from onyx.document_index.opensearch.schema import CONTENT_FIELD_NAME
from onyx.document_index.opensearch.schema import DocumentChunk
from onyx.document_index.opensearch.schema import DocumentChunkWithoutVectors
from onyx.document_index.opensearch.schema import DocumentSchema
from onyx.document_index.opensearch.schema import get_opensearch_doc_chunk_id
from onyx.document_index.opensearch.search import DocumentQuery
@@ -227,7 +226,7 @@ class TestOpenSearchClient:
mappings = DocumentSchema.get_document_schema(
vector_dimension=128, multitenant=True
)
settings = DocumentSchema.get_index_settings_based_on_environment()
settings = DocumentSchema.get_index_settings()
# Under test.
# Should not raise.
@@ -243,7 +242,7 @@ class TestOpenSearchClient:
mappings = DocumentSchema.get_document_schema(
vector_dimension=128, multitenant=True
)
settings = DocumentSchema.get_index_settings_based_on_environment()
settings = DocumentSchema.get_index_settings()
test_client.create_index(mappings=mappings, settings=settings)
# Under test.
@@ -272,7 +271,7 @@ class TestOpenSearchClient:
mappings = DocumentSchema.get_document_schema(
vector_dimension=128, multitenant=True
)
settings = DocumentSchema.get_index_settings_based_on_environment()
settings = DocumentSchema.get_index_settings()
test_client.create_index(mappings=mappings, settings=settings)
@@ -286,7 +285,7 @@ class TestOpenSearchClient:
mappings = DocumentSchema.get_document_schema(
vector_dimension=128, multitenant=True
)
settings = DocumentSchema.get_index_settings_based_on_environment()
settings = DocumentSchema.get_index_settings()
# Under test and postcondition.
# Should return False before creation.
@@ -306,7 +305,7 @@ class TestOpenSearchClient:
mappings = DocumentSchema.get_document_schema(
vector_dimension=128, multitenant=True
)
settings = DocumentSchema.get_index_settings_based_on_environment()
settings = DocumentSchema.get_index_settings()
test_client.create_index(mappings=mappings, settings=settings)
# Under test.
@@ -341,7 +340,7 @@ class TestOpenSearchClient:
},
},
}
settings = DocumentSchema.get_index_settings_based_on_environment()
settings = DocumentSchema.get_index_settings()
test_client.create_index(mappings=initial_mappings, settings=settings)
# Under test.
@@ -384,7 +383,7 @@ class TestOpenSearchClient:
"test_field": {"type": "keyword"},
},
}
settings = DocumentSchema.get_index_settings_based_on_environment()
settings = DocumentSchema.get_index_settings()
test_client.create_index(mappings=initial_mappings, settings=settings)
# Under test and postcondition.
@@ -419,7 +418,7 @@ class TestOpenSearchClient:
mappings = DocumentSchema.get_document_schema(
vector_dimension=128, multitenant=True
)
settings = DocumentSchema.get_index_settings_based_on_environment()
settings = DocumentSchema.get_index_settings()
# Create once - should succeed.
test_client.create_index(mappings=mappings, settings=settings)
@@ -462,7 +461,7 @@ class TestOpenSearchClient:
mappings = DocumentSchema.get_document_schema(
vector_dimension=128, multitenant=tenant_state.multitenant
)
settings = DocumentSchema.get_index_settings_based_on_environment()
settings = DocumentSchema.get_index_settings()
test_client.create_index(mappings=mappings, settings=settings)
doc = _create_test_document_chunk(
@@ -490,7 +489,7 @@ class TestOpenSearchClient:
mappings = DocumentSchema.get_document_schema(
vector_dimension=128, multitenant=tenant_state.multitenant
)
settings = DocumentSchema.get_index_settings_based_on_environment()
settings = DocumentSchema.get_index_settings()
test_client.create_index(mappings=mappings, settings=settings)
docs = [
@@ -521,7 +520,7 @@ class TestOpenSearchClient:
mappings = DocumentSchema.get_document_schema(
vector_dimension=128, multitenant=tenant_state.multitenant
)
settings = DocumentSchema.get_index_settings_based_on_environment()
settings = DocumentSchema.get_index_settings()
test_client.create_index(mappings=mappings, settings=settings)
doc = _create_test_document_chunk(
@@ -549,7 +548,7 @@ class TestOpenSearchClient:
mappings = DocumentSchema.get_document_schema(
vector_dimension=128, multitenant=tenant_state.multitenant
)
settings = DocumentSchema.get_index_settings_based_on_environment()
settings = DocumentSchema.get_index_settings()
test_client.create_index(mappings=mappings, settings=settings)
original_doc = _create_test_document_chunk(
@@ -584,7 +583,7 @@ class TestOpenSearchClient:
mappings = DocumentSchema.get_document_schema(
vector_dimension=128, multitenant=False
)
settings = DocumentSchema.get_index_settings_based_on_environment()
settings = DocumentSchema.get_index_settings()
test_client.create_index(mappings=mappings, settings=settings)
# Under test and postcondition.
@@ -603,7 +602,7 @@ class TestOpenSearchClient:
mappings = DocumentSchema.get_document_schema(
vector_dimension=128, multitenant=tenant_state.multitenant
)
settings = DocumentSchema.get_index_settings_based_on_environment()
settings = DocumentSchema.get_index_settings()
test_client.create_index(mappings=mappings, settings=settings)
doc = _create_test_document_chunk(
@@ -639,7 +638,7 @@ class TestOpenSearchClient:
mappings = DocumentSchema.get_document_schema(
vector_dimension=128, multitenant=tenant_state.multitenant
)
settings = DocumentSchema.get_index_settings_based_on_environment()
settings = DocumentSchema.get_index_settings()
test_client.create_index(mappings=mappings, settings=settings)
# Under test.
@@ -660,7 +659,7 @@ class TestOpenSearchClient:
mappings = DocumentSchema.get_document_schema(
vector_dimension=128, multitenant=tenant_state.multitenant
)
settings = DocumentSchema.get_index_settings_based_on_environment()
settings = DocumentSchema.get_index_settings()
test_client.create_index(mappings=mappings, settings=settings)
# Index multiple documents.
@@ -736,7 +735,7 @@ class TestOpenSearchClient:
mappings = DocumentSchema.get_document_schema(
vector_dimension=128, multitenant=tenant_state.multitenant
)
settings = DocumentSchema.get_index_settings_based_on_environment()
settings = DocumentSchema.get_index_settings()
test_client.create_index(mappings=mappings, settings=settings)
# Create a document to update.
@@ -785,7 +784,7 @@ class TestOpenSearchClient:
mappings = DocumentSchema.get_document_schema(
vector_dimension=128, multitenant=tenant_state.multitenant
)
settings = DocumentSchema.get_index_settings_based_on_environment()
settings = DocumentSchema.get_index_settings()
test_client.create_index(mappings=mappings, settings=settings)
# Under test and postcondition.
@@ -809,7 +808,7 @@ class TestOpenSearchClient:
mappings = DocumentSchema.get_document_schema(
vector_dimension=128, multitenant=tenant_state.multitenant
)
settings = DocumentSchema.get_index_settings_based_on_environment()
settings = DocumentSchema.get_index_settings()
test_client.create_index(mappings=mappings, settings=settings)
# Index documents.
docs = {
@@ -882,12 +881,8 @@ class TestOpenSearchClient:
)
# Make sure the chunk contents are preserved.
for i, chunk in enumerate(results):
expected = docs[chunk.document_chunk.document_id]
assert chunk.document_chunk == DocumentChunkWithoutVectors(
**{
k: getattr(expected, k)
for k in DocumentChunkWithoutVectors.model_fields
}
assert (
chunk.document_chunk == docs[chunk.document_chunk.document_id]
)
# Make sure score reporting seems reasonable (it should not be None
# or 0).
@@ -911,7 +906,7 @@ class TestOpenSearchClient:
mappings = DocumentSchema.get_document_schema(
vector_dimension=128, multitenant=tenant_state.multitenant
)
settings = DocumentSchema.get_index_settings_based_on_environment()
settings = DocumentSchema.get_index_settings()
test_client.create_index(mappings=mappings, settings=settings)
# Note no documents were indexed.
@@ -952,7 +947,7 @@ class TestOpenSearchClient:
mappings = DocumentSchema.get_document_schema(
vector_dimension=128, multitenant=tenant_x.multitenant
)
settings = DocumentSchema.get_index_settings_based_on_environment()
settings = DocumentSchema.get_index_settings()
test_client.create_index(mappings=mappings, settings=settings)
# Index documents with different public/hidden and tenant states.
@@ -1043,12 +1038,7 @@ class TestOpenSearchClient:
# ordered; we're just assuming which doc will be the first result here.
assert results[0].document_chunk.document_id == "public-doc"
# Make sure the chunk contents are preserved.
assert results[0].document_chunk == DocumentChunkWithoutVectors(
**{
k: getattr(docs["public-doc"], k)
for k in DocumentChunkWithoutVectors.model_fields
}
)
assert results[0].document_chunk == docs["public-doc"]
# Make sure score reporting seems reasonable (it should not be None
# or 0).
assert results[0].score
@@ -1056,12 +1046,7 @@ class TestOpenSearchClient:
assert results[0].match_highlights.get(CONTENT_FIELD_NAME, [])
# Same for the second result.
assert results[1].document_chunk.document_id == "private-doc-user-a"
assert results[1].document_chunk == DocumentChunkWithoutVectors(
**{
k: getattr(docs["private-doc-user-a"], k)
for k in DocumentChunkWithoutVectors.model_fields
}
)
assert results[1].document_chunk == docs["private-doc-user-a"]
assert results[1].score
assert results[1].match_highlights.get(CONTENT_FIELD_NAME, [])
@@ -1081,7 +1066,7 @@ class TestOpenSearchClient:
mappings = DocumentSchema.get_document_schema(
vector_dimension=128, multitenant=tenant_x.multitenant
)
settings = DocumentSchema.get_index_settings_based_on_environment()
settings = DocumentSchema.get_index_settings()
test_client.create_index(mappings=mappings, settings=settings)
# Index documents with varying relevance to the query.
@@ -1208,7 +1193,7 @@ class TestOpenSearchClient:
mappings = DocumentSchema.get_document_schema(
vector_dimension=128, multitenant=tenant_x.multitenant
)
settings = DocumentSchema.get_index_settings_based_on_environment()
settings = DocumentSchema.get_index_settings()
test_client.create_index(mappings=mappings, settings=settings)
# Although very unlikely in practice, let's use the same doc ID just to
@@ -1301,7 +1286,7 @@ class TestOpenSearchClient:
mappings = DocumentSchema.get_document_schema(
vector_dimension=128, multitenant=tenant_state.multitenant
)
settings = DocumentSchema.get_index_settings_based_on_environment()
settings = DocumentSchema.get_index_settings()
test_client.create_index(mappings=mappings, settings=settings)
# Don't index any documents.
@@ -1328,7 +1313,7 @@ class TestOpenSearchClient:
mappings = DocumentSchema.get_document_schema(
vector_dimension=128, multitenant=tenant_state.multitenant
)
settings = DocumentSchema.get_index_settings_based_on_environment()
settings = DocumentSchema.get_index_settings()
test_client.create_index(mappings=mappings, settings=settings)
# Index chunks for two different documents.
@@ -1396,7 +1381,7 @@ class TestOpenSearchClient:
mappings = DocumentSchema.get_document_schema(
vector_dimension=128, multitenant=tenant_state.multitenant
)
settings = DocumentSchema.get_index_settings_based_on_environment()
settings = DocumentSchema.get_index_settings()
test_client.create_index(mappings=mappings, settings=settings)
# Index documents with different public/hidden and tenant states.
@@ -1473,7 +1458,7 @@ class TestOpenSearchClient:
mappings = DocumentSchema.get_document_schema(
vector_dimension=128, multitenant=tenant_state.multitenant
)
settings = DocumentSchema.get_index_settings_based_on_environment()
settings = DocumentSchema.get_index_settings()
test_client.create_index(mappings=mappings, settings=settings)
# Index docs with various ages.
@@ -1565,7 +1550,7 @@ class TestOpenSearchClient:
mappings = DocumentSchema.get_document_schema(
vector_dimension=128, multitenant=tenant_state.multitenant
)
settings = DocumentSchema.get_index_settings_based_on_environment()
settings = DocumentSchema.get_index_settings()
test_client.create_index(mappings=mappings, settings=settings)
# Index chunks for two different documents, one hidden one not.
@@ -1614,9 +1599,4 @@ class TestOpenSearchClient:
for result in results:
# Note each result must be from doc 1, which is not hidden.
expected_result = doc1_chunks[result.document_chunk.chunk_index]
assert result.document_chunk == DocumentChunkWithoutVectors(
**{
k: getattr(expected_result, k)
for k in DocumentChunkWithoutVectors.model_fields
}
)
assert result.document_chunk == expected_result

View File

@@ -31,6 +31,7 @@ from onyx.background.celery.tasks.opensearch_migration.transformer import (
)
from onyx.configs.constants import PUBLIC_DOC_PAT
from onyx.configs.constants import SOURCE_TYPE
from onyx.context.search.models import IndexFilters
from onyx.db.engine.sql_engine import get_session_with_current_tenant
from onyx.db.models import Document
from onyx.db.models import OpenSearchDocumentMigrationRecord
@@ -43,7 +44,6 @@ from onyx.document_index.opensearch.client import OpenSearchIndexClient
from onyx.document_index.opensearch.client import wait_for_opensearch_with_timeout
from onyx.document_index.opensearch.constants import DEFAULT_MAX_CHUNK_SIZE
from onyx.document_index.opensearch.schema import DocumentChunk
from onyx.document_index.opensearch.schema import get_opensearch_doc_chunk_id
from onyx.document_index.opensearch.search import DocumentQuery
from onyx.document_index.vespa.shared_utils.utils import wait_for_vespa_with_timeout
from onyx.document_index.vespa.vespa_document_index import VespaDocumentIndex
@@ -70,7 +70,6 @@ from onyx.document_index.vespa_constants import SOURCE_LINKS
from onyx.document_index.vespa_constants import TITLE
from onyx.document_index.vespa_constants import TITLE_EMBEDDING
from onyx.document_index.vespa_constants import USER_PROJECT
from shared_configs.configs import MULTI_TENANT
from shared_configs.contextvars import get_current_tenant_id
from tests.external_dependency_unit.full_setup import ensure_full_deployment_setup
@@ -79,22 +78,24 @@ CHUNK_COUNT = 5
def _get_document_chunks_from_opensearch(
opensearch_client: OpenSearchIndexClient,
document_id: str,
tenant_state: TenantState,
opensearch_client: OpenSearchIndexClient, document_id: str, current_tenant_id: str
) -> list[DocumentChunk]:
opensearch_client.refresh_index()
results: list[DocumentChunk] = []
for i in range(CHUNK_COUNT):
document_chunk_id: str = get_opensearch_doc_chunk_id(
tenant_state=tenant_state,
document_id=document_id,
chunk_index=i,
max_chunk_size=DEFAULT_MAX_CHUNK_SIZE,
)
result = opensearch_client.get_document(document_chunk_id)
results.append(result)
return results
filters = IndexFilters(access_control_list=None, tenant_id=current_tenant_id)
query_body = DocumentQuery.get_from_document_id_query(
document_id=document_id,
tenant_state=TenantState(tenant_id=current_tenant_id, multitenant=False),
index_filters=filters,
include_hidden=False,
max_chunk_size=DEFAULT_MAX_CHUNK_SIZE,
min_chunk_index=None,
max_chunk_index=None,
)
search_hits = opensearch_client.search(
body=query_body,
search_pipeline_id=None,
)
return [search_hit.document_chunk for search_hit in search_hits]
def _delete_document_chunks_from_opensearch(
@@ -451,13 +452,10 @@ class TestMigrateChunksFromVespaToOpenSearchTask:
for chunks in document_chunks.values():
all_chunks.extend(chunks)
vespa_document_index.index_raw_chunks(all_chunks)
tenant_state = TenantState(
tenant_id=get_current_tenant_id(), multitenant=MULTI_TENANT
)
# Under test.
result = migrate_chunks_from_vespa_to_opensearch_task(
tenant_id=tenant_state.tenant_id
tenant_id=get_current_tenant_id()
)
# Postcondition.
@@ -479,7 +477,7 @@ class TestMigrateChunksFromVespaToOpenSearchTask:
# Verify chunks were indexed in OpenSearch.
for document in test_documents:
opensearch_chunks = _get_document_chunks_from_opensearch(
opensearch_client, document.id, tenant_state
opensearch_client, document.id, get_current_tenant_id()
)
assert len(opensearch_chunks) == CHUNK_COUNT
opensearch_chunks.sort(key=lambda x: x.chunk_index)
@@ -524,9 +522,6 @@ class TestMigrateChunksFromVespaToOpenSearchTask:
for chunks in document_chunks.values():
all_chunks.extend(chunks)
vespa_document_index.index_raw_chunks(all_chunks)
tenant_state = TenantState(
tenant_id=get_current_tenant_id(), multitenant=MULTI_TENANT
)
# Run the initial batch. To simulate partial progress we will mock the
# redis lock to return True for the first invocation of .owned() and
@@ -541,7 +536,7 @@ class TestMigrateChunksFromVespaToOpenSearchTask:
return_value=mock_redis_client,
):
result_1 = migrate_chunks_from_vespa_to_opensearch_task(
tenant_id=tenant_state.tenant_id
tenant_id=get_current_tenant_id()
)
assert result_1 is True
@@ -564,7 +559,7 @@ class TestMigrateChunksFromVespaToOpenSearchTask:
# Under test.
# Run the remainder of the migration.
result_2 = migrate_chunks_from_vespa_to_opensearch_task(
tenant_id=tenant_state.tenant_id
tenant_id=get_current_tenant_id()
)
# Postcondition.
@@ -588,7 +583,7 @@ class TestMigrateChunksFromVespaToOpenSearchTask:
# Verify chunks were indexed in OpenSearch.
for document in test_documents:
opensearch_chunks = _get_document_chunks_from_opensearch(
opensearch_client, document.id, tenant_state
opensearch_client, document.id, get_current_tenant_id()
)
assert len(opensearch_chunks) == CHUNK_COUNT
opensearch_chunks.sort(key=lambda x: x.chunk_index)
@@ -635,9 +630,6 @@ class TestMigrateChunksFromVespaToOpenSearchTask:
for chunks in document_chunks.values():
all_chunks.extend(chunks)
vespa_document_index.index_raw_chunks(all_chunks)
tenant_state = TenantState(
tenant_id=get_current_tenant_id(), multitenant=MULTI_TENANT
)
# Run the initial batch. To simulate partial progress we will mock the
# redis lock to return True for the first invocation of .owned() and
@@ -654,7 +646,7 @@ class TestMigrateChunksFromVespaToOpenSearchTask:
return_value=mock_redis_client,
):
result_1 = migrate_chunks_from_vespa_to_opensearch_task(
tenant_id=tenant_state.tenant_id
tenant_id=get_current_tenant_id()
)
assert result_1 is True
@@ -699,7 +691,7 @@ class TestMigrateChunksFromVespaToOpenSearchTask:
),
):
result_2 = migrate_chunks_from_vespa_to_opensearch_task(
tenant_id=tenant_state.tenant_id
tenant_id=get_current_tenant_id()
)
# Postcondition.
@@ -736,7 +728,7 @@ class TestMigrateChunksFromVespaToOpenSearchTask:
),
):
result_3 = migrate_chunks_from_vespa_to_opensearch_task(
tenant_id=tenant_state.tenant_id
tenant_id=get_current_tenant_id()
)
# Postcondition.
@@ -760,7 +752,7 @@ class TestMigrateChunksFromVespaToOpenSearchTask:
# Verify chunks were indexed in OpenSearch.
for document in test_documents:
opensearch_chunks = _get_document_chunks_from_opensearch(
opensearch_client, document.id, tenant_state
opensearch_client, document.id, get_current_tenant_id()
)
assert len(opensearch_chunks) == CHUNK_COUNT
opensearch_chunks.sort(key=lambda x: x.chunk_index)
@@ -848,25 +840,24 @@ class TestMigrateChunksFromVespaToOpenSearchTask:
chunk["content"] = (
f"Different content {chunk[CHUNK_ID]} for {test_documents[0].id}"
)
tenant_state = TenantState(
tenant_id=get_current_tenant_id(), multitenant=MULTI_TENANT
)
chunks_for_document_in_opensearch, _ = (
transform_vespa_chunks_to_opensearch_chunks(
document_in_opensearch,
tenant_state,
TenantState(tenant_id=get_current_tenant_id(), multitenant=False),
{},
)
)
opensearch_client.bulk_index_documents(
documents=chunks_for_document_in_opensearch,
tenant_state=tenant_state,
tenant_state=TenantState(
tenant_id=get_current_tenant_id(), multitenant=False
),
update_if_exists=True,
)
# Under test.
result = migrate_chunks_from_vespa_to_opensearch_task(
tenant_id=tenant_state.tenant_id
tenant_id=get_current_tenant_id()
)
# Postcondition.
@@ -887,7 +878,7 @@ class TestMigrateChunksFromVespaToOpenSearchTask:
# Verify chunks were indexed in OpenSearch.
for document in test_documents:
opensearch_chunks = _get_document_chunks_from_opensearch(
opensearch_client, document.id, tenant_state
opensearch_client, document.id, get_current_tenant_id()
)
assert len(opensearch_chunks) == CHUNK_COUNT
opensearch_chunks.sort(key=lambda x: x.chunk_index)
@@ -931,14 +922,11 @@ class TestMigrateChunksFromVespaToOpenSearchTask:
for chunks in document_chunks.values():
all_chunks.extend(chunks)
vespa_document_index.index_raw_chunks(all_chunks)
tenant_state = TenantState(
tenant_id=get_current_tenant_id(), multitenant=MULTI_TENANT
)
# Under test.
# First run.
result_1 = migrate_chunks_from_vespa_to_opensearch_task(
tenant_id=tenant_state.tenant_id
tenant_id=get_current_tenant_id()
)
# Postcondition.
@@ -959,7 +947,7 @@ class TestMigrateChunksFromVespaToOpenSearchTask:
# Verify chunks were indexed in OpenSearch.
for document in test_documents:
opensearch_chunks = _get_document_chunks_from_opensearch(
opensearch_client, document.id, tenant_state
opensearch_client, document.id, get_current_tenant_id()
)
assert len(opensearch_chunks) == CHUNK_COUNT
opensearch_chunks.sort(key=lambda x: x.chunk_index)
@@ -972,7 +960,7 @@ class TestMigrateChunksFromVespaToOpenSearchTask:
# Under test.
# Second run.
result_2 = migrate_chunks_from_vespa_to_opensearch_task(
tenant_id=tenant_state.tenant_id
tenant_id=get_current_tenant_id()
)
# Postcondition.
@@ -994,7 +982,7 @@ class TestMigrateChunksFromVespaToOpenSearchTask:
# Verify chunks were indexed in OpenSearch.
for document in test_documents:
opensearch_chunks = _get_document_chunks_from_opensearch(
opensearch_client, document.id, tenant_state
opensearch_client, document.id, get_current_tenant_id()
)
assert len(opensearch_chunks) == CHUNK_COUNT
opensearch_chunks.sort(key=lambda x: x.chunk_index)

View File

@@ -1,45 +0,0 @@
%PDF-1.3
%<25><><EFBFBD><EFBFBD>
1 0 obj
<<
/Producer (pypdf)
>>
endobj
2 0 obj
<<
/Type /Pages
/Count 1
/Kids [ 4 0 R ]
>>
endobj
3 0 obj
<<
/Type /Catalog
/Pages 2 0 R
>>
endobj
4 0 obj
<<
/Type /Page
/Resources <<
>>
/MediaBox [ 0.0 0.0 200 200 ]
/Parent 2 0 R
>>
endobj
xref
0 5
0000000000 65535 f
0000000015 00000 n
0000000054 00000 n
0000000113 00000 n
0000000162 00000 n
trailer
<<
/Size 5
/Root 3 0 R
/Info 1 0 R
>>
startxref
256
%%EOF

View File

@@ -1,89 +0,0 @@
%PDF-1.3
%<25><><EFBFBD><EFBFBD>
1 0 obj
<<
/Producer (pypdf)
>>
endobj
2 0 obj
<<
/Type /Pages
/Count 2
/Kids [ 4 0 R 6 0 R ]
>>
endobj
3 0 obj
<<
/Type /Catalog
/Pages 2 0 R
>>
endobj
4 0 obj
<<
/Type /Page
/Resources <<
/Font <<
/F1 <<
/Type /Font
/Subtype /Type1
/BaseFont /Helvetica
>>
>>
>>
/MediaBox [ 0.0 0.0 200 200 ]
/Contents 5 0 R
/Parent 2 0 R
>>
endobj
5 0 obj
<<
/Length 47
>>
stream
BT /F1 12 Tf 50 150 Td (Page one content) Tj ET
endstream
endobj
6 0 obj
<<
/Type /Page
/Resources <<
/Font <<
/F1 <<
/Type /Font
/Subtype /Type1
/BaseFont /Helvetica
>>
>>
>>
/MediaBox [ 0.0 0.0 200 200 ]
/Contents 7 0 R
/Parent 2 0 R
>>
endobj
7 0 obj
<<
/Length 47
>>
stream
BT /F1 12 Tf 50 150 Td (Page two content) Tj ET
endstream
endobj
xref
0 8
0000000000 65535 f
0000000015 00000 n
0000000054 00000 n
0000000119 00000 n
0000000168 00000 n
0000000349 00000 n
0000000446 00000 n
0000000627 00000 n
trailer
<<
/Size 8
/Root 3 0 R
/Info 1 0 R
>>
startxref
724
%%EOF

View File

@@ -1,62 +0,0 @@
%PDF-1.3
%<25><><EFBFBD><EFBFBD>
1 0 obj
<<
/Producer (pypdf)
>>
endobj
2 0 obj
<<
/Type /Pages
/Count 1
/Kids [ 4 0 R ]
>>
endobj
3 0 obj
<<
/Type /Catalog
/Pages 2 0 R
>>
endobj
4 0 obj
<<
/Type /Page
/Resources <<
/Font <<
/F1 <<
/Type /Font
/Subtype /Type1
/BaseFont /Helvetica
>>
>>
>>
/MediaBox [ 0.0 0.0 200 200 ]
/Contents 5 0 R
/Parent 2 0 R
>>
endobj
5 0 obj
<<
/Length 42
>>
stream
BT /F1 12 Tf 50 150 Td (Hello World) Tj ET
endstream
endobj
xref
0 6
0000000000 65535 f
0000000015 00000 n
0000000054 00000 n
0000000113 00000 n
0000000162 00000 n
0000000343 00000 n
trailer
<<
/Size 6
/Root 3 0 R
/Info 1 0 R
>>
startxref
435
%%EOF

View File

@@ -1,64 +0,0 @@
%PDF-1.3
%<25><><EFBFBD><EFBFBD>
1 0 obj
<<
/Producer (pypdf)
/Title (My Title)
/Author (Jane Doe)
>>
endobj
2 0 obj
<<
/Type /Pages
/Count 1
/Kids [ 4 0 R ]
>>
endobj
3 0 obj
<<
/Type /Catalog
/Pages 2 0 R
>>
endobj
4 0 obj
<<
/Type /Page
/Resources <<
/Font <<
/F1 <<
/Type /Font
/Subtype /Type1
/BaseFont /Helvetica
>>
>>
>>
/MediaBox [ 0.0 0.0 200 200 ]
/Contents 5 0 R
/Parent 2 0 R
>>
endobj
5 0 obj
<<
/Length 35
>>
stream
BT /F1 12 Tf 50 150 Td (test) Tj ET
endstream
endobj
xref
0 6
0000000000 65535 f
0000000015 00000 n
0000000091 00000 n
0000000150 00000 n
0000000199 00000 n
0000000380 00000 n
trailer
<<
/Size 6
/Root 3 0 R
/Info 1 0 R
>>
startxref
465
%%EOF

View File

@@ -1,124 +0,0 @@
"""Unit tests for pypdf-dependent PDF processing functions.
Tests cover:
- read_pdf_file: text extraction, metadata, encrypted PDFs, image extraction
- pdf_to_text: convenience wrapper
- is_pdf_protected: password protection detection
Fixture PDFs live in ./fixtures/ and are pre-built so the test layer has no
dependency on pypdf internals (pypdf.generic).
"""
from io import BytesIO
from pathlib import Path
from onyx.file_processing.extract_file_text import pdf_to_text
from onyx.file_processing.extract_file_text import read_pdf_file
from onyx.file_processing.password_validation import is_pdf_protected
FIXTURES = Path(__file__).parent / "fixtures"
def _load(name: str) -> BytesIO:
return BytesIO((FIXTURES / name).read_bytes())
# ── read_pdf_file ────────────────────────────────────────────────────────
class TestReadPdfFile:
def test_basic_text_extraction(self) -> None:
text, _, images = read_pdf_file(_load("simple.pdf"))
assert "Hello World" in text
assert images == []
def test_multi_page_text_extraction(self) -> None:
text, _, _ = read_pdf_file(_load("multipage.pdf"))
assert "Page one content" in text
assert "Page two content" in text
def test_metadata_extraction(self) -> None:
_, pdf_metadata, _ = read_pdf_file(_load("with_metadata.pdf"))
assert pdf_metadata.get("Title") == "My Title"
assert pdf_metadata.get("Author") == "Jane Doe"
def test_encrypted_pdf_with_correct_password(self) -> None:
text, _, _ = read_pdf_file(_load("encrypted.pdf"), pdf_pass="pass123")
assert "Secret Content" in text
def test_encrypted_pdf_without_password(self) -> None:
text, _, _ = read_pdf_file(_load("encrypted.pdf"))
assert text == ""
def test_encrypted_pdf_with_wrong_password(self) -> None:
text, _, _ = read_pdf_file(_load("encrypted.pdf"), pdf_pass="wrong")
assert text == ""
def test_empty_pdf(self) -> None:
text, _, _ = read_pdf_file(_load("empty.pdf"))
assert text.strip() == ""
def test_invalid_pdf_returns_empty(self) -> None:
text, _, images = read_pdf_file(BytesIO(b"this is not a pdf"))
assert text == ""
assert images == []
def test_image_extraction_disabled_by_default(self) -> None:
_, _, images = read_pdf_file(_load("with_image.pdf"))
assert images == []
def test_image_extraction_collects_images(self) -> None:
_, _, images = read_pdf_file(_load("with_image.pdf"), extract_images=True)
assert len(images) == 1
img_bytes, img_name = images[0]
assert len(img_bytes) > 0
assert img_name # non-empty name
def test_image_callback_streams_instead_of_collecting(self) -> None:
"""With image_callback, images are streamed via callback and not accumulated."""
collected: list[tuple[bytes, str]] = []
def callback(data: bytes, name: str) -> None:
collected.append((data, name))
_, _, images = read_pdf_file(
_load("with_image.pdf"), extract_images=True, image_callback=callback
)
# Callback received the image
assert len(collected) == 1
assert len(collected[0][0]) > 0
# Returned list is empty when callback is used
assert images == []
# ── pdf_to_text ──────────────────────────────────────────────────────────
class TestPdfToText:
def test_returns_text(self) -> None:
assert "Hello World" in pdf_to_text(_load("simple.pdf"))
def test_with_password(self) -> None:
assert "Secret Content" in pdf_to_text(
_load("encrypted.pdf"), pdf_pass="pass123"
)
def test_encrypted_without_password_returns_empty(self) -> None:
assert pdf_to_text(_load("encrypted.pdf")) == ""
# ── is_pdf_protected ─────────────────────────────────────────────────────
class TestIsPdfProtected:
def test_unprotected_pdf(self) -> None:
assert is_pdf_protected(_load("simple.pdf")) is False
def test_protected_pdf(self) -> None:
assert is_pdf_protected(_load("encrypted.pdf")) is True
def test_preserves_file_position(self) -> None:
pdf = _load("simple.pdf")
pdf.seek(42)
is_pdf_protected(pdf)
assert pdf.tell() == 42

View File

@@ -1,372 +0,0 @@
"""Unit tests for the hook executor."""
from typing import Any
from unittest.mock import AsyncMock
from unittest.mock import MagicMock
from unittest.mock import patch
import httpx
import pytest
from onyx.db.enums import HookFailStrategy
from onyx.db.enums import HookPoint
from onyx.error_handling.error_codes import OnyxErrorCode
from onyx.error_handling.exceptions import OnyxError
from onyx.hooks.executor import execute_hook
from onyx.hooks.executor import HookSkipped
from onyx.hooks.executor import HookSoftFailed
# ---------------------------------------------------------------------------
# Helpers
# ---------------------------------------------------------------------------
_PAYLOAD: dict[str, Any] = {"query": "test", "user_email": "u@example.com"}
_RESPONSE_PAYLOAD: dict[str, Any] = {"rewritten_query": "better test"}
def _make_hook(
*,
is_active: bool = True,
endpoint_url: str | None = "https://hook.example.com/query",
api_key: MagicMock | None = None,
timeout_seconds: float = 5.0,
fail_strategy: HookFailStrategy = HookFailStrategy.SOFT,
hook_id: int = 1,
) -> MagicMock:
hook = MagicMock()
hook.is_active = is_active
hook.endpoint_url = endpoint_url
hook.api_key = api_key
hook.timeout_seconds = timeout_seconds
hook.id = hook_id
hook.fail_strategy = fail_strategy
return hook
def _make_api_key(value: str) -> MagicMock:
api_key = MagicMock()
api_key.get_value.return_value = value
return api_key
def _setup_async_client(
mock_client_cls: MagicMock,
*,
response: MagicMock | None = None,
side_effect: Exception | None = None,
) -> AsyncMock:
"""Wire up the httpx.AsyncClient mock and return the inner client.
If side_effect is an httpx.HTTPStatusError, it is raised from
raise_for_status() (matching real httpx behaviour) and post() returns a
response mock with the matching status_code set. All other exceptions are
raised directly from post().
"""
mock_client = AsyncMock()
if isinstance(side_effect, httpx.HTTPStatusError):
# In real httpx, HTTPStatusError comes from raise_for_status(), not post().
# Wire a response mock that raises on raise_for_status() so status_code
# is captured before the exception fires, matching the executor's flow.
error_response = MagicMock()
error_response.status_code = side_effect.response.status_code
error_response.raise_for_status.side_effect = side_effect
mock_client.post = AsyncMock(return_value=error_response)
else:
mock_client.post = AsyncMock(
side_effect=side_effect, return_value=response if not side_effect else None
)
mock_client_cls.return_value.__aenter__ = AsyncMock(return_value=mock_client)
mock_client_cls.return_value.__aexit__ = AsyncMock(return_value=False)
return mock_client
# ---------------------------------------------------------------------------
# Fixtures
# ---------------------------------------------------------------------------
@pytest.fixture()
def db_session() -> MagicMock:
return MagicMock()
# ---------------------------------------------------------------------------
# Early-exit guards (no HTTP call, no DB writes)
# ---------------------------------------------------------------------------
@pytest.mark.asyncio
@pytest.mark.parametrize(
"hooks_available,hook",
[
# HOOKS_AVAILABLE=False exits before the DB lookup — hook is irrelevant.
pytest.param(False, None, id="hooks_not_available"),
pytest.param(True, None, id="hook_not_found"),
pytest.param(True, _make_hook(is_active=False), id="hook_inactive"),
pytest.param(True, _make_hook(endpoint_url=None), id="no_endpoint_url"),
],
)
async def test_early_exit_returns_skipped_with_no_db_writes(
db_session: MagicMock,
hooks_available: bool,
hook: MagicMock | None,
) -> None:
with (
patch("onyx.hooks.executor.HOOKS_AVAILABLE", hooks_available),
patch(
"onyx.hooks.executor.get_non_deleted_hook_by_hook_point",
return_value=hook,
),
patch("onyx.hooks.executor.update_hook__no_commit") as mock_update,
patch("onyx.hooks.executor.create_hook_execution_log__no_commit") as mock_log,
):
result = await execute_hook(
db_session=db_session,
hook_point=HookPoint.QUERY_PROCESSING,
payload=_PAYLOAD,
)
assert isinstance(result, HookSkipped)
mock_update.assert_not_called()
mock_log.assert_not_called()
# ---------------------------------------------------------------------------
# Successful HTTP call
# ---------------------------------------------------------------------------
@pytest.mark.asyncio
async def test_success_returns_payload_and_sets_reachable(
db_session: MagicMock,
) -> None:
hook = _make_hook()
mock_response = MagicMock()
mock_response.status_code = 200
mock_response.json.return_value = _RESPONSE_PAYLOAD
with (
patch("onyx.hooks.executor.HOOKS_AVAILABLE", True),
patch(
"onyx.hooks.executor.get_non_deleted_hook_by_hook_point",
return_value=hook,
),
patch("onyx.hooks.executor.get_session_with_current_tenant"),
patch("onyx.hooks.executor.update_hook__no_commit") as mock_update,
patch("onyx.hooks.executor.create_hook_execution_log__no_commit") as mock_log,
patch("httpx.AsyncClient") as mock_client_cls,
):
_setup_async_client(mock_client_cls, response=mock_response)
result = await execute_hook(
db_session=db_session,
hook_point=HookPoint.QUERY_PROCESSING,
payload=_PAYLOAD,
)
assert result == _RESPONSE_PAYLOAD
_, update_kwargs = mock_update.call_args
assert update_kwargs["is_reachable"] is True
_, log_kwargs = mock_log.call_args
assert log_kwargs["is_success"] is True
# ---------------------------------------------------------------------------
# HTTP failure paths
# ---------------------------------------------------------------------------
@pytest.mark.asyncio
@pytest.mark.parametrize(
"exception,fail_strategy,expected_type,expect_is_reachable_false",
[
pytest.param(
httpx.ConnectError("refused"),
HookFailStrategy.SOFT,
HookSoftFailed,
True,
id="connect_error_soft",
),
pytest.param(
httpx.ConnectError("refused"),
HookFailStrategy.HARD,
OnyxError,
True,
id="connect_error_hard",
),
pytest.param(
httpx.TimeoutException("timeout"),
HookFailStrategy.SOFT,
HookSoftFailed,
False,
id="timeout_soft",
),
pytest.param(
httpx.HTTPStatusError(
"500",
request=MagicMock(),
response=MagicMock(status_code=500, text="error"),
),
HookFailStrategy.HARD,
OnyxError,
False,
id="http_status_error_hard",
),
],
)
async def test_http_failure_paths(
db_session: MagicMock,
exception: Exception,
fail_strategy: HookFailStrategy,
expected_type: type,
expect_is_reachable_false: bool,
) -> None:
hook = _make_hook(fail_strategy=fail_strategy)
with (
patch("onyx.hooks.executor.HOOKS_AVAILABLE", True),
patch(
"onyx.hooks.executor.get_non_deleted_hook_by_hook_point",
return_value=hook,
),
patch("onyx.hooks.executor.get_session_with_current_tenant"),
patch("onyx.hooks.executor.update_hook__no_commit") as mock_update,
patch("onyx.hooks.executor.create_hook_execution_log__no_commit"),
patch("httpx.AsyncClient") as mock_client_cls,
):
_setup_async_client(mock_client_cls, side_effect=exception)
if expected_type is OnyxError:
with pytest.raises(OnyxError) as exc_info:
await execute_hook(
db_session=db_session,
hook_point=HookPoint.QUERY_PROCESSING,
payload=_PAYLOAD,
)
assert exc_info.value.error_code is OnyxErrorCode.HOOK_EXECUTION_FAILED
else:
result = await execute_hook(
db_session=db_session,
hook_point=HookPoint.QUERY_PROCESSING,
payload=_PAYLOAD,
)
assert isinstance(result, expected_type)
if expect_is_reachable_false:
mock_update.assert_called_once()
_, kwargs = mock_update.call_args
assert kwargs["is_reachable"] is False
else:
mock_update.assert_not_called()
# ---------------------------------------------------------------------------
# Authorization header
# ---------------------------------------------------------------------------
@pytest.mark.asyncio
@pytest.mark.parametrize(
"api_key_value,expect_auth_header",
[
pytest.param("secret-token", True, id="api_key_present"),
pytest.param(None, False, id="api_key_absent"),
],
)
async def test_authorization_header(
db_session: MagicMock,
api_key_value: str | None,
expect_auth_header: bool,
) -> None:
api_key = _make_api_key(api_key_value) if api_key_value else None
hook = _make_hook(api_key=api_key)
mock_response = MagicMock()
mock_response.status_code = 200
mock_response.json.return_value = _RESPONSE_PAYLOAD
with (
patch("onyx.hooks.executor.HOOKS_AVAILABLE", True),
patch(
"onyx.hooks.executor.get_non_deleted_hook_by_hook_point",
return_value=hook,
),
patch("onyx.hooks.executor.get_session_with_current_tenant"),
patch("onyx.hooks.executor.update_hook__no_commit"),
patch("onyx.hooks.executor.create_hook_execution_log__no_commit"),
patch("httpx.AsyncClient") as mock_client_cls,
):
mock_client = _setup_async_client(mock_client_cls, response=mock_response)
await execute_hook(
db_session=db_session,
hook_point=HookPoint.QUERY_PROCESSING,
payload=_PAYLOAD,
)
_, call_kwargs = mock_client.post.call_args
if expect_auth_header:
assert call_kwargs["headers"]["Authorization"] == f"Bearer {api_key_value}"
else:
assert "Authorization" not in call_kwargs["headers"]
# ---------------------------------------------------------------------------
# Persist session failure
# ---------------------------------------------------------------------------
@pytest.mark.asyncio
@pytest.mark.parametrize(
"http_exception,expected_result",
[
pytest.param(None, _RESPONSE_PAYLOAD, id="success_path"),
pytest.param(httpx.ConnectError("refused"), OnyxError, id="hard_fail_path"),
],
)
async def test_persist_session_failure_is_swallowed(
db_session: MagicMock,
http_exception: Exception | None,
expected_result: Any,
) -> None:
"""Log write raising must not mask the real return value or OnyxError."""
hook = _make_hook(fail_strategy=HookFailStrategy.HARD)
mock_response = MagicMock()
mock_response.status_code = 200
mock_response.json.return_value = _RESPONSE_PAYLOAD
with (
patch("onyx.hooks.executor.HOOKS_AVAILABLE", True),
patch(
"onyx.hooks.executor.get_non_deleted_hook_by_hook_point",
return_value=hook,
),
patch(
"onyx.hooks.executor.get_session_with_current_tenant",
side_effect=RuntimeError("DB unavailable"),
),
patch("httpx.AsyncClient") as mock_client_cls,
):
_setup_async_client(
mock_client_cls,
response=mock_response if not http_exception else None,
side_effect=http_exception,
)
if expected_result is OnyxError:
with pytest.raises(OnyxError) as exc_info:
await execute_hook(
db_session=db_session,
hook_point=HookPoint.QUERY_PROCESSING,
payload=_PAYLOAD,
)
assert exc_info.value.error_code is OnyxErrorCode.HOOK_EXECUTION_FAILED
else:
result = await execute_hook(
db_session=db_session,
hook_point=HookPoint.QUERY_PROCESSING,
payload=_PAYLOAD,
)
assert result == expected_result

View File

@@ -1,109 +0,0 @@
import io
import zipfile
from unittest.mock import MagicMock
from unittest.mock import patch
from zipfile import BadZipFile
import pytest
from fastapi import UploadFile
from starlette.datastructures import Headers
from onyx.configs.constants import FileOrigin
from onyx.server.documents.connector import upload_files
def _create_test_zip() -> bytes:
"""Create a simple in-memory zip file containing two text files."""
buf = io.BytesIO()
with zipfile.ZipFile(buf, "w") as zf:
zf.writestr("file1.txt", "hello")
zf.writestr("file2.txt", "world")
return buf.getvalue()
def _make_upload_file(content: bytes, filename: str, content_type: str) -> UploadFile:
return UploadFile(
file=io.BytesIO(content),
filename=filename,
headers=Headers({"content-type": content_type}),
)
@patch("onyx.server.documents.connector.get_default_file_store")
def test_upload_zip_with_unzip_true_extracts_files(
mock_get_store: MagicMock,
) -> None:
"""When unzip=True (default), a zip upload is extracted into individual files."""
mock_store = MagicMock()
mock_store.save_file.side_effect = lambda **kwargs: f"id-{kwargs['display_name']}"
mock_get_store.return_value = mock_store
zip_bytes = _create_test_zip()
upload = _make_upload_file(zip_bytes, "test.zip", "application/zip")
result = upload_files([upload], FileOrigin.CONNECTOR)
# Should have extracted the two individual files, not stored the zip itself
assert len(result.file_paths) == 2
assert "id-file1.txt" in result.file_paths
assert "id-file2.txt" in result.file_paths
assert "file1.txt" in result.file_names
assert "file2.txt" in result.file_names
@patch("onyx.server.documents.connector.get_default_file_store")
def test_upload_zip_with_unzip_false_stores_zip_as_is(
mock_get_store: MagicMock,
) -> None:
"""When unzip=False, the zip file is stored as-is without extraction."""
mock_store = MagicMock()
mock_store.save_file.return_value = "zip-file-id"
mock_get_store.return_value = mock_store
zip_bytes = _create_test_zip()
upload = _make_upload_file(zip_bytes, "site_export.zip", "application/zip")
result = upload_files([upload], FileOrigin.CONNECTOR, unzip=False)
# Should store exactly one file (the zip itself)
assert len(result.file_paths) == 1
assert result.file_paths[0] == "zip-file-id"
assert result.file_names == ["site_export.zip"]
# No zip metadata should be created
assert result.zip_metadata_file_id is None
# Verify the stored content is a valid zip
saved_content: io.BytesIO = mock_store.save_file.call_args[1]["content"]
saved_content.seek(0)
with zipfile.ZipFile(saved_content, "r") as zf:
assert set(zf.namelist()) == {"file1.txt", "file2.txt"}
@patch("onyx.server.documents.connector.get_default_file_store")
def test_upload_invalid_zip_with_unzip_false_raises(
mock_get_store: MagicMock,
) -> None:
"""An invalid zip is rejected even when unzip=False (validation still runs)."""
mock_get_store.return_value = MagicMock()
bad_zip = _make_upload_file(b"not a zip", "bad.zip", "application/zip")
with pytest.raises(BadZipFile):
upload_files([bad_zip], FileOrigin.CONNECTOR, unzip=False)
@patch("onyx.server.documents.connector.get_default_file_store")
def test_upload_multiple_zips_rejected_when_unzip_false(
mock_get_store: MagicMock,
) -> None:
"""The seen_zip guard rejects a second zip even when unzip=False."""
mock_store = MagicMock()
mock_store.save_file.return_value = "zip-id"
mock_get_store.return_value = mock_store
zip_bytes = _create_test_zip()
zip1 = _make_upload_file(zip_bytes, "a.zip", "application/zip")
zip2 = _make_upload_file(zip_bytes, "b.zip", "application/zip")
with pytest.raises(Exception, match="Only one zip file"):
upload_files([zip1, zip2], FileOrigin.CONNECTOR, unzip=False)

View File

@@ -8,7 +8,7 @@
"name": "widget",
"version": "0.1.0",
"dependencies": {
"next": "^16.1.7",
"next": "^16.1.5",
"react": "^19",
"react-dom": "^19",
"react-markdown": "^10.1.0"
@@ -1023,9 +1023,9 @@
}
},
"node_modules/@next/env": {
"version": "16.1.7",
"resolved": "https://registry.npmjs.org/@next/env/-/env-16.1.7.tgz",
"integrity": "sha512-rJJbIdJB/RQr2F1nylZr/PJzamvNNhfr3brdKP6s/GW850jbtR70QlSfFselvIBbcPUOlQwBakexjFzqLzF6pg==",
"version": "16.1.5",
"resolved": "https://registry.npmjs.org/@next/env/-/env-16.1.5.tgz",
"integrity": "sha512-CRSCPJiSZoi4Pn69RYBDI9R7YK2g59vLexPQFXY0eyw+ILevIenCywzg+DqmlBik9zszEnw2HLFOUlLAcJbL7g==",
"license": "MIT"
},
"node_modules/@next/eslint-plugin-next": {
@@ -1039,9 +1039,9 @@
}
},
"node_modules/@next/swc-darwin-arm64": {
"version": "16.1.7",
"resolved": "https://registry.npmjs.org/@next/swc-darwin-arm64/-/swc-darwin-arm64-16.1.7.tgz",
"integrity": "sha512-b2wWIE8sABdyafc4IM8r5Y/dS6kD80JRtOGrUiKTsACFQfWWgUQ2NwoUX1yjFMXVsAwcQeNpnucF2ZrujsBBPg==",
"version": "16.1.5",
"resolved": "https://registry.npmjs.org/@next/swc-darwin-arm64/-/swc-darwin-arm64-16.1.5.tgz",
"integrity": "sha512-eK7Wdm3Hjy/SCL7TevlH0C9chrpeOYWx2iR7guJDaz4zEQKWcS1IMVfMb9UKBFMg1XgzcPTYPIp1Vcpukkjg6Q==",
"cpu": [
"arm64"
],
@@ -1055,9 +1055,9 @@
}
},
"node_modules/@next/swc-darwin-x64": {
"version": "16.1.7",
"resolved": "https://registry.npmjs.org/@next/swc-darwin-x64/-/swc-darwin-x64-16.1.7.tgz",
"integrity": "sha512-zcnVaaZulS1WL0Ss38R5Q6D2gz7MtBu8GZLPfK+73D/hp4GFMrC2sudLky1QibfV7h6RJBJs/gOFvYP0X7UVlQ==",
"version": "16.1.5",
"resolved": "https://registry.npmjs.org/@next/swc-darwin-x64/-/swc-darwin-x64-16.1.5.tgz",
"integrity": "sha512-foQscSHD1dCuxBmGkbIr6ScAUF6pRoDZP6czajyvmXPAOFNnQUJu2Os1SGELODjKp/ULa4fulnBWoHV3XdPLfA==",
"cpu": [
"x64"
],
@@ -1071,9 +1071,9 @@
}
},
"node_modules/@next/swc-linux-arm64-gnu": {
"version": "16.1.7",
"resolved": "https://registry.npmjs.org/@next/swc-linux-arm64-gnu/-/swc-linux-arm64-gnu-16.1.7.tgz",
"integrity": "sha512-2ant89Lux/Q3VyC8vNVg7uBaFVP9SwoK2jJOOR0L8TQnX8CAYnh4uctAScy2Hwj2dgjVHqHLORQZJ2wH6VxhSQ==",
"version": "16.1.5",
"resolved": "https://registry.npmjs.org/@next/swc-linux-arm64-gnu/-/swc-linux-arm64-gnu-16.1.5.tgz",
"integrity": "sha512-qNIb42o3C02ccIeSeKjacF3HXotGsxh/FMk/rSRmCzOVMtoWH88odn2uZqF8RLsSUWHcAqTgYmPD3pZ03L9ZAA==",
"cpu": [
"arm64"
],
@@ -1087,9 +1087,9 @@
}
},
"node_modules/@next/swc-linux-arm64-musl": {
"version": "16.1.7",
"resolved": "https://registry.npmjs.org/@next/swc-linux-arm64-musl/-/swc-linux-arm64-musl-16.1.7.tgz",
"integrity": "sha512-uufcze7LYv0FQg9GnNeZ3/whYfo+1Q3HnQpm16o6Uyi0OVzLlk2ZWoY7j07KADZFY8qwDbsmFnMQP3p3+Ftprw==",
"version": "16.1.5",
"resolved": "https://registry.npmjs.org/@next/swc-linux-arm64-musl/-/swc-linux-arm64-musl-16.1.5.tgz",
"integrity": "sha512-U+kBxGUY1xMAzDTXmuVMfhaWUZQAwzRaHJ/I6ihtR5SbTVUEaDRiEU9YMjy1obBWpdOBuk1bcm+tsmifYSygfw==",
"cpu": [
"arm64"
],
@@ -1103,9 +1103,9 @@
}
},
"node_modules/@next/swc-linux-x64-gnu": {
"version": "16.1.7",
"resolved": "https://registry.npmjs.org/@next/swc-linux-x64-gnu/-/swc-linux-x64-gnu-16.1.7.tgz",
"integrity": "sha512-KWVf2gxYvHtvuT+c4MBOGxuse5TD7DsMFYSxVxRBnOzok/xryNeQSjXgxSv9QpIVlaGzEn/pIuI6Koosx8CGWA==",
"version": "16.1.5",
"resolved": "https://registry.npmjs.org/@next/swc-linux-x64-gnu/-/swc-linux-x64-gnu-16.1.5.tgz",
"integrity": "sha512-gq2UtoCpN7Ke/7tKaU7i/1L7eFLfhMbXjNghSv0MVGF1dmuoaPeEVDvkDuO/9LVa44h5gqpWeJ4mRRznjDv7LA==",
"cpu": [
"x64"
],
@@ -1119,9 +1119,9 @@
}
},
"node_modules/@next/swc-linux-x64-musl": {
"version": "16.1.7",
"resolved": "https://registry.npmjs.org/@next/swc-linux-x64-musl/-/swc-linux-x64-musl-16.1.7.tgz",
"integrity": "sha512-HguhaGwsGr1YAGs68uRKc4aGWxLET+NevJskOcCAwXbwj0fYX0RgZW2gsOCzr9S11CSQPIkxmoSbuVaBp4Z3dA==",
"version": "16.1.5",
"resolved": "https://registry.npmjs.org/@next/swc-linux-x64-musl/-/swc-linux-x64-musl-16.1.5.tgz",
"integrity": "sha512-bQWSE729PbXT6mMklWLf8dotislPle2L70E9q6iwETYEOt092GDn0c+TTNj26AjmeceSsC4ndyGsK5nKqHYXjQ==",
"cpu": [
"x64"
],
@@ -1135,9 +1135,9 @@
}
},
"node_modules/@next/swc-win32-arm64-msvc": {
"version": "16.1.7",
"resolved": "https://registry.npmjs.org/@next/swc-win32-arm64-msvc/-/swc-win32-arm64-msvc-16.1.7.tgz",
"integrity": "sha512-S0n3KrDJokKTeFyM/vGGGR8+pCmXYrjNTk2ZozOL1C/JFdfUIL9O1ATaJOl5r2POe56iRChbsszrjMAdWSv7kQ==",
"version": "16.1.5",
"resolved": "https://registry.npmjs.org/@next/swc-win32-arm64-msvc/-/swc-win32-arm64-msvc-16.1.5.tgz",
"integrity": "sha512-LZli0anutkIllMtTAWZlDqdfvjWX/ch8AFK5WgkNTvaqwlouiD1oHM+WW8RXMiL0+vAkAJyAGEzPPjO+hnrSNQ==",
"cpu": [
"arm64"
],
@@ -1151,9 +1151,9 @@
}
},
"node_modules/@next/swc-win32-x64-msvc": {
"version": "16.1.7",
"resolved": "https://registry.npmjs.org/@next/swc-win32-x64-msvc/-/swc-win32-x64-msvc-16.1.7.tgz",
"integrity": "sha512-mwgtg8CNZGYm06LeEd+bNnOUfwOyNem/rOiP14Lsz+AnUY92Zq/LXwtebtUiaeVkhbroRCQ0c8GlR4UT1U+0yg==",
"version": "16.1.5",
"resolved": "https://registry.npmjs.org/@next/swc-win32-x64-msvc/-/swc-win32-x64-msvc-16.1.5.tgz",
"integrity": "sha512-7is37HJTNQGhjPpQbkKjKEboHYQnCgpVt/4rBrrln0D9nderNxZ8ZWs8w1fAtzUx7wEyYjQ+/13myFgFj6K2Ng==",
"cpu": [
"x64"
],
@@ -2564,15 +2564,12 @@
"dev": true
},
"node_modules/baseline-browser-mapping": {
"version": "2.10.8",
"resolved": "https://registry.npmjs.org/baseline-browser-mapping/-/baseline-browser-mapping-2.10.8.tgz",
"integrity": "sha512-PCLz/LXGBsNTErbtB6i5u4eLpHeMfi93aUv5duMmj6caNu6IphS4q6UevDnL36sZQv9lrP11dbPKGMaXPwMKfQ==",
"version": "2.9.14",
"resolved": "https://registry.npmjs.org/baseline-browser-mapping/-/baseline-browser-mapping-2.9.14.tgz",
"integrity": "sha512-B0xUquLkiGLgHhpPBqvl7GWegWBUNuujQ6kXd/r1U38ElPT6Ok8KZ8e+FpUGEc2ZoRQUzq/aUnaKFc/svWUGSg==",
"license": "Apache-2.0",
"bin": {
"baseline-browser-mapping": "dist/cli.cjs"
},
"engines": {
"node": ">=6.0.0"
"baseline-browser-mapping": "dist/cli.js"
}
},
"node_modules/brace-expansion": {
@@ -5929,14 +5926,14 @@
"dev": true
},
"node_modules/next": {
"version": "16.1.7",
"resolved": "https://registry.npmjs.org/next/-/next-16.1.7.tgz",
"integrity": "sha512-WM0L7WrSvKwoLegLYr6V+mz+RIofqQgVAfHhMp9a88ms0cFX8iX9ew+snpWlSBwpkURJOUdvCEt3uLl3NNzvWg==",
"version": "16.1.5",
"resolved": "https://registry.npmjs.org/next/-/next-16.1.5.tgz",
"integrity": "sha512-f+wE+NSbiQgh3DSAlTaw2FwY5yGdVViAtp8TotNQj4kk4Q8Bh1sC/aL9aH+Rg1YAVn18OYXsRDT7U/079jgP7w==",
"license": "MIT",
"dependencies": {
"@next/env": "16.1.7",
"@next/env": "16.1.5",
"@swc/helpers": "0.5.15",
"baseline-browser-mapping": "^2.9.19",
"baseline-browser-mapping": "^2.8.3",
"caniuse-lite": "^1.0.30001579",
"postcss": "8.4.31",
"styled-jsx": "5.1.6"
@@ -5948,14 +5945,14 @@
"node": ">=20.9.0"
},
"optionalDependencies": {
"@next/swc-darwin-arm64": "16.1.7",
"@next/swc-darwin-x64": "16.1.7",
"@next/swc-linux-arm64-gnu": "16.1.7",
"@next/swc-linux-arm64-musl": "16.1.7",
"@next/swc-linux-x64-gnu": "16.1.7",
"@next/swc-linux-x64-musl": "16.1.7",
"@next/swc-win32-arm64-msvc": "16.1.7",
"@next/swc-win32-x64-msvc": "16.1.7",
"@next/swc-darwin-arm64": "16.1.5",
"@next/swc-darwin-x64": "16.1.5",
"@next/swc-linux-arm64-gnu": "16.1.5",
"@next/swc-linux-arm64-musl": "16.1.5",
"@next/swc-linux-x64-gnu": "16.1.5",
"@next/swc-linux-x64-musl": "16.1.5",
"@next/swc-win32-arm64-msvc": "16.1.5",
"@next/swc-win32-x64-msvc": "16.1.5",
"sharp": "^0.34.4"
},
"peerDependencies": {

View File

@@ -9,7 +9,7 @@
"lint": "next lint"
},
"dependencies": {
"next": "^16.1.7",
"next": "^16.1.5",
"react": "^19",
"react-dom": "^19",
"react-markdown": "^10.1.0"

View File

@@ -92,7 +92,7 @@ backend = [
"python-gitlab==5.6.0",
"python-pptx==0.6.23",
"pypandoc_binary==1.16.2",
"pypdf==6.9.1",
"pypdf==6.8.0",
"pytest-mock==3.12.0",
"pytest-playwright==0.7.0",
"python-docx==1.1.2",
@@ -245,7 +245,6 @@ select = [
"ARG",
"E",
"F",
"S324",
"W",
]

8
uv.lock generated
View File

@@ -4481,7 +4481,7 @@ requires-dist = [
{ name = "pygithub", marker = "extra == 'backend'", specifier = "==2.5.0" },
{ name = "pympler", marker = "extra == 'backend'", specifier = "==1.1" },
{ name = "pypandoc-binary", marker = "extra == 'backend'", specifier = "==1.16.2" },
{ name = "pypdf", marker = "extra == 'backend'", specifier = "==6.9.1" },
{ name = "pypdf", marker = "extra == 'backend'", specifier = "==6.8.0" },
{ name = "pytest", marker = "extra == 'dev'", specifier = "==8.3.5" },
{ name = "pytest-alembic", marker = "extra == 'dev'", specifier = "==0.12.1" },
{ name = "pytest-asyncio", marker = "extra == 'dev'", specifier = "==1.3.0" },
@@ -5727,11 +5727,11 @@ wheels = [
[[package]]
name = "pypdf"
version = "6.9.1"
version = "6.8.0"
source = { registry = "https://pypi.org/simple" }
sdist = { url = "https://files.pythonhosted.org/packages/f9/fb/dc2e8cb006e80b0020ed20d8649106fe4274e82d8e756ad3e24ade19c0df/pypdf-6.9.1.tar.gz", hash = "sha256:ae052407d33d34de0c86c5c729be6d51010bf36e03035a8f23ab449bca52377d", size = 5311551, upload-time = "2026-03-17T10:46:07.876Z" }
sdist = { url = "https://files.pythonhosted.org/packages/b4/a3/e705b0805212b663a4c27b861c8a603dba0f8b4bb281f96f8e746576a50d/pypdf-6.8.0.tar.gz", hash = "sha256:cb7eaeaa4133ce76f762184069a854e03f4d9a08568f0e0623f7ea810407833b", size = 5307831, upload-time = "2026-03-09T13:37:40.591Z" }
wheels = [
{ url = "https://files.pythonhosted.org/packages/f9/f4/75543fa802b86e72f87e9395440fe1a89a6d149887e3e55745715c3352ac/pypdf-6.9.1-py3-none-any.whl", hash = "sha256:f35a6a022348fae47e092a908339a8f3dc993510c026bb39a96718fc7185e89f", size = 333661, upload-time = "2026-03-17T10:46:06.286Z" },
{ url = "https://files.pythonhosted.org/packages/8c/ec/4ccf3bb86b1afe5d7176e1c8abcdbf22b53dd682ec2eda50e1caadcf6846/pypdf-6.8.0-py3-none-any.whl", hash = "sha256:2a025080a8dd73f48123c89c57174a5ff3806c71763ee4e49572dc90454943c7", size = 332177, upload-time = "2026-03-09T13:37:38.774Z" },
]
[[package]]

View File

@@ -174,7 +174,6 @@ function ContentLg({
)}
onClick={editable ? startEditing : undefined}
style={{ height: config.lineHeight }}
title={title}
>
{title}
</span>

View File

@@ -218,7 +218,6 @@ function ContentMd({
"text-text-04",
editable && "cursor-pointer"
)}
title={title}
onClick={editable ? startEditing : undefined}
style={{ height: config.lineHeight }}
>

View File

@@ -118,7 +118,6 @@ function ContentSm({
<span
className={cn("opal-content-sm-title", config.titleFont)}
style={{ height: config.lineHeight }}
title={title}
>
{title}
</span>

View File

@@ -231,7 +231,6 @@ function ContentXl({
)}
onClick={editable ? startEditing : undefined}
style={{ height: config.lineHeight }}
title={title}
>
{title}
</span>

82
web/package-lock.json generated
View File

@@ -61,7 +61,7 @@
"mdast-util-find-and-replace": "^3.0.1",
"mime": "^4.1.0",
"motion": "^12.29.0",
"next": "16.1.7",
"next": "16.1.6",
"next-themes": "^0.4.4",
"postcss": "^8.5.6",
"posthog-js": "^1.176.0",
@@ -2896,9 +2896,9 @@
}
},
"node_modules/@next/env": {
"version": "16.1.7",
"resolved": "https://registry.npmjs.org/@next/env/-/env-16.1.7.tgz",
"integrity": "sha512-rJJbIdJB/RQr2F1nylZr/PJzamvNNhfr3brdKP6s/GW850jbtR70QlSfFselvIBbcPUOlQwBakexjFzqLzF6pg==",
"version": "16.1.6",
"resolved": "https://registry.npmjs.org/@next/env/-/env-16.1.6.tgz",
"integrity": "sha512-N1ySLuZjnAtN3kFnwhAwPvZah8RJxKasD7x1f8shFqhncnWZn4JMfg37diLNuoHsLAlrDfM3g4mawVdtAG8XLQ==",
"license": "MIT"
},
"node_modules/@next/eslint-plugin-next": {
@@ -2942,9 +2942,9 @@
}
},
"node_modules/@next/swc-darwin-arm64": {
"version": "16.1.7",
"resolved": "https://registry.npmjs.org/@next/swc-darwin-arm64/-/swc-darwin-arm64-16.1.7.tgz",
"integrity": "sha512-b2wWIE8sABdyafc4IM8r5Y/dS6kD80JRtOGrUiKTsACFQfWWgUQ2NwoUX1yjFMXVsAwcQeNpnucF2ZrujsBBPg==",
"version": "16.1.6",
"resolved": "https://registry.npmjs.org/@next/swc-darwin-arm64/-/swc-darwin-arm64-16.1.6.tgz",
"integrity": "sha512-wTzYulosJr/6nFnqGW7FrG3jfUUlEf8UjGA0/pyypJl42ExdVgC6xJgcXQ+V8QFn6niSG2Pb8+MIG1mZr2vczw==",
"cpu": [
"arm64"
],
@@ -2958,9 +2958,9 @@
}
},
"node_modules/@next/swc-darwin-x64": {
"version": "16.1.7",
"resolved": "https://registry.npmjs.org/@next/swc-darwin-x64/-/swc-darwin-x64-16.1.7.tgz",
"integrity": "sha512-zcnVaaZulS1WL0Ss38R5Q6D2gz7MtBu8GZLPfK+73D/hp4GFMrC2sudLky1QibfV7h6RJBJs/gOFvYP0X7UVlQ==",
"version": "16.1.6",
"resolved": "https://registry.npmjs.org/@next/swc-darwin-x64/-/swc-darwin-x64-16.1.6.tgz",
"integrity": "sha512-BLFPYPDO+MNJsiDWbeVzqvYd4NyuRrEYVB5k2N3JfWncuHAy2IVwMAOlVQDFjj+krkWzhY2apvmekMkfQR0CUQ==",
"cpu": [
"x64"
],
@@ -2974,9 +2974,9 @@
}
},
"node_modules/@next/swc-linux-arm64-gnu": {
"version": "16.1.7",
"resolved": "https://registry.npmjs.org/@next/swc-linux-arm64-gnu/-/swc-linux-arm64-gnu-16.1.7.tgz",
"integrity": "sha512-2ant89Lux/Q3VyC8vNVg7uBaFVP9SwoK2jJOOR0L8TQnX8CAYnh4uctAScy2Hwj2dgjVHqHLORQZJ2wH6VxhSQ==",
"version": "16.1.6",
"resolved": "https://registry.npmjs.org/@next/swc-linux-arm64-gnu/-/swc-linux-arm64-gnu-16.1.6.tgz",
"integrity": "sha512-OJYkCd5pj/QloBvoEcJ2XiMnlJkRv9idWA/j0ugSuA34gMT6f5b7vOiCQHVRpvStoZUknhl6/UxOXL4OwtdaBw==",
"cpu": [
"arm64"
],
@@ -2990,9 +2990,9 @@
}
},
"node_modules/@next/swc-linux-arm64-musl": {
"version": "16.1.7",
"resolved": "https://registry.npmjs.org/@next/swc-linux-arm64-musl/-/swc-linux-arm64-musl-16.1.7.tgz",
"integrity": "sha512-uufcze7LYv0FQg9GnNeZ3/whYfo+1Q3HnQpm16o6Uyi0OVzLlk2ZWoY7j07KADZFY8qwDbsmFnMQP3p3+Ftprw==",
"version": "16.1.6",
"resolved": "https://registry.npmjs.org/@next/swc-linux-arm64-musl/-/swc-linux-arm64-musl-16.1.6.tgz",
"integrity": "sha512-S4J2v+8tT3NIO9u2q+S0G5KdvNDjXfAv06OhfOzNDaBn5rw84DGXWndOEB7d5/x852A20sW1M56vhC/tRVbccQ==",
"cpu": [
"arm64"
],
@@ -3006,9 +3006,9 @@
}
},
"node_modules/@next/swc-linux-x64-gnu": {
"version": "16.1.7",
"resolved": "https://registry.npmjs.org/@next/swc-linux-x64-gnu/-/swc-linux-x64-gnu-16.1.7.tgz",
"integrity": "sha512-KWVf2gxYvHtvuT+c4MBOGxuse5TD7DsMFYSxVxRBnOzok/xryNeQSjXgxSv9QpIVlaGzEn/pIuI6Koosx8CGWA==",
"version": "16.1.6",
"resolved": "https://registry.npmjs.org/@next/swc-linux-x64-gnu/-/swc-linux-x64-gnu-16.1.6.tgz",
"integrity": "sha512-2eEBDkFlMMNQnkTyPBhQOAyn2qMxyG2eE7GPH2WIDGEpEILcBPI/jdSv4t6xupSP+ot/jkfrCShLAa7+ZUPcJQ==",
"cpu": [
"x64"
],
@@ -3022,9 +3022,9 @@
}
},
"node_modules/@next/swc-linux-x64-musl": {
"version": "16.1.7",
"resolved": "https://registry.npmjs.org/@next/swc-linux-x64-musl/-/swc-linux-x64-musl-16.1.7.tgz",
"integrity": "sha512-HguhaGwsGr1YAGs68uRKc4aGWxLET+NevJskOcCAwXbwj0fYX0RgZW2gsOCzr9S11CSQPIkxmoSbuVaBp4Z3dA==",
"version": "16.1.6",
"resolved": "https://registry.npmjs.org/@next/swc-linux-x64-musl/-/swc-linux-x64-musl-16.1.6.tgz",
"integrity": "sha512-oicJwRlyOoZXVlxmIMaTq7f8pN9QNbdes0q2FXfRsPhfCi8n8JmOZJm5oo1pwDaFbnnD421rVU409M3evFbIqg==",
"cpu": [
"x64"
],
@@ -3038,9 +3038,9 @@
}
},
"node_modules/@next/swc-win32-arm64-msvc": {
"version": "16.1.7",
"resolved": "https://registry.npmjs.org/@next/swc-win32-arm64-msvc/-/swc-win32-arm64-msvc-16.1.7.tgz",
"integrity": "sha512-S0n3KrDJokKTeFyM/vGGGR8+pCmXYrjNTk2ZozOL1C/JFdfUIL9O1ATaJOl5r2POe56iRChbsszrjMAdWSv7kQ==",
"version": "16.1.6",
"resolved": "https://registry.npmjs.org/@next/swc-win32-arm64-msvc/-/swc-win32-arm64-msvc-16.1.6.tgz",
"integrity": "sha512-gQmm8izDTPgs+DCWH22kcDmuUp7NyiJgEl18bcr8irXA5N2m2O+JQIr6f3ct42GOs9c0h8QF3L5SzIxcYAAXXw==",
"cpu": [
"arm64"
],
@@ -3054,9 +3054,9 @@
}
},
"node_modules/@next/swc-win32-x64-msvc": {
"version": "16.1.7",
"resolved": "https://registry.npmjs.org/@next/swc-win32-x64-msvc/-/swc-win32-x64-msvc-16.1.7.tgz",
"integrity": "sha512-mwgtg8CNZGYm06LeEd+bNnOUfwOyNem/rOiP14Lsz+AnUY92Zq/LXwtebtUiaeVkhbroRCQ0c8GlR4UT1U+0yg==",
"version": "16.1.6",
"resolved": "https://registry.npmjs.org/@next/swc-win32-x64-msvc/-/swc-win32-x64-msvc-16.1.6.tgz",
"integrity": "sha512-NRfO39AIrzBnixKbjuo2YiYhB6o9d8v/ymU9m/Xk8cyVk+k7XylniXkHwjs4s70wedVffc6bQNbufk5v0xEm0A==",
"cpu": [
"x64"
],
@@ -14068,14 +14068,14 @@
"license": "MIT"
},
"node_modules/next": {
"version": "16.1.7",
"resolved": "https://registry.npmjs.org/next/-/next-16.1.7.tgz",
"integrity": "sha512-WM0L7WrSvKwoLegLYr6V+mz+RIofqQgVAfHhMp9a88ms0cFX8iX9ew+snpWlSBwpkURJOUdvCEt3uLl3NNzvWg==",
"version": "16.1.6",
"resolved": "https://registry.npmjs.org/next/-/next-16.1.6.tgz",
"integrity": "sha512-hkyRkcu5x/41KoqnROkfTm2pZVbKxvbZRuNvKXLRXxs3VfyO0WhY50TQS40EuKO9SW3rBj/sF3WbVwDACeMZyw==",
"license": "MIT",
"dependencies": {
"@next/env": "16.1.7",
"@next/env": "16.1.6",
"@swc/helpers": "0.5.15",
"baseline-browser-mapping": "^2.9.19",
"baseline-browser-mapping": "^2.8.3",
"caniuse-lite": "^1.0.30001579",
"postcss": "8.4.31",
"styled-jsx": "5.1.6"
@@ -14087,14 +14087,14 @@
"node": ">=20.9.0"
},
"optionalDependencies": {
"@next/swc-darwin-arm64": "16.1.7",
"@next/swc-darwin-x64": "16.1.7",
"@next/swc-linux-arm64-gnu": "16.1.7",
"@next/swc-linux-arm64-musl": "16.1.7",
"@next/swc-linux-x64-gnu": "16.1.7",
"@next/swc-linux-x64-musl": "16.1.7",
"@next/swc-win32-arm64-msvc": "16.1.7",
"@next/swc-win32-x64-msvc": "16.1.7",
"@next/swc-darwin-arm64": "16.1.6",
"@next/swc-darwin-x64": "16.1.6",
"@next/swc-linux-arm64-gnu": "16.1.6",
"@next/swc-linux-arm64-musl": "16.1.6",
"@next/swc-linux-x64-gnu": "16.1.6",
"@next/swc-linux-x64-musl": "16.1.6",
"@next/swc-win32-arm64-msvc": "16.1.6",
"@next/swc-win32-x64-msvc": "16.1.6",
"sharp": "^0.34.4"
},
"peerDependencies": {

View File

@@ -79,7 +79,7 @@
"mdast-util-find-and-replace": "^3.0.1",
"mime": "^4.1.0",
"motion": "^12.29.0",
"next": "16.1.7",
"next": "16.1.6",
"next-themes": "^0.4.4",
"postcss": "^8.5.6",
"posthog-js": "^1.176.0",

View File

@@ -12,7 +12,7 @@ import { localizeAndPrettify } from "@/lib/time";
import Button from "@/refresh-components/buttons/Button";
import Text from "@/refresh-components/texts/Text";
import { PageSelector } from "@/components/PageSelector";
import { useCallback, useEffect, useRef, useState, useMemo } from "react";
import { useEffect, useState, useMemo } from "react";
import { SvgAlertTriangle } from "@opal/icons";
export interface IndexAttemptErrorsModalProps {
errors: {
@@ -22,66 +22,93 @@ export interface IndexAttemptErrorsModalProps {
onClose: () => void;
onResolveAll: () => void;
isResolvingErrors?: boolean;
onPageChange?: (page: number) => void;
currentPage?: number;
pageSize?: number;
}
const ROW_HEIGHT = 65; // 4rem + 1px for border
export default function IndexAttemptErrorsModal({
errors,
onClose,
onResolveAll,
isResolvingErrors = false,
pageSize: propPageSize,
}: IndexAttemptErrorsModalProps) {
const observerRef = useRef<ResizeObserver | null>(null);
const [pageSize, setPageSize] = useState(10);
const [calculatedPageSize, setCalculatedPageSize] = useState(10);
const [currentPage, setCurrentPage] = useState(1);
const tableContainerRef = useCallback((container: HTMLDivElement | null) => {
if (observerRef.current) {
observerRef.current.disconnect();
observerRef.current = null;
}
// Reset to page 1 when the error list actually changes
useEffect(() => {
setCurrentPage(1);
}, [errors.items.length, errors.total_items]);
if (!container) return;
useEffect(() => {
const calculatePageSize = () => {
// Modal height is 75% of viewport height
const modalHeight = window.innerHeight * 0.6;
const observer = new ResizeObserver(() => {
const thead = container.querySelector("thead");
const theadHeight = thead?.getBoundingClientRect().height ?? 0;
const availableHeight = container.clientHeight - theadHeight;
const newPageSize = Math.max(3, Math.floor(availableHeight / ROW_HEIGHT));
setPageSize(newPageSize);
});
// Estimate heights (in pixels):
// - Modal header (title + description): ~120px
// - Table header: ~40px
// - Pagination section: ~80px
// - Modal padding: ~64px (32px top + 32px bottom)
const fixedHeight = 120 + 40 + 80 + 64;
observer.observe(container);
observerRef.current = observer;
// Available height for table rows
const availableHeight = modalHeight - fixedHeight;
// Each table row is approximately 60px (including borders and padding)
const rowHeight = 60;
// Calculate how many rows can fit, with a minimum of 3
const rowsPerPage = Math.max(3, Math.floor(availableHeight / rowHeight));
setCalculatedPageSize((prev) => {
// Only update if the new size is significantly different to prevent flickering
if (Math.abs(prev - rowsPerPage) > 0) {
return rowsPerPage;
}
return prev;
});
};
// Initial calculation
calculatePageSize();
// Debounced resize handler to prevent excessive recalculation
let resizeTimeout: NodeJS.Timeout;
const debouncedCalculatePageSize = () => {
clearTimeout(resizeTimeout);
resizeTimeout = setTimeout(calculatePageSize, 100);
};
window.addEventListener("resize", debouncedCalculatePageSize);
return () => {
window.removeEventListener("resize", debouncedCalculatePageSize);
clearTimeout(resizeTimeout);
};
}, []);
// When data changes, reset to page 1.
// When page size changes (resize), preserve the user's position by
// finding which new page contains the first item they were looking at.
const prevPageSizeRef = useRef(pageSize);
// Separate effect to reset current page when page size changes
useEffect(() => {
if (pageSize !== prevPageSizeRef.current) {
setCurrentPage((prev) => {
const firstVisibleIndex = (prev - 1) * prevPageSizeRef.current;
const newPage = Math.floor(firstVisibleIndex / pageSize) + 1;
const totalPages = Math.ceil(errors.items.length / pageSize);
return Math.min(newPage, totalPages);
});
prevPageSizeRef.current = pageSize;
} else {
setCurrentPage(1);
}
}, [errors.items.length, pageSize]);
setCurrentPage(1);
}, [calculatedPageSize]);
const pageSize = propPageSize || calculatedPageSize;
// Memoize pagination calculations to prevent unnecessary recalculations
const paginationData = useMemo(() => {
const totalPages = Math.ceil(errors.items.length / pageSize);
const startIndex = (currentPage - 1) * pageSize;
const currentPageItems = errors.items.slice(
const endIndex = startIndex + pageSize;
const currentPageItems = errors.items.slice(startIndex, endIndex);
return {
totalPages,
currentPageItems,
startIndex,
startIndex + pageSize
);
return { totalPages, currentPageItems };
endIndex,
};
}, [errors.items, pageSize, currentPage]);
const hasUnresolvedErrors = useMemo(
@@ -110,7 +137,7 @@ export default function IndexAttemptErrorsModal({
onClose={onClose}
height="fit"
/>
<Modal.Body height="full">
<Modal.Body>
{!isResolvingErrors && (
<div className="flex flex-col gap-2 flex-shrink-0">
<Text as="p">
@@ -125,10 +152,7 @@ export default function IndexAttemptErrorsModal({
</div>
)}
<div
ref={tableContainerRef}
className="flex-1 w-full overflow-hidden min-h-0"
>
<div className="flex-1 overflow-hidden min-h-0">
<Table>
<TableHeader>
<TableRow>
@@ -141,11 +165,11 @@ export default function IndexAttemptErrorsModal({
<TableBody>
{paginationData.currentPageItems.length > 0 ? (
paginationData.currentPageItems.map((error) => (
<TableRow key={error.id} className="h-[4rem]">
<TableCell>
<TableRow key={error.id} className="h-[60px] max-h-[60px]">
<TableCell className="h-[60px] align-top">
{localizeAndPrettify(error.time_created)}
</TableCell>
<TableCell>
<TableCell className="h-[60px] align-top">
{error.document_link ? (
<a
href={error.document_link}
@@ -159,12 +183,12 @@ export default function IndexAttemptErrorsModal({
error.document_id || error.entity_id || "Unknown"
)}
</TableCell>
<TableCell>
<div className="flex items-center h-[2rem] overflow-y-auto whitespace-normal">
<TableCell className="h-[60px] align-top p-0">
<div className="h-[60px] overflow-y-auto p-4 whitespace-normal">
{error.failure_message}
</div>
</TableCell>
<TableCell>
<TableCell className="h-[60px] align-top">
<span
className={`px-2 py-1 rounded text-xs ${
error.is_resolved
@@ -178,7 +202,7 @@ export default function IndexAttemptErrorsModal({
</TableRow>
))
) : (
<TableRow className="h-[4rem]">
<TableRow>
<TableCell
colSpan={4}
className="text-center py-8 text-gray-500"
@@ -191,24 +215,32 @@ export default function IndexAttemptErrorsModal({
</Table>
</div>
{paginationData.totalPages > 1 && (
<div className="flex w-full justify-center">
<PageSelector
totalPages={paginationData.totalPages}
currentPage={currentPage}
onPageChange={handlePageChange}
/>
<div className="flex-shrink-0">
{paginationData.totalPages > 1 && (
<div className="flex-1 flex justify-center mb-2">
<PageSelector
totalPages={paginationData.totalPages}
currentPage={currentPage}
onPageChange={handlePageChange}
/>
</div>
)}
<div className="flex w-full">
<div className="flex gap-2 ml-auto">
{hasUnresolvedErrors && !isResolvingErrors && (
// TODO(@raunakab): migrate to opal Button once className/iconClassName is resolved
<Button
onClick={onResolveAll}
className="ml-4 whitespace-nowrap"
>
Resolve All
</Button>
)}
</div>
</div>
)}
</div>
</Modal.Body>
<Modal.Footer>
{hasUnresolvedErrors && !isResolvingErrors && (
// TODO(@raunakab): migrate to opal Button once className/iconClassName is resolved
<Button onClick={onResolveAll} className="ml-4 whitespace-nowrap">
Resolve All
</Button>
)}
</Modal.Footer>
</Modal.Content>
</Modal>
);

View File

@@ -18,7 +18,7 @@ import { PageSelector } from "@/components/PageSelector";
import { localizeAndPrettify } from "@/lib/time";
import { getDocsProcessedPerMinute } from "@/lib/indexAttempt";
import { InfoIcon } from "@/components/icons/icons";
import ExceptionTraceModal from "@/sections/modals/PreviewModal/ExceptionTraceModal";
import ExceptionTraceModal from "@/components/modals/ExceptionTraceModal";
import SimpleTooltip from "@/refresh-components/SimpleTooltip";
import { SvgClock } from "@opal/icons";
export interface IndexingAttemptsTableProps {

View File

@@ -21,13 +21,10 @@ export const submitGoogleSite = async (
formData.append("files", file);
});
const response = await fetch(
"/api/manage/admin/connector/file/upload?unzip=false",
{
method: "POST",
body: formData,
}
);
const response = await fetch("/api/manage/admin/connector/file/upload", {
method: "POST",
body: formData,
});
const responseJson = await response.json();
if (!response.ok) {
toast.error(`Unable to upload files - ${responseJson.detail}`);

View File

@@ -19,7 +19,6 @@ import {
} from "@/lib/types";
import type { Route } from "next";
import { useRouter } from "next/navigation";
import Truncated from "@/refresh-components/texts/Truncated";
import {
FiChevronDown,
FiChevronRight,
@@ -166,7 +165,9 @@ function ConnectorRow({
onClick={handleRowClick}
>
<TableCell className="">
<Truncated>{ccPairsIndexingStatus.name}</Truncated>
<p className="max-w-[200px] xl:max-w-[400px] inline-block ellipsis truncate">
{ccPairsIndexingStatus.name}
</p>
</TableCell>
<TableCell>
{timeAgo(ccPairsIndexingStatus?.last_success) || "-"}
@@ -245,7 +246,9 @@ function FederatedConnectorRow({
onClick={handleRowClick}
>
<TableCell className="">
<Truncated>{federatedConnector.name}</Truncated>
<p className="max-w-[200px] xl:max-w-[400px] inline-block ellipsis truncate">
{federatedConnector.name}
</p>
</TableCell>
<TableCell>N/A</TableCell>
<TableCell>

View File

@@ -4,6 +4,7 @@ import {
fetchEnterpriseSettingsSS,
fetchSettingsSS,
} from "@/components/settings/lib";
import { fetchSS } from "@/lib/utilsSS";
import {
CUSTOM_ANALYTICS_ENABLED,
GTM_ENABLED,
@@ -55,11 +56,28 @@ export async function generateMetadata(): Promise<Metadata> {
: "/onyx.ico";
}
const useCustomLogo =
enterpriseSettings && enterpriseSettings.use_custom_logo;
let logoMimeType: string = "image/png";
if (useCustomLogo) {
try {
const logoRes = await fetchSS("/enterprise-settings/logo", {
method: "HEAD",
});
logoMimeType = logoRes.headers.get("content-type") || "image/png";
} catch {
// Fall back to image/png if the HEAD request fails
}
}
return {
title: enterpriseSettings?.application_name || "Onyx",
description: "Question answering for your documents",
icons: {
icon: logoLocation,
icon: useCustomLogo
? { url: logoLocation, type: logoMimeType }
: { url: logoLocation, type: "image/x-icon" },
},
};
}

View File

@@ -0,0 +1,53 @@
import { useState } from "react";
import Modal from "@/refresh-components/Modal";
import Text from "@/refresh-components/texts/Text";
import { SvgAlertTriangle, SvgCheck, SvgCopy } from "@opal/icons";
interface ExceptionTraceModalProps {
onOutsideClick: () => void;
exceptionTrace: string;
}
export default function ExceptionTraceModal({
onOutsideClick,
exceptionTrace,
}: ExceptionTraceModalProps) {
const [copyClicked, setCopyClicked] = useState(false);
return (
<Modal open onOpenChange={onOutsideClick}>
<Modal.Content width="lg" height="full">
<Modal.Header
icon={SvgAlertTriangle}
title="Full Exception Trace"
onClose={onOutsideClick}
height="fit"
/>
<Modal.Body>
<div className="mb-6">
{!copyClicked ? (
<button
type="button"
onClick={() => {
navigator.clipboard.writeText(exceptionTrace!);
setCopyClicked(true);
setTimeout(() => setCopyClicked(false), 2000);
}}
className="flex w-fit items-center hover:bg-accent-background p-2 border-border border rounded"
>
<Text>Copy full trace</Text>
<SvgCopy className="stroke-text-04 ml-2 h-4 w-4 flex flex-shrink-0" />
</button>
) : (
<div className="flex w-fit items-center hover:bg-accent-background p-2 border-border border rounded cursor-default">
<Text>Copied to clipboard</Text>
<SvgCheck className="stroke-text-04 my-auto ml-2 h-4 w-4 flex flex-shrink-0" />
</div>
)}
</div>
<div className="whitespace-pre-wrap">{exceptionTrace}</div>
</Modal.Body>
</Modal.Content>
</Modal>
);
}

View File

@@ -12,7 +12,6 @@ import {
SvgKey,
} from "@opal/icons";
import { Disabled } from "@opal/core";
import LineItem from "@/refresh-components/buttons/LineItem";
import Popover from "@/refresh-components/Popover";
import Separator from "@/refresh-components/Separator";
import { Section } from "@/layouts/general-layouts";
@@ -79,17 +78,18 @@ export default function UserRowActions({
return (
<>
{user.id && (
<LineItem
<Button
prominence="tertiary"
icon={SvgUsers}
onClick={() => openModal(Modal.EDIT_GROUPS)}
>
Groups &amp; Roles
</LineItem>
</Button>
)}
<Disabled disabled>
<LineItem danger icon={SvgUserX}>
<Button prominence="tertiary" variant="danger" icon={SvgUserX}>
Deactivate User
</LineItem>
</Button>
</Disabled>
<Separator paddingXRem={0.5} />
<Text as="p" secondaryBody text03 className="px-3 py-1">
@@ -102,18 +102,20 @@ export default function UserRowActions({
switch (user.status) {
case UserStatus.INVITED:
return (
<LineItem
danger
<Button
prominence="tertiary"
variant="danger"
icon={SvgXCircle}
onClick={() => openModal(Modal.CANCEL_INVITE)}
>
Cancel Invite
</LineItem>
</Button>
);
case UserStatus.REQUESTED:
return (
<LineItem
<Button
prominence="tertiary"
icon={SvgUserCheck}
onClick={() => {
setPopoverOpen(false);
@@ -131,34 +133,37 @@ export default function UserRowActions({
}}
>
Approve
</LineItem>
</Button>
);
case UserStatus.ACTIVE:
return (
<>
{user.id && (
<LineItem
<Button
prominence="tertiary"
icon={SvgUsers}
onClick={() => openModal(Modal.EDIT_GROUPS)}
>
Groups &amp; Roles
</LineItem>
</Button>
)}
<LineItem
<Button
prominence="tertiary"
icon={SvgKey}
onClick={() => openModal(Modal.RESET_PASSWORD)}
>
Reset Password
</LineItem>
</Button>
<Separator paddingXRem={0.5} />
<LineItem
danger
<Button
prominence="tertiary"
variant="danger"
icon={SvgUserX}
onClick={() => openModal(Modal.DEACTIVATE)}
>
Deactivate User
</LineItem>
</Button>
</>
);
@@ -166,34 +171,38 @@ export default function UserRowActions({
return (
<>
{user.id && (
<LineItem
<Button
prominence="tertiary"
icon={SvgUsers}
onClick={() => openModal(Modal.EDIT_GROUPS)}
>
Groups &amp; Roles
</LineItem>
</Button>
)}
<LineItem
<Button
prominence="tertiary"
icon={SvgKey}
onClick={() => openModal(Modal.RESET_PASSWORD)}
>
Reset Password
</LineItem>
</Button>
<Separator paddingXRem={0.5} />
<LineItem
<Button
prominence="tertiary"
icon={SvgUserPlus}
onClick={() => openModal(Modal.ACTIVATE)}
>
Activate User
</LineItem>
</Button>
<Separator paddingXRem={0.5} />
<LineItem
danger
<Button
prominence="tertiary"
variant="danger"
icon={SvgUserX}
onClick={() => openModal(Modal.DELETE)}
>
Delete User
</LineItem>
</Button>
</>
);

View File

@@ -1,39 +0,0 @@
import Modal from "@/refresh-components/Modal";
import { SvgAlertTriangle } from "@opal/icons";
import { CodePreview } from "@/sections/modals/PreviewModal/variants/CodePreview";
import { CopyButton } from "@/sections/modals/PreviewModal/variants/shared";
import FloatingFooter from "@/sections/modals/PreviewModal/FloatingFooter";
interface ExceptionTraceModalProps {
onOutsideClick: () => void;
exceptionTrace: string;
language?: string;
}
export default function ExceptionTraceModal({
onOutsideClick,
exceptionTrace,
language = "python",
}: ExceptionTraceModalProps) {
return (
<Modal open onOpenChange={onOutsideClick}>
<Modal.Content width="lg" height="full">
<Modal.Header
icon={SvgAlertTriangle}
title="Full Exception Trace"
onClose={onOutsideClick}
height="fit"
/>
<div className="flex flex-col flex-1 min-h-0 overflow-hidden w-full bg-background-tint-01">
<CodePreview content={exceptionTrace} language={language} normalize />
</div>
<FloatingFooter
right={<CopyButton getText={() => exceptionTrace} />}
codeBackground
/>
</Modal.Content>
</Modal>
);
}

View File

@@ -1,39 +0,0 @@
import { cn } from "@/lib/utils";
import { ReactNode } from "react";
interface FloatingFooterProps {
left?: ReactNode;
right?: ReactNode;
codeBackground?: boolean;
}
export default function FloatingFooter({
left,
right,
codeBackground,
}: FloatingFooterProps) {
return (
<div
className={cn(
"absolute bottom-0 left-0 right-0",
"flex items-center justify-between",
"p-4 pointer-events-none w-full"
)}
style={{
background: `linear-gradient(to top, var(--background-${
codeBackground ? "code-01" : "tint-01"
}) 40%, transparent)`,
}}
>
{/* Left slot */}
<div className="pointer-events-auto">{left}</div>
{/* Right slot */}
{right ? (
<div className="pointer-events-auto rounded-12 bg-background-tint-00 p-1 shadow-lg">
{right}
</div>
) : null}
</div>
);
}

View File

@@ -5,8 +5,8 @@ import { MinimalOnyxDocument } from "@/lib/search/interfaces";
import Modal from "@/refresh-components/Modal";
import Text from "@/refresh-components/texts/Text";
import SimpleLoader from "@/refresh-components/loaders/SimpleLoader";
import { cn } from "@/lib/utils";
import { Section } from "@/layouts/general-layouts";
import FloatingFooter from "@/sections/modals/PreviewModal/FloatingFooter";
import mime from "mime";
import {
getCodeLanguage,
@@ -189,12 +189,30 @@ export default function PreviewModal({
)}
</div>
{/* Floating footer */}
{!isLoading && !loadError && (
<FloatingFooter
left={variant.renderFooterLeft(ctx)}
right={variant.renderFooterRight(ctx)}
codeBackground={variant.codeBackground}
/>
<div
className={cn(
"absolute bottom-0 left-0 right-0",
"flex items-center justify-between",
"p-4 pointer-events-none w-full"
)}
style={{
background: `linear-gradient(to top, var(--background-${
variant.codeBackground ? "code-01" : "tint-01"
}) 40%, transparent)`,
}}
>
{/* Left slot */}
<div className="pointer-events-auto">
{variant.renderFooterLeft(ctx)}
</div>
{/* Right slot */}
<div className="pointer-events-auto rounded-12 bg-background-tint-00 p-1 shadow-lg">
{variant.renderFooterRight(ctx)}
</div>
</div>
)}
</Modal.Content>
</Modal>

View File

@@ -12,7 +12,6 @@ import { cn, noProp } from "@/lib/utils";
import { DRAG_TYPES } from "./constants";
import SidebarTab from "@/refresh-components/buttons/SidebarTab";
import IconButton from "@/refresh-components/buttons/IconButton";
import Truncated from "@/refresh-components/texts/Truncated";
import { Button } from "@opal/components";
import ButtonRenaming from "@/refresh-components/buttons/ButtonRenaming";
import type { IconProps } from "@opal/types";
@@ -182,7 +181,7 @@ const ProjectFolderButton = memo(({ project }: ProjectFolderButtonProps) => {
onClose={() => setIsEditing(false)}
/>
) : (
<Truncated>{project.name}</Truncated>
project.name
)}
</SidebarTab>
</Popover.Anchor>

View File

@@ -169,9 +169,7 @@ test.describe("Project Files visual regression", () => {
.first();
await expect(iconWrapper).toBeVisible();
const container = page.locator("[data-main-container]");
await expect(container).toBeVisible();
await expectElementScreenshot(container, {
await expectElementScreenshot(filesSection, {
name: "project-files-long-underscore-filename",
});